From 8feda315f6b61dc5c9567b4b1f161e2b977bcc64 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 18 Jul 2023 23:11:04 +0200 Subject: [PATCH 01/12] feat(index): add topic index facility Somewhat similar to `emqx_trie` in design and logic, yet built on top of a single, potentially pre-existing table. --- apps/emqx/src/emqx_topic_index.erl | 156 ++++++++++++++++++++++ apps/emqx/test/emqx_topic_index_SUITE.erl | 143 ++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 apps/emqx/src/emqx_topic_index.erl create mode 100644 apps/emqx/test/emqx_topic_index_SUITE.erl diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl new file mode 100644 index 000000000..9f0b5fba1 --- /dev/null +++ b/apps/emqx/src/emqx_topic_index.erl @@ -0,0 +1,156 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Topic index for matching topics to topic filters. +%% +%% Works on top of ETS ordered_set table. Keys are parsed topic filters +%% with record ID appended to the end, wrapped in a tuple to disambiguate from +%% topic filter words. Existing table may be used if existing keys will not +%% collide with index keys. +%% +%% Designed to effectively answer questions like: +%% 1. Does any topic filter match given topic? +%% 2. Which records are associated with topic filters matching given topic? +%% +%% Questions like these are _only slightly_ less effective: +%% 1. Which topic filters match given topic? +%% 2. Which record IDs are associated with topic filters matching given topic? + +-module(emqx_topic_index). + +-export([new/0]). +-export([insert/4]). +-export([delete/3]). +-export([match/2]). +-export([matches/2]). + +-export([get_id/1]). +-export([get_topic/1]). +-export([get_record/2]). + +-type key(ID) :: [binary() | '+' | '#' | {ID}]. +-type match(ID) :: key(ID). + +new() -> + ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). + +insert(Filter, ID, Record, Tab) -> + ets:insert(Tab, {emqx_topic:words(Filter) ++ [{ID}], Record}). + +delete(Filter, ID, Tab) -> + ets:delete(Tab, emqx_topic:words(Filter) ++ [{ID}]). + +-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. +match(Topic, Tab) -> + {Words, RPrefix} = match_init(Topic), + match(Words, RPrefix, Tab). + +match(Words, RPrefix, Tab) -> + Prefix = lists:reverse(RPrefix), + K = ets:next(Tab, Prefix), + case match_filter(Prefix, K, Words =/= []) of + true -> + K; + stop -> + false; + Matched -> + match_rest(Matched, Words, RPrefix, Tab) + end. + +match_rest(false, [W | Rest], RPrefix, Tab) -> + match(Rest, [W | RPrefix], Tab); +match_rest(plus, [W | Rest], RPrefix, Tab) -> + case match(Rest, ['+' | RPrefix], Tab) of + Match when is_list(Match) -> + Match; + false -> + match(Rest, [W | RPrefix], Tab) + end; +match_rest(_, [], _RPrefix, _Tab) -> + false. + +-spec matches(emqx_types:topic(), ets:table()) -> [match(_ID)]. +matches(Topic, Tab) -> + {Words, RPrefix} = match_init(Topic), + matches(Words, RPrefix, Tab). + +matches(Words, RPrefix, Tab) -> + Prefix = lists:reverse(RPrefix), + matches(ets:next(Tab, Prefix), Prefix, Words, RPrefix, Tab). + +matches(K, Prefix, Words, RPrefix, Tab) -> + case match_filter(Prefix, K, Words =/= []) of + true -> + [K | matches(ets:next(Tab, K), Prefix, Words, RPrefix, Tab)]; + stop -> + []; + Matched -> + matches_rest(Matched, Words, RPrefix, Tab) + end. + +matches_rest(false, [W | Rest], RPrefix, Tab) -> + matches(Rest, [W | RPrefix], Tab); +matches_rest(plus, [W | Rest], RPrefix, Tab) -> + matches(Rest, ['+' | RPrefix], Tab) ++ matches(Rest, [W | RPrefix], Tab); +matches_rest(_, [], _RPrefix, _Tab) -> + []. + +match_filter([], [{_ID}], _IsPrefix = false) -> + % NOTE: exact match is `true` only if we match whole topic, not prefix + true; +match_filter([], ['#', {_ID}], _IsPrefix) -> + % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` + true; +match_filter([], ['+' | _], _) -> + plus; +match_filter([], [_H | _], _) -> + false; +match_filter([H | T1], [H | T2], IsPrefix) -> + match_filter(T1, T2, IsPrefix); +match_filter([H1 | _], [H2 | _], _) when H2 > H1 -> + % NOTE: we're strictly past the prefix, no need to continue + stop; +match_filter(_, '$end_of_table', _) -> + stop. + +match_init(Topic) -> + case emqx_topic:words(Topic) of + [W = <<"$", _/bytes>> | Rest] -> + % NOTE + % This will effectively skip attempts to match special topics to `#` or `+/...`. + {Rest, [W]}; + Words -> + {Words, []} + end. + +-spec get_id(match(ID)) -> ID. +get_id([{ID}]) -> + ID; +get_id([_ | Rest]) -> + get_id(Rest). + +-spec get_topic(match(_ID)) -> emqx_types:topic(). +get_topic(K) -> + emqx_topic:join(cut_topic(K)). + +cut_topic([{_ID}]) -> + []; +cut_topic([W | Rest]) -> + [W | cut_topic(Rest)]. + +-spec get_record(match(_ID), ets:table()) -> _Record. +get_record(K, Tab) -> + ets:lookup_element(Tab, K, 2). diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl new file mode 100644 index 000000000..98bfe48a1 --- /dev/null +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -0,0 +1,143 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_topic_index_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +t_insert(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab), + ?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))), + ?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))). + +t_match(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab), + ?assertMatch( + [<<"sensor/#">>, <<"sensor/+/#">>], + [topic(M) || M <- matches(<<"sensor/1">>, Tab)] + ). + +t_match2(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab), + ?assertEqual( + [<<"#">>, <<"+/#">>, <<"+/+/#">>], + [topic(M) || M <- matches(<<"a/b/c">>, Tab)] + ), + ?assertEqual( + false, + emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab) + ). + +t_match3(_) -> + Tab = emqx_topic_index:new(), + Records = [ + {<<"d/#">>, t_match3_1}, + {<<"a/b/+">>, t_match3_2}, + {<<"a/#">>, t_match3_3}, + {<<"#">>, t_match3_4}, + {<<"$SYS/#">>, t_match3_sys} + ], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + Matched = matches(<<"a/b/c">>, Tab), + case length(Matched) of + 3 -> ok; + _ -> error({unexpected, Matched}) + end, + ?assertEqual( + t_match3_sys, + id(match(<<"$SYS/a/b/c">>, Tab)) + ). + +t_match4(_) -> + Tab = emqx_topic_index:new(), + Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + ?assertEqual( + [<<"/#">>, <<"/+">>], + [topic(M) || M <- matches(<<"/">>, Tab)] + ), + ?assertEqual( + [<<"/#">>, <<"/+/a/b/c">>], + [topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)] + ). + +t_match5(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + Records = [ + {<<"#">>, t_match5_1}, + {<>, t_match5_2}, + {<>, t_match5_3} + ], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + ?assertEqual( + [<<"#">>, <>], + [topic(M) || M <- matches(T, Tab)] + ), + ?assertEqual( + [<<"#">>, <>, <>], + [topic(M) || M <- matches(<>, Tab)] + ). + +t_match6(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>, + emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab), + ?assertEqual(ID, id(match(T, Tab))). + +t_match7(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>, + emqx_topic_index:insert(W, t_match7, <<>>, Tab), + ?assertEqual(W, topic(match(T, Tab))). + +match(T, Tab) -> + emqx_topic_index:match(T, Tab). + +matches(T, Tab) -> + lists:sort(emqx_topic_index:matches(T, Tab)). + +id(Match) -> + emqx_topic_index:get_id(Match). + +topic(Match) -> + emqx_topic_index:get_topic(Match). From 28bcb394d150d066e30bb78d7be3ecdf375b6681 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 21 Jul 2023 20:06:46 +0200 Subject: [PATCH 02/12] fix(topicidx): allow to return matches unique by record id --- apps/emqx/src/emqx_topic_index.erl | 132 ++++++++++++---------- apps/emqx/test/emqx_topic_index_SUITE.erl | 16 ++- 2 files changed, 88 insertions(+), 60 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 9f0b5fba1..ac4901a09 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -16,18 +16,16 @@ %% @doc Topic index for matching topics to topic filters. %% -%% Works on top of ETS ordered_set table. Keys are parsed topic filters -%% with record ID appended to the end, wrapped in a tuple to disambiguate from -%% topic filter words. Existing table may be used if existing keys will not -%% collide with index keys. +%% Works on top of ETS ordered_set table. Keys are tuples constructed from +%% parsed topic filters and record IDs, wrapped in a tuple to order them +%% strictly greater than unit tuple (`{}`). Existing table may be used if +%% existing keys will not collide with index keys. %% %% Designed to effectively answer questions like: %% 1. Does any topic filter match given topic? %% 2. Which records are associated with topic filters matching given topic? -%% -%% Questions like these are _only slightly_ less effective: -%% 1. Which topic filters match given topic? -%% 2. Which record IDs are associated with topic filters matching given topic? +%% 3. Which topic filters match given topic? +%% 4. Which record IDs are associated with topic filters matching given topic? -module(emqx_topic_index). @@ -35,23 +33,23 @@ -export([insert/4]). -export([delete/3]). -export([match/2]). --export([matches/2]). +-export([matches/3]). -export([get_id/1]). -export([get_topic/1]). -export([get_record/2]). --type key(ID) :: [binary() | '+' | '#' | {ID}]. +-type key(ID) :: {[binary() | '+' | '#'], {ID}}. -type match(ID) :: key(ID). new() -> ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). insert(Filter, ID, Record, Tab) -> - ets:insert(Tab, {emqx_topic:words(Filter) ++ [{ID}], Record}). + ets:insert(Tab, {{emqx_topic:words(Filter), {ID}}, Record}). delete(Filter, ID, Tab) -> - ets:delete(Tab, emqx_topic:words(Filter) ++ [{ID}]). + ets:delete(Tab, {emqx_topic:words(Filter), {ID}}). -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> @@ -60,8 +58,8 @@ match(Topic, Tab) -> match(Words, RPrefix, Tab) -> Prefix = lists:reverse(RPrefix), - K = ets:next(Tab, Prefix), - case match_filter(Prefix, K, Words =/= []) of + K = ets:next(Tab, {Prefix, {}}), + case match_filter(Prefix, K, Words == []) of true -> K; stop -> @@ -74,7 +72,7 @@ match_rest(false, [W | Rest], RPrefix, Tab) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> case match(Rest, ['+' | RPrefix], Tab) of - Match when is_list(Match) -> + Match = {_, _} -> Match; false -> match(Rest, [W | RPrefix], Tab) @@ -82,48 +80,71 @@ match_rest(plus, [W | Rest], RPrefix, Tab) -> match_rest(_, [], _RPrefix, _Tab) -> false. --spec matches(emqx_types:topic(), ets:table()) -> [match(_ID)]. -matches(Topic, Tab) -> +-spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)]. +matches(Topic, Tab, Opts) -> {Words, RPrefix} = match_init(Topic), - matches(Words, RPrefix, Tab). - -matches(Words, RPrefix, Tab) -> - Prefix = lists:reverse(RPrefix), - matches(ets:next(Tab, Prefix), Prefix, Words, RPrefix, Tab). - -matches(K, Prefix, Words, RPrefix, Tab) -> - case match_filter(Prefix, K, Words =/= []) of - true -> - [K | matches(ets:next(Tab, K), Prefix, Words, RPrefix, Tab)]; - stop -> - []; - Matched -> - matches_rest(Matched, Words, RPrefix, Tab) + AccIn = + case Opts of + [unique | _] -> #{}; + [] -> [] + end, + Matches = matches(Words, RPrefix, AccIn, Tab), + case Matches of + #{} -> maps:values(Matches); + _ -> Matches end. -matches_rest(false, [W | Rest], RPrefix, Tab) -> - matches(Rest, [W | RPrefix], Tab); -matches_rest(plus, [W | Rest], RPrefix, Tab) -> - matches(Rest, ['+' | RPrefix], Tab) ++ matches(Rest, [W | RPrefix], Tab); -matches_rest(_, [], _RPrefix, _Tab) -> - []. +matches(Words, RPrefix, Acc, Tab) -> + Prefix = lists:reverse(RPrefix), + matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). -match_filter([], [{_ID}], _IsPrefix = false) -> - % NOTE: exact match is `true` only if we match whole topic, not prefix - true; -match_filter([], ['#', {_ID}], _IsPrefix) -> +matches(K, Prefix, Words, RPrefix, Acc, Tab) -> + case match_filter(Prefix, K, Words == []) of + true -> + matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab); + stop -> + Acc; + Matched -> + matches_rest(Matched, Words, RPrefix, Acc, Tab) + end. + +matches_rest(false, [W | Rest], RPrefix, Acc, Tab) -> + matches(Rest, [W | RPrefix], Acc, Tab); +matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), + matches(Rest, [W | RPrefix], NAcc, Tab); +matches_rest(_, [], _RPrefix, Acc, _Tab) -> + Acc. + +match_add(K = {_Filter, ID}, Acc = #{}) -> + Acc#{ID => K}; +match_add(K, Acc) -> + [K | Acc]. + +match_filter(Prefix, {Filter, _ID}, NotPrefix) -> + case match_filter(Prefix, Filter) of + exact -> + % NOTE: exact match is `true` only if we match whole topic, not prefix + NotPrefix; + Match -> + Match + end; +match_filter(_, '$end_of_table', _) -> + stop. + +match_filter([], []) -> + exact; +match_filter([], ['#']) -> % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` true; -match_filter([], ['+' | _], _) -> +match_filter([], ['+' | _]) -> plus; -match_filter([], [_H | _], _) -> +match_filter([], [_H | _]) -> false; -match_filter([H | T1], [H | T2], IsPrefix) -> - match_filter(T1, T2, IsPrefix); -match_filter([H1 | _], [H2 | _], _) when H2 > H1 -> +match_filter([H | T1], [H | T2]) -> + match_filter(T1, T2); +match_filter([H1 | _], [H2 | _]) when H2 > H1 -> % NOTE: we're strictly past the prefix, no need to continue - stop; -match_filter(_, '$end_of_table', _) -> stop. match_init(Topic) -> @@ -137,19 +158,12 @@ match_init(Topic) -> end. -spec get_id(match(ID)) -> ID. -get_id([{ID}]) -> - ID; -get_id([_ | Rest]) -> - get_id(Rest). +get_id({_Filter, {ID}}) -> + ID. -spec get_topic(match(_ID)) -> emqx_types:topic(). -get_topic(K) -> - emqx_topic:join(cut_topic(K)). - -cut_topic([{_ID}]) -> - []; -cut_topic([W | Rest]) -> - [W | cut_topic(Rest)]. +get_topic({Filter, _ID}) -> + emqx_topic:join(Filter). -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 98bfe48a1..b5faba4d9 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -130,11 +130,25 @@ t_match7(_) -> emqx_topic_index:insert(W, t_match7, <<>>, Tab), ?assertEqual(W, topic(match(T, Tab))). +t_match_unique(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab), + ?assertEqual( + [t_match_id1, t_match_id1], + [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [])] + ), + ?assertEqual( + [t_match_id1], + [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] + ). + match(T, Tab) -> emqx_topic_index:match(T, Tab). matches(T, Tab) -> - lists:sort(emqx_topic_index:matches(T, Tab)). + lists:sort(emqx_topic_index:matches(T, Tab, [])). id(Match) -> emqx_topic_index:get_id(Match). From 6a1340636369100a29188f9a63a0cb49a5b0b3dc Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 16:18:07 +0400 Subject: [PATCH 03/12] fix(topicidx): use custom topic words to keep required ordering Otherwise, topic with empty tokens (e.g. `a/b///c`) would have some of their tokens ordered before `#` / `+`, because empty token was represented as empty atom (`''`). --- apps/emqx/src/emqx_topic_index.erl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index ac4901a09..48c685f04 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -39,17 +39,18 @@ -export([get_topic/1]). -export([get_record/2]). --type key(ID) :: {[binary() | '+' | '#'], {ID}}. +-type word() :: binary() | '+' | '#'. +-type key(ID) :: {[word()], {ID}}. -type match(ID) :: key(ID). new() -> - ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). + ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). insert(Filter, ID, Record, Tab) -> - ets:insert(Tab, {{emqx_topic:words(Filter), {ID}}, Record}). + ets:insert(Tab, {{words(Filter), {ID}}, Record}). delete(Filter, ID, Tab) -> - ets:delete(Tab, {emqx_topic:words(Filter), {ID}}). + ets:delete(Tab, {words(Filter), {ID}}). -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> @@ -148,7 +149,7 @@ match_filter([H1 | _], [H2 | _]) when H2 > H1 -> stop. match_init(Topic) -> - case emqx_topic:words(Topic) of + case words(Topic) of [W = <<"$", _/bytes>> | Rest] -> % NOTE % This will effectively skip attempts to match special topics to `#` or `+/...`. @@ -168,3 +169,14 @@ get_topic({Filter, _ID}) -> -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> ets:lookup_element(Tab, K, 2). + +%% + +-spec words(emqx_types:topic()) -> [word()]. +words(Topic) when is_binary(Topic) -> + [word(W) || W <- emqx_topic:tokens(Topic)]. + +-spec word(binary()) -> word(). +word(<<"+">>) -> '+'; +word(<<"#">>) -> '#'; +word(Bin) -> Bin. From 48a50c9137a89fbe6fd55a94dec8987ff5822b97 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 16:24:13 +0400 Subject: [PATCH 04/12] fix(topicidx): fix missing matches when 'a/b' and 'a/b/#' both exist Thanks to @HJianBo for spotting this issue. The approach to fix it is different though: we try to keep the "recurrency branch factor" to a minimum, instead introducing new condition for the case when filter does not match, but iteration with `ets:next/2` is not yet finished for the prefix. Co-Authored-By: JianBo He --- apps/emqx/src/emqx_topic_index.erl | 43 ++++++++++++----------- apps/emqx/test/emqx_topic_index_SUITE.erl | 18 ++++++++++ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 48c685f04..29d8694af 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -59,10 +59,14 @@ match(Topic, Tab) -> match(Words, RPrefix, Tab) -> Prefix = lists:reverse(RPrefix), - K = ets:next(Tab, {Prefix, {}}), - case match_filter(Prefix, K, Words == []) of + match(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Tab). + +match(K, Prefix, Words, RPrefix, Tab) -> + case match_next(Prefix, K, Words) of true -> K; + skip -> + match(ets:next(Tab, K), Prefix, Words, RPrefix, Tab); stop -> false; Matched -> @@ -100,9 +104,11 @@ matches(Words, RPrefix, Acc, Tab) -> matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). matches(K, Prefix, Words, RPrefix, Acc, Tab) -> - case match_filter(Prefix, K, Words == []) of + case match_next(Prefix, K, Words) of true -> matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab); + skip -> + matches(ets:next(Tab, K), Prefix, Words, RPrefix, Acc, Tab); stop -> Acc; Matched -> @@ -122,29 +128,26 @@ match_add(K = {_Filter, ID}, Acc = #{}) -> match_add(K, Acc) -> [K | Acc]. -match_filter(Prefix, {Filter, _ID}, NotPrefix) -> - case match_filter(Prefix, Filter) of - exact -> - % NOTE: exact match is `true` only if we match whole topic, not prefix - NotPrefix; - Match -> - Match - end; -match_filter(_, '$end_of_table', _) -> +match_next(Prefix, {Filter, _ID}, Suffix) -> + match_filter(Prefix, Filter, Suffix); +match_next(_, '$end_of_table', _) -> stop. -match_filter([], []) -> - exact; -match_filter([], ['#']) -> +match_filter([], [], []) -> + true; +match_filter([], [], _Suffix) -> + % NOTE: we matched the prefix, but there may be more matches next + skip; +match_filter([], ['#'], _Suffix) -> % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` true; -match_filter([], ['+' | _]) -> +match_filter([], ['+' | _], _Suffix) -> plus; -match_filter([], [_H | _]) -> +match_filter([], [_H | _], _Suffix) -> false; -match_filter([H | T1], [H | T2]) -> - match_filter(T1, T2); -match_filter([H1 | _], [H2 | _]) when H2 > H1 -> +match_filter([H | T1], [H | T2], Suffix) -> + match_filter(T1, T2, Suffix); +match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 -> % NOTE: we're strictly past the prefix, no need to continue stop. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index b5faba4d9..75e1adaf6 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -144,6 +144,24 @@ t_match_unique(_) -> [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] ). +t_match_wildcards(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b">>, id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/#">>, id2, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/#">>, id3, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c">>, id4, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/+">>, id5, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/d">>, id6, <<>>, Tab), + emqx_topic_index:insert(<<"a/+/+">>, id7, <<>>, Tab), + emqx_topic_index:insert(<<"a/+/#">>, id8, <<>>, Tab), + + Records = [id(M) || M <- matches(<<"a/b/c">>, Tab)], + ?assertEqual([id2, id3, id4, id5, id7, id8], lists:sort(Records)), + + Records1 = [id(M) || M <- matches(<<"a/b">>, Tab)], + ?assertEqual([id1, id2, id3, id8], lists:sort(Records1)), + ok. + match(T, Tab) -> emqx_topic_index:match(T, Tab). From 0c7bdbdab45ec2f9ecb5dc3f4f390afc5eae60f4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 18:49:07 +0400 Subject: [PATCH 05/12] test(topicidx): add property test Co-Authored-By: JianBo He --- apps/emqx/test/emqx_topic_index_SUITE.erl | 174 ++++++++++++++++++++-- 1 file changed, 159 insertions(+), 15 deletions(-) diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 75e1adaf6..f2b263a5a 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -19,8 +19,11 @@ -compile(export_all). -compile(nowarn_export_all). +-include_lib("proper/include/proper.hrl"). -include_lib("eunit/include/eunit.hrl"). +-import(emqx_proper_types, [scaled/2]). + all() -> emqx_common_test_helpers:all(?MODULE). @@ -144,23 +147,122 @@ t_match_unique(_) -> [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] ). -t_match_wildcards(_) -> - Tab = emqx_topic_index:new(), - emqx_topic_index:insert(<<"a/b">>, id1, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/#">>, id2, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/#">>, id3, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/c">>, id4, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/+">>, id5, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/d">>, id6, <<>>, Tab), - emqx_topic_index:insert(<<"a/+/+">>, id7, <<>>, Tab), - emqx_topic_index:insert(<<"a/+/#">>, id8, <<>>, Tab), +t_match_wildcard_edge_cases(_) -> + CommonTopics = [ + <<"a/b">>, + <<"a/b/#">>, + <<"a/b/#">>, + <<"a/b/c">>, + <<"a/b/+">>, + <<"a/b/d">>, + <<"a/+/+">>, + <<"a/+/#">> + ], + Datasets = + [ + %% Topics, TopicName, Results + {CommonTopics, <<"a/b/c">>, [2, 3, 4, 5, 7, 8]}, + {CommonTopics, <<"a/b">>, [1, 2, 3, 8]}, + {[<<"+/b/c">>, <<"/">>], <<"a/b/c">>, [1]}, + {[<<"#">>, <<"/">>], <<"a">>, [1]}, + {[<<"/">>, <<"+">>], <<"a">>, [2]} + ], + F = fun({Topics, TopicName, Expected}) -> + Tab = emqx_topic_index:new(), + _ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], + Results = [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + ?assertEqual( + Expected, + Results, + #{ + "Base topics" => Topics, + "Topic name" => TopicName + } + ) + end, + lists:foreach(F, Datasets). - Records = [id(M) || M <- matches(<<"a/b/c">>, Tab)], - ?assertEqual([id2, id3, id4, id5, id7, id8], lists:sort(Records)), +t_prop_matches(_) -> + ?assert( + proper:quickcheck( + topic_matches_prop(), + [{max_size, 100}, {numtests, 100}] + ) + ), + Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]], + ct:pal("Statistics: ~p", [maps:from_list(Statistics)]). - Records1 = [id(M) || M <- matches(<<"a/b">>, Tab)], - ?assertEqual([id1, id2, id3, id8], lists:sort(Records1)), - ok. +topic_matches_prop() -> + ?FORALL( + % Generate a longer list of topics and a shorter list of topic filter patterns. + #{ + topics := TTopics, + patterns := Pats + }, + emqx_proper_types:fixedmap(#{ + % NOTE + % Beware adding non-empty contraint, proper will have a hard time with `topic_t/1` + % for some reason. + topics => scaled(4, list(topic_t([1, 2, 3, 4]))), + patterns => list(topic_filter_pattern_t()) + }), + begin + Tab = emqx_topic_index:new(), + Topics = [emqx_topic:join(T) || T <- TTopics], + % Produce topic filters from generated topics and patterns. + % Number of filters is equal to the number of patterns, most of the time. + Filters = lists:enumerate(mk_filters(Pats, TTopics)), + _ = [emqx_topic_index:insert(F, N, <<>>, Tab) || {N, F} <- Filters], + % Gather some basic statistics + _ = account(filters, length(Filters)), + _ = account(topics, NTopics = length(Topics)), + _ = account(maxhits, NTopics * NTopics), + % Verify that matching each topic against index returns the same results as + % matching it against the list of filters one by one. + lists:all( + fun(Topic) -> + Ids1 = [id(M) || M <- emqx_topic_index:matches(Topic, Tab, [unique])], + Ids2 = lists:filtermap( + fun({N, F}) -> + case emqx_topic:match(Topic, F) of + true -> {true, N}; + false -> false + end + end, + Filters + ), + % Account a number of matches to compute hitrate later + _ = account(matches, length(Ids1)), + case (Ids2 -- Ids1) ++ (Ids2 -- Ids1) of + [] -> + true; + [_ | _] = _Differences -> + ct:pal( + "Topic name: ~p~n" + "Index results: ~p~n" + "Topic match results:: ~p~n", + [Topic, Ids1, Ids2] + ), + false + end + end, + Topics + ) + end + ). + +mk_filters([Pat | PRest], [Topic | TRest]) -> + [emqx_topic:join(mk_topic_filter(Pat, Topic)) | mk_filters(PRest, TRest)]; +mk_filters(_, _) -> + []. + +account(Counter, N) -> + put({?MODULE, Counter}, account(Counter) + N). + +account(Counter) -> + emqx_maybe:define(get({?MODULE, Counter}), 0). + +%% match(T, Tab) -> emqx_topic_index:match(T, Tab). @@ -173,3 +275,45 @@ id(Match) -> topic(Match) -> emqx_topic_index:get_topic(Match). + +%% + +topic_t(EntropyWeights) -> + EWLast = lists:last(EntropyWeights), + ?LET(L, scaled(1 / 4, list(EWLast)), begin + EWs = lists:sublist(EntropyWeights ++ L, length(L)), + ?SIZED(S, [oneof([topic_level_t(S * EW), topic_level_fixed_t()]) || EW <- EWs]) + end). + +topic_level_t(Entropy) -> + S = floor(1 + math:log2(Entropy) / 4), + ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). + +topic_level_fixed_t() -> + oneof([ + <<"foo">>, + <<"bar">>, + <<"baz">>, + <<"xyzzy">> + ]). + +topic_filter_pattern_t() -> + list(topic_level_pattern_t()). + +topic_level_pattern_t() -> + frequency([ + {5, level}, + {2, '+'}, + {1, '#'} + ]). + +mk_topic_filter([], _) -> + []; +mk_topic_filter(_, []) -> + []; +mk_topic_filter(['#' | _], _) -> + ['#']; +mk_topic_filter(['+' | Rest], [_ | Levels]) -> + ['+' | mk_topic_filter(Rest, Levels)]; +mk_topic_filter([level | Rest], [L | Levels]) -> + [L | mk_topic_filter(Rest, Levels)]. From fd0986071c082170fa154ce6d7695805ad513515 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 19:27:43 +0400 Subject: [PATCH 06/12] perf(topicidx): implement fast-forwarding prefixes This should give less `ets:next/2` calls in general and much less when index has relatively small number of long non-wildcard topics. --- apps/emqx/src/emqx_topic_index.erl | 20 ++++++++++++++++---- apps/emqx/test/emqx_topic_index_SUITE.erl | 10 ++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 29d8694af..44e88e659 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -73,7 +73,13 @@ match(K, Prefix, Words, RPrefix, Tab) -> match_rest(Matched, Words, RPrefix, Tab) end. -match_rest(false, [W | Rest], RPrefix, Tab) -> +match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) -> + % NOTE + % Fast-forward through identical words in the topic and the last key suffixes. + % This should save us a few redundant `ets:next` calls at the cost of slightly + % more complex match patterns. + match_rest(SLast, Rest, [W1 | RPrefix], Tab); +match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> case match(Rest, ['+' | RPrefix], Tab) of @@ -115,7 +121,13 @@ matches(K, Prefix, Words, RPrefix, Acc, Tab) -> matches_rest(Matched, Words, RPrefix, Acc, Tab) end. -matches_rest(false, [W | Rest], RPrefix, Acc, Tab) -> +matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) -> + % NOTE + % Fast-forward through identical words in the topic and the last key suffixes. + % This should save us a few redundant `ets:next` calls at the cost of slightly + % more complex match patterns. + matches_rest(SLast, Rest, [W1 | RPrefix], Acc, Tab); +matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> matches(Rest, [W | RPrefix], Acc, Tab); matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), @@ -143,8 +155,8 @@ match_filter([], ['#'], _Suffix) -> true; match_filter([], ['+' | _], _Suffix) -> plus; -match_filter([], [_H | _], _Suffix) -> - false; +match_filter([], [_H | _] = Rest, _Suffix) -> + Rest; match_filter([H | T1], [H | T2], Suffix) -> match_filter(T1, T2, Suffix); match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 -> diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index f2b263a5a..80ca536b4 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -133,6 +133,16 @@ t_match7(_) -> emqx_topic_index:insert(W, t_match7, <<>>, Tab), ?assertEqual(W, topic(match(T, Tab))). +t_match_fast_forward(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab), + emqx_topic_index:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c/+">>, id3, <<>>, Tab), + % dbg:tracer(), + % dbg:p(all, c), + % dbg:tpl({ets, next, '_'}, x), + ?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). + t_match_unique(_) -> Tab = emqx_topic_index:new(), emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), From 9a249e4b01fc9ab717a1ab02c0a5db255fa87989 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 13:11:25 +0400 Subject: [PATCH 07/12] test(topicidx): increase test coverage --- apps/emqx/test/emqx_topic_index_SUITE.erl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 80ca536b4..ade98acec 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -141,6 +141,7 @@ t_match_fast_forward(_) -> % dbg:tracer(), % dbg:p(all, c), % dbg:tpl({ets, next, '_'}, x), + ?assertEqual(id1, id(match(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))), ?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). t_match_unique(_) -> @@ -180,14 +181,15 @@ t_match_wildcard_edge_cases(_) -> F = fun({Topics, TopicName, Expected}) -> Tab = emqx_topic_index:new(), _ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], - Results = [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + ?assertEqual( + lists:last(Expected), + id(emqx_topic_index:match(TopicName, Tab)), + #{"Base topics" => Topics, "Topic name" => TopicName} + ), ?assertEqual( Expected, - Results, - #{ - "Base topics" => Topics, - "Topic name" => TopicName - } + [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + #{"Base topics" => Topics, "Topic name" => TopicName} ) end, lists:foreach(F, Datasets). From 47dfba4341ea312967e33dd5b21dd549b7319ea8 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 20 Jul 2023 14:49:10 +0200 Subject: [PATCH 08/12] perf(ruleeng): employ `emqx_topic_index` to speed up topic matching --- apps/emqx_rule_engine/include/rule_engine.hrl | 1 + .../emqx_rule_engine/src/emqx_rule_engine.erl | 68 +++++++++++++------ .../src/emqx_rule_engine_app.erl | 1 + 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/apps/emqx_rule_engine/include/rule_engine.hrl b/apps/emqx_rule_engine/include/rule_engine.hrl index b2a6a549e..7df5d9941 100644 --- a/apps/emqx_rule_engine/include/rule_engine.hrl +++ b/apps/emqx_rule_engine/include/rule_engine.hrl @@ -109,6 +109,7 @@ %% Tables -define(RULE_TAB, emqx_rule_engine). +-define(RULE_TOPIC_INDEX, emqx_rule_engine_topic_index). %% Allowed sql function provider modules -define(DEFAULT_SQL_FUNC_PROVIDER, emqx_rule_funcs). diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.erl b/apps/emqx_rule_engine/src/emqx_rule_engine.erl index 66c82d3a1..41d1ed433 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.erl @@ -176,7 +176,7 @@ create_rule(Params) -> create_rule(Params = #{id := RuleId}, CreatedAt) when is_binary(RuleId) -> case get_rule(RuleId) of - not_found -> parse_and_insert(Params, CreatedAt); + not_found -> with_parsed_rule(Params, CreatedAt, fun insert_rule/1); {ok, _} -> {error, already_exists} end. @@ -185,18 +185,27 @@ update_rule(Params = #{id := RuleId}) when is_binary(RuleId) -> case get_rule(RuleId) of not_found -> {error, not_found}; - {ok, #{created_at := CreatedAt}} -> - parse_and_insert(Params, CreatedAt) + {ok, RulePrev = #{created_at := CreatedAt}} -> + with_parsed_rule(Params, CreatedAt, fun(Rule) -> update_rule(Rule, RulePrev) end) end. -spec delete_rule(RuleId :: rule_id()) -> ok. delete_rule(RuleId) when is_binary(RuleId) -> - gen_server:call(?RULE_ENGINE, {delete_rule, RuleId}, ?T_CALL). + case get_rule(RuleId) of + not_found -> + ok; + {ok, Rule} -> + gen_server:call(?RULE_ENGINE, {delete_rule, Rule}, ?T_CALL) + end. -spec insert_rule(Rule :: rule()) -> ok. insert_rule(Rule) -> gen_server:call(?RULE_ENGINE, {insert_rule, Rule}, ?T_CALL). +-spec update_rule(Rule :: rule(), RulePrev :: rule()) -> ok. +update_rule(Rule, RulePrev) -> + gen_server:call(?RULE_ENGINE, {update_rule, Rule, RulePrev}, ?T_CALL). + %%---------------------------------------------------------------------------------------- %% Rule Management %%---------------------------------------------------------------------------------------- @@ -216,9 +225,8 @@ get_rules_ordered_by_ts() -> -spec get_rules_for_topic(Topic :: binary()) -> [rule()]. get_rules_for_topic(Topic) -> [ - Rule - || Rule = #{from := From} <- get_rules(), - emqx_topic:match_any(Topic, From) + emqx_topic_index:get_record(M, ?RULE_TOPIC_INDEX) + || M <- emqx_topic_index:matches(Topic, ?RULE_TOPIC_INDEX, [unique]) ]. -spec get_rules_with_same_event(Topic :: binary()) -> [rule()]. @@ -411,10 +419,17 @@ init([]) -> {ok, #{}}. handle_call({insert_rule, Rule}, _From, State) -> - do_insert_rule(Rule), + ok = do_insert_rule(Rule), + ok = do_update_rule_index(Rule), + {reply, ok, State}; +handle_call({update_rule, Rule, RulePrev}, _From, State) -> + ok = do_delete_rule_index(RulePrev), + ok = do_insert_rule(Rule), + ok = do_update_rule_index(Rule), {reply, ok, State}; handle_call({delete_rule, Rule}, _From, State) -> - do_delete_rule(Rule), + ok = do_delete_rule_index(Rule), + ok = do_delete_rule(Rule), {reply, ok, State}; handle_call(Req, _From, State) -> ?SLOG(error, #{msg => "unexpected_call", request => Req}), @@ -438,7 +453,7 @@ code_change(_OldVsn, State, _Extra) -> %% Internal Functions %%---------------------------------------------------------------------------------------- -parse_and_insert(Params = #{id := RuleId, sql := Sql, actions := Actions}, CreatedAt) -> +with_parsed_rule(Params = #{id := RuleId, sql := Sql, actions := Actions}, CreatedAt, Fun) -> case emqx_rule_sqlparser:parse(Sql) of {ok, Select} -> Rule = #{ @@ -459,7 +474,7 @@ parse_and_insert(Params = #{id := RuleId, sql := Sql, actions := Actions}, Creat conditions => emqx_rule_sqlparser:select_where(Select) %% -- calculated fields end }, - ok = insert_rule(Rule), + ok = Fun(Rule), {ok, Rule}; {error, Reason} -> {error, Reason} @@ -471,16 +486,27 @@ do_insert_rule(#{id := Id} = Rule) -> true = ets:insert(?RULE_TAB, {Id, maps:remove(id, Rule)}), ok. -do_delete_rule(RuleId) -> - case get_rule(RuleId) of - {ok, Rule} -> - ok = unload_hooks_for_rule(Rule), - ok = clear_metrics_for_rule(RuleId), - true = ets:delete(?RULE_TAB, RuleId), - ok; - not_found -> - ok - end. +do_delete_rule(#{id := Id} = Rule) -> + ok = unload_hooks_for_rule(Rule), + ok = clear_metrics_for_rule(Id), + true = ets:delete(?RULE_TAB, Id), + ok. + +do_update_rule_index(#{id := Id, from := From} = Rule) -> + ok = lists:foreach( + fun(Topic) -> + true = emqx_topic_index:insert(Topic, Id, Rule, ?RULE_TOPIC_INDEX) + end, + From + ). + +do_delete_rule_index(#{id := Id, from := From}) -> + ok = lists:foreach( + fun(Topic) -> + true = emqx_topic_index:delete(Topic, Id, ?RULE_TOPIC_INDEX) + end, + From + ). parse_actions(Actions) -> [do_parse_action(Act) || Act <- Actions]. diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl b/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl index d8b031bdd..28515cb1a 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl @@ -26,6 +26,7 @@ start(_Type, _Args) -> _ = ets:new(?RULE_TAB, [named_table, public, ordered_set, {read_concurrency, true}]), + _ = ets:new(?RULE_TOPIC_INDEX, [named_table, public, ordered_set, {read_concurrency, true}]), ok = emqx_rule_events:reload(), SupRet = emqx_rule_engine_sup:start_link(), ok = emqx_rule_engine:load_rules(), From fe9477f92ea972b6d5b003658c2d4a957679f907 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 14:00:23 +0400 Subject: [PATCH 09/12] chore: bump applications versions * emqx_rule_engine 5.0.23 --- apps/emqx_rule_engine/src/emqx_rule_engine.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src index 09d57a4f9..e6d00bcae 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src @@ -2,7 +2,7 @@ {application, emqx_rule_engine, [ {description, "EMQX Rule Engine"}, % strict semver, bump manually! - {vsn, "5.0.22"}, + {vsn, "5.0.23"}, {modules, []}, {registered, [emqx_rule_engine_sup, emqx_rule_engine]}, {applications, [kernel, stdlib, rulesql, getopt, emqx_ctl, uuid]}, From d302aaae4cde883a290a4875e21b5e7891a611a3 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 15:24:24 +0400 Subject: [PATCH 10/12] chore: add changelog entry --- changes/ce/perf-11396.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/perf-11396.en.md diff --git a/changes/ce/perf-11396.en.md b/changes/ce/perf-11396.en.md new file mode 100644 index 000000000..fd8df9a9d --- /dev/null +++ b/changes/ce/perf-11396.en.md @@ -0,0 +1 @@ +Introduce topic index for the rule engine runtime that significantly improves the performance of EMQX with a non-trivial number of rules consuming messages matching different topic filters. From e39bbf4c495c2a16485a1e9e646a2845a2651603 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 15 Aug 2023 16:55:48 +0400 Subject: [PATCH 11/12] chore(topicidx): add more descriptive comments and specs To (hopefully) better illustrate what is happening there. --- apps/emqx/src/emqx_topic_index.erl | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 44e88e659..09e19a9f7 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -43,15 +43,27 @@ -type key(ID) :: {[word()], {ID}}. -type match(ID) :: key(ID). +%% @doc Create a new ETS table suitable for topic index. +%% Usable mostly for testing purposes. +-spec new() -> ets:table(). new() -> ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). +%% @doc Insert a new entry into the index that associates given topic filter to given +%% record ID, and attaches arbitrary record to the entry. This allows users to choose +%% between regular and "materialized" indexes, for example. +-spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true. insert(Filter, ID, Record, Tab) -> ets:insert(Tab, {{words(Filter), {ID}}, Record}). +%% @doc Delete an entry from the index that associates given topic filter to given +%% record ID. Deleting non-existing entry is not an error. +-spec delete(emqx_types:topic(), _ID, ets:table()) -> true. delete(Filter, ID, Tab) -> ets:delete(Tab, {words(Filter), {ID}}). +%% @doc Match given topic against the index and return the first match, or `false` if +%% no match is found. -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> {Words, RPrefix} = match_init(Topic), @@ -82,6 +94,10 @@ match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) -> match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> + % NOTE + % There's '+' in the key suffix, meaning we should consider 2 alternatives: + % 1. Match the rest of the topic as if there was '+' in the current position. + % 2. Skip this key and try to match the topic as it is. case match(Rest, ['+' | RPrefix], Tab) of Match = {_, _} -> Match; @@ -91,6 +107,8 @@ match_rest(plus, [W | Rest], RPrefix, Tab) -> match_rest(_, [], _RPrefix, _Tab) -> false. +%% @doc Match given topic against the index and return _all_ matches. +%% If `unique` option is given, return only unique matches by record ID. -spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)]. matches(Topic, Tab, Opts) -> {Words, RPrefix} = match_init(Topic), @@ -130,12 +148,18 @@ matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> matches(Rest, [W | RPrefix], Acc, Tab); matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + % NOTE + % There's '+' in the key suffix, meaning we should accumulate all matches from + % each of 2 branches: + % 1. Match the rest of the topic as if there was '+' in the current position. + % 2. Skip this key and try to match the topic as it is. NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), matches(Rest, [W | RPrefix], NAcc, Tab); matches_rest(_, [], _RPrefix, Acc, _Tab) -> Acc. match_add(K = {_Filter, ID}, Acc = #{}) -> + % NOTE: ensuring uniqueness by record ID Acc#{ID => K}; match_add(K, Acc) -> [K | Acc]. @@ -146,6 +170,7 @@ match_next(_, '$end_of_table', _) -> stop. match_filter([], [], []) -> + % NOTE: we matched the topic exactly true; match_filter([], [], _Suffix) -> % NOTE: we matched the prefix, but there may be more matches next @@ -173,14 +198,18 @@ match_init(Topic) -> {Words, []} end. +%% @doc Extract record ID from the match. -spec get_id(match(ID)) -> ID. get_id({_Filter, {ID}}) -> ID. +%% @doc Extract topic (or topic filter) from the match. -spec get_topic(match(_ID)) -> emqx_types:topic(). get_topic({Filter, _ID}) -> emqx_topic:join(Filter). +%% @doc Fetch the record associated with the match. +%% NOTE: Only really useful for ETS tables where the record ID is the first element. -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> ets:lookup_element(Tab, K, 2). @@ -189,6 +218,10 @@ get_record(K, Tab) -> -spec words(emqx_types:topic()) -> [word()]. words(Topic) when is_binary(Topic) -> + % NOTE + % This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty + % tokens to ''. This is needed to keep ordering of words consistent with what + % `match_filter/3` expects. [word(W) || W <- emqx_topic:tokens(Topic)]. -spec word(binary()) -> word(). From 5d79823891f32ec2fc6333bfdbc9e091e218c769 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 17 Aug 2023 01:13:19 +0400 Subject: [PATCH 12/12] perf(topicidx): preserve next key on the stack to reuse later This should further optimize the number of `ets:next/2` calls required for a single match query. --- apps/emqx/src/emqx_topic_index.erl | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 09e19a9f7..a6f662f74 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -127,6 +127,17 @@ matches(Words, RPrefix, Acc, Tab) -> Prefix = lists:reverse(RPrefix), matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). +matches(Words, RPrefix, K = {Filter, _}, Acc, Tab) -> + Prefix = lists:reverse(RPrefix), + case Prefix > Filter of + true -> + % NOTE: Prefix already greater than the last key seen, need to `ets:next/2`. + matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab); + false -> + % NOTE: Prefix is still less than or equal to the last key seen, reuse it. + matches(K, Prefix, Words, RPrefix, Acc, Tab) + end. + matches(K, Prefix, Words, RPrefix, Acc, Tab) -> case match_next(Prefix, K, Words) of true -> @@ -136,26 +147,27 @@ matches(K, Prefix, Words, RPrefix, Acc, Tab) -> stop -> Acc; Matched -> - matches_rest(Matched, Words, RPrefix, Acc, Tab) + % NOTE: Prserve next key on the stack to save on `ets:next/2` calls. + matches_rest(Matched, Words, RPrefix, K, Acc, Tab) end. -matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) -> +matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, K, Acc, Tab) -> % NOTE % Fast-forward through identical words in the topic and the last key suffixes. % This should save us a few redundant `ets:next` calls at the cost of slightly % more complex match patterns. - matches_rest(SLast, Rest, [W1 | RPrefix], Acc, Tab); -matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> - matches(Rest, [W | RPrefix], Acc, Tab); -matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + matches_rest(SLast, Rest, [W1 | RPrefix], K, Acc, Tab); +matches_rest(SLast, [W | Rest], RPrefix, K, Acc, Tab) when is_list(SLast) -> + matches(Rest, [W | RPrefix], K, Acc, Tab); +matches_rest(plus, [W | Rest], RPrefix, K, Acc, Tab) -> % NOTE % There's '+' in the key suffix, meaning we should accumulate all matches from % each of 2 branches: % 1. Match the rest of the topic as if there was '+' in the current position. % 2. Skip this key and try to match the topic as it is. - NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), - matches(Rest, [W | RPrefix], NAcc, Tab); -matches_rest(_, [], _RPrefix, Acc, _Tab) -> + NAcc = matches(Rest, ['+' | RPrefix], K, Acc, Tab), + matches(Rest, [W | RPrefix], K, NAcc, Tab); +matches_rest(_, [], _RPrefix, _K, Acc, _Tab) -> Acc. match_add(K = {_Filter, ID}, Acc = #{}) ->