From 8feda315f6b61dc5c9567b4b1f161e2b977bcc64 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 18 Jul 2023 23:11:04 +0200 Subject: [PATCH 01/20] feat(index): add topic index facility Somewhat similar to `emqx_trie` in design and logic, yet built on top of a single, potentially pre-existing table. --- apps/emqx/src/emqx_topic_index.erl | 156 ++++++++++++++++++++++ apps/emqx/test/emqx_topic_index_SUITE.erl | 143 ++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 apps/emqx/src/emqx_topic_index.erl create mode 100644 apps/emqx/test/emqx_topic_index_SUITE.erl diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl new file mode 100644 index 000000000..9f0b5fba1 --- /dev/null +++ b/apps/emqx/src/emqx_topic_index.erl @@ -0,0 +1,156 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Topic index for matching topics to topic filters. +%% +%% Works on top of ETS ordered_set table. Keys are parsed topic filters +%% with record ID appended to the end, wrapped in a tuple to disambiguate from +%% topic filter words. Existing table may be used if existing keys will not +%% collide with index keys. +%% +%% Designed to effectively answer questions like: +%% 1. Does any topic filter match given topic? +%% 2. Which records are associated with topic filters matching given topic? +%% +%% Questions like these are _only slightly_ less effective: +%% 1. Which topic filters match given topic? +%% 2. Which record IDs are associated with topic filters matching given topic? + +-module(emqx_topic_index). + +-export([new/0]). +-export([insert/4]). +-export([delete/3]). +-export([match/2]). +-export([matches/2]). + +-export([get_id/1]). +-export([get_topic/1]). +-export([get_record/2]). + +-type key(ID) :: [binary() | '+' | '#' | {ID}]. +-type match(ID) :: key(ID). + +new() -> + ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). + +insert(Filter, ID, Record, Tab) -> + ets:insert(Tab, {emqx_topic:words(Filter) ++ [{ID}], Record}). + +delete(Filter, ID, Tab) -> + ets:delete(Tab, emqx_topic:words(Filter) ++ [{ID}]). + +-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. +match(Topic, Tab) -> + {Words, RPrefix} = match_init(Topic), + match(Words, RPrefix, Tab). + +match(Words, RPrefix, Tab) -> + Prefix = lists:reverse(RPrefix), + K = ets:next(Tab, Prefix), + case match_filter(Prefix, K, Words =/= []) of + true -> + K; + stop -> + false; + Matched -> + match_rest(Matched, Words, RPrefix, Tab) + end. + +match_rest(false, [W | Rest], RPrefix, Tab) -> + match(Rest, [W | RPrefix], Tab); +match_rest(plus, [W | Rest], RPrefix, Tab) -> + case match(Rest, ['+' | RPrefix], Tab) of + Match when is_list(Match) -> + Match; + false -> + match(Rest, [W | RPrefix], Tab) + end; +match_rest(_, [], _RPrefix, _Tab) -> + false. + +-spec matches(emqx_types:topic(), ets:table()) -> [match(_ID)]. +matches(Topic, Tab) -> + {Words, RPrefix} = match_init(Topic), + matches(Words, RPrefix, Tab). + +matches(Words, RPrefix, Tab) -> + Prefix = lists:reverse(RPrefix), + matches(ets:next(Tab, Prefix), Prefix, Words, RPrefix, Tab). + +matches(K, Prefix, Words, RPrefix, Tab) -> + case match_filter(Prefix, K, Words =/= []) of + true -> + [K | matches(ets:next(Tab, K), Prefix, Words, RPrefix, Tab)]; + stop -> + []; + Matched -> + matches_rest(Matched, Words, RPrefix, Tab) + end. + +matches_rest(false, [W | Rest], RPrefix, Tab) -> + matches(Rest, [W | RPrefix], Tab); +matches_rest(plus, [W | Rest], RPrefix, Tab) -> + matches(Rest, ['+' | RPrefix], Tab) ++ matches(Rest, [W | RPrefix], Tab); +matches_rest(_, [], _RPrefix, _Tab) -> + []. + +match_filter([], [{_ID}], _IsPrefix = false) -> + % NOTE: exact match is `true` only if we match whole topic, not prefix + true; +match_filter([], ['#', {_ID}], _IsPrefix) -> + % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` + true; +match_filter([], ['+' | _], _) -> + plus; +match_filter([], [_H | _], _) -> + false; +match_filter([H | T1], [H | T2], IsPrefix) -> + match_filter(T1, T2, IsPrefix); +match_filter([H1 | _], [H2 | _], _) when H2 > H1 -> + % NOTE: we're strictly past the prefix, no need to continue + stop; +match_filter(_, '$end_of_table', _) -> + stop. + +match_init(Topic) -> + case emqx_topic:words(Topic) of + [W = <<"$", _/bytes>> | Rest] -> + % NOTE + % This will effectively skip attempts to match special topics to `#` or `+/...`. + {Rest, [W]}; + Words -> + {Words, []} + end. + +-spec get_id(match(ID)) -> ID. +get_id([{ID}]) -> + ID; +get_id([_ | Rest]) -> + get_id(Rest). + +-spec get_topic(match(_ID)) -> emqx_types:topic(). +get_topic(K) -> + emqx_topic:join(cut_topic(K)). + +cut_topic([{_ID}]) -> + []; +cut_topic([W | Rest]) -> + [W | cut_topic(Rest)]. + +-spec get_record(match(_ID), ets:table()) -> _Record. +get_record(K, Tab) -> + ets:lookup_element(Tab, K, 2). diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl new file mode 100644 index 000000000..98bfe48a1 --- /dev/null +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -0,0 +1,143 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_topic_index_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +t_insert(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab), + ?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))), + ?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))). + +t_match(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab), + ?assertMatch( + [<<"sensor/#">>, <<"sensor/+/#">>], + [topic(M) || M <- matches(<<"sensor/1">>, Tab)] + ). + +t_match2(_) -> + Tab = emqx_topic_index:new(), + true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab), + true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab), + true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab), + ?assertEqual( + [<<"#">>, <<"+/#">>, <<"+/+/#">>], + [topic(M) || M <- matches(<<"a/b/c">>, Tab)] + ), + ?assertEqual( + false, + emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab) + ). + +t_match3(_) -> + Tab = emqx_topic_index:new(), + Records = [ + {<<"d/#">>, t_match3_1}, + {<<"a/b/+">>, t_match3_2}, + {<<"a/#">>, t_match3_3}, + {<<"#">>, t_match3_4}, + {<<"$SYS/#">>, t_match3_sys} + ], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + Matched = matches(<<"a/b/c">>, Tab), + case length(Matched) of + 3 -> ok; + _ -> error({unexpected, Matched}) + end, + ?assertEqual( + t_match3_sys, + id(match(<<"$SYS/a/b/c">>, Tab)) + ). + +t_match4(_) -> + Tab = emqx_topic_index:new(), + Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + ?assertEqual( + [<<"/#">>, <<"/+">>], + [topic(M) || M <- matches(<<"/">>, Tab)] + ), + ?assertEqual( + [<<"/#">>, <<"/+/a/b/c">>], + [topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)] + ). + +t_match5(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + Records = [ + {<<"#">>, t_match5_1}, + {<>, t_match5_2}, + {<>, t_match5_3} + ], + lists:foreach( + fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + Records + ), + ?assertEqual( + [<<"#">>, <>], + [topic(M) || M <- matches(T, Tab)] + ), + ?assertEqual( + [<<"#">>, <>, <>], + [topic(M) || M <- matches(<>, Tab)] + ). + +t_match6(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>, + emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab), + ?assertEqual(ID, id(match(T, Tab))). + +t_match7(_) -> + Tab = emqx_topic_index:new(), + T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, + W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>, + emqx_topic_index:insert(W, t_match7, <<>>, Tab), + ?assertEqual(W, topic(match(T, Tab))). + +match(T, Tab) -> + emqx_topic_index:match(T, Tab). + +matches(T, Tab) -> + lists:sort(emqx_topic_index:matches(T, Tab)). + +id(Match) -> + emqx_topic_index:get_id(Match). + +topic(Match) -> + emqx_topic_index:get_topic(Match). From 28bcb394d150d066e30bb78d7be3ecdf375b6681 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 21 Jul 2023 20:06:46 +0200 Subject: [PATCH 02/20] fix(topicidx): allow to return matches unique by record id --- apps/emqx/src/emqx_topic_index.erl | 132 ++++++++++++---------- apps/emqx/test/emqx_topic_index_SUITE.erl | 16 ++- 2 files changed, 88 insertions(+), 60 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 9f0b5fba1..ac4901a09 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -16,18 +16,16 @@ %% @doc Topic index for matching topics to topic filters. %% -%% Works on top of ETS ordered_set table. Keys are parsed topic filters -%% with record ID appended to the end, wrapped in a tuple to disambiguate from -%% topic filter words. Existing table may be used if existing keys will not -%% collide with index keys. +%% Works on top of ETS ordered_set table. Keys are tuples constructed from +%% parsed topic filters and record IDs, wrapped in a tuple to order them +%% strictly greater than unit tuple (`{}`). Existing table may be used if +%% existing keys will not collide with index keys. %% %% Designed to effectively answer questions like: %% 1. Does any topic filter match given topic? %% 2. Which records are associated with topic filters matching given topic? -%% -%% Questions like these are _only slightly_ less effective: -%% 1. Which topic filters match given topic? -%% 2. Which record IDs are associated with topic filters matching given topic? +%% 3. Which topic filters match given topic? +%% 4. Which record IDs are associated with topic filters matching given topic? -module(emqx_topic_index). @@ -35,23 +33,23 @@ -export([insert/4]). -export([delete/3]). -export([match/2]). --export([matches/2]). +-export([matches/3]). -export([get_id/1]). -export([get_topic/1]). -export([get_record/2]). --type key(ID) :: [binary() | '+' | '#' | {ID}]. +-type key(ID) :: {[binary() | '+' | '#'], {ID}}. -type match(ID) :: key(ID). new() -> ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). insert(Filter, ID, Record, Tab) -> - ets:insert(Tab, {emqx_topic:words(Filter) ++ [{ID}], Record}). + ets:insert(Tab, {{emqx_topic:words(Filter), {ID}}, Record}). delete(Filter, ID, Tab) -> - ets:delete(Tab, emqx_topic:words(Filter) ++ [{ID}]). + ets:delete(Tab, {emqx_topic:words(Filter), {ID}}). -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> @@ -60,8 +58,8 @@ match(Topic, Tab) -> match(Words, RPrefix, Tab) -> Prefix = lists:reverse(RPrefix), - K = ets:next(Tab, Prefix), - case match_filter(Prefix, K, Words =/= []) of + K = ets:next(Tab, {Prefix, {}}), + case match_filter(Prefix, K, Words == []) of true -> K; stop -> @@ -74,7 +72,7 @@ match_rest(false, [W | Rest], RPrefix, Tab) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> case match(Rest, ['+' | RPrefix], Tab) of - Match when is_list(Match) -> + Match = {_, _} -> Match; false -> match(Rest, [W | RPrefix], Tab) @@ -82,48 +80,71 @@ match_rest(plus, [W | Rest], RPrefix, Tab) -> match_rest(_, [], _RPrefix, _Tab) -> false. --spec matches(emqx_types:topic(), ets:table()) -> [match(_ID)]. -matches(Topic, Tab) -> +-spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)]. +matches(Topic, Tab, Opts) -> {Words, RPrefix} = match_init(Topic), - matches(Words, RPrefix, Tab). - -matches(Words, RPrefix, Tab) -> - Prefix = lists:reverse(RPrefix), - matches(ets:next(Tab, Prefix), Prefix, Words, RPrefix, Tab). - -matches(K, Prefix, Words, RPrefix, Tab) -> - case match_filter(Prefix, K, Words =/= []) of - true -> - [K | matches(ets:next(Tab, K), Prefix, Words, RPrefix, Tab)]; - stop -> - []; - Matched -> - matches_rest(Matched, Words, RPrefix, Tab) + AccIn = + case Opts of + [unique | _] -> #{}; + [] -> [] + end, + Matches = matches(Words, RPrefix, AccIn, Tab), + case Matches of + #{} -> maps:values(Matches); + _ -> Matches end. -matches_rest(false, [W | Rest], RPrefix, Tab) -> - matches(Rest, [W | RPrefix], Tab); -matches_rest(plus, [W | Rest], RPrefix, Tab) -> - matches(Rest, ['+' | RPrefix], Tab) ++ matches(Rest, [W | RPrefix], Tab); -matches_rest(_, [], _RPrefix, _Tab) -> - []. +matches(Words, RPrefix, Acc, Tab) -> + Prefix = lists:reverse(RPrefix), + matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). -match_filter([], [{_ID}], _IsPrefix = false) -> - % NOTE: exact match is `true` only if we match whole topic, not prefix - true; -match_filter([], ['#', {_ID}], _IsPrefix) -> +matches(K, Prefix, Words, RPrefix, Acc, Tab) -> + case match_filter(Prefix, K, Words == []) of + true -> + matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab); + stop -> + Acc; + Matched -> + matches_rest(Matched, Words, RPrefix, Acc, Tab) + end. + +matches_rest(false, [W | Rest], RPrefix, Acc, Tab) -> + matches(Rest, [W | RPrefix], Acc, Tab); +matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), + matches(Rest, [W | RPrefix], NAcc, Tab); +matches_rest(_, [], _RPrefix, Acc, _Tab) -> + Acc. + +match_add(K = {_Filter, ID}, Acc = #{}) -> + Acc#{ID => K}; +match_add(K, Acc) -> + [K | Acc]. + +match_filter(Prefix, {Filter, _ID}, NotPrefix) -> + case match_filter(Prefix, Filter) of + exact -> + % NOTE: exact match is `true` only if we match whole topic, not prefix + NotPrefix; + Match -> + Match + end; +match_filter(_, '$end_of_table', _) -> + stop. + +match_filter([], []) -> + exact; +match_filter([], ['#']) -> % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` true; -match_filter([], ['+' | _], _) -> +match_filter([], ['+' | _]) -> plus; -match_filter([], [_H | _], _) -> +match_filter([], [_H | _]) -> false; -match_filter([H | T1], [H | T2], IsPrefix) -> - match_filter(T1, T2, IsPrefix); -match_filter([H1 | _], [H2 | _], _) when H2 > H1 -> +match_filter([H | T1], [H | T2]) -> + match_filter(T1, T2); +match_filter([H1 | _], [H2 | _]) when H2 > H1 -> % NOTE: we're strictly past the prefix, no need to continue - stop; -match_filter(_, '$end_of_table', _) -> stop. match_init(Topic) -> @@ -137,19 +158,12 @@ match_init(Topic) -> end. -spec get_id(match(ID)) -> ID. -get_id([{ID}]) -> - ID; -get_id([_ | Rest]) -> - get_id(Rest). +get_id({_Filter, {ID}}) -> + ID. -spec get_topic(match(_ID)) -> emqx_types:topic(). -get_topic(K) -> - emqx_topic:join(cut_topic(K)). - -cut_topic([{_ID}]) -> - []; -cut_topic([W | Rest]) -> - [W | cut_topic(Rest)]. +get_topic({Filter, _ID}) -> + emqx_topic:join(Filter). -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 98bfe48a1..b5faba4d9 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -130,11 +130,25 @@ t_match7(_) -> emqx_topic_index:insert(W, t_match7, <<>>, Tab), ?assertEqual(W, topic(match(T, Tab))). +t_match_unique(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab), + ?assertEqual( + [t_match_id1, t_match_id1], + [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [])] + ), + ?assertEqual( + [t_match_id1], + [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] + ). + match(T, Tab) -> emqx_topic_index:match(T, Tab). matches(T, Tab) -> - lists:sort(emqx_topic_index:matches(T, Tab)). + lists:sort(emqx_topic_index:matches(T, Tab, [])). id(Match) -> emqx_topic_index:get_id(Match). From 6a1340636369100a29188f9a63a0cb49a5b0b3dc Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 16:18:07 +0400 Subject: [PATCH 03/20] fix(topicidx): use custom topic words to keep required ordering Otherwise, topic with empty tokens (e.g. `a/b///c`) would have some of their tokens ordered before `#` / `+`, because empty token was represented as empty atom (`''`). --- apps/emqx/src/emqx_topic_index.erl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index ac4901a09..48c685f04 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -39,17 +39,18 @@ -export([get_topic/1]). -export([get_record/2]). --type key(ID) :: {[binary() | '+' | '#'], {ID}}. +-type word() :: binary() | '+' | '#'. +-type key(ID) :: {[word()], {ID}}. -type match(ID) :: key(ID). new() -> - ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]). + ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). insert(Filter, ID, Record, Tab) -> - ets:insert(Tab, {{emqx_topic:words(Filter), {ID}}, Record}). + ets:insert(Tab, {{words(Filter), {ID}}, Record}). delete(Filter, ID, Tab) -> - ets:delete(Tab, {emqx_topic:words(Filter), {ID}}). + ets:delete(Tab, {words(Filter), {ID}}). -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> @@ -148,7 +149,7 @@ match_filter([H1 | _], [H2 | _]) when H2 > H1 -> stop. match_init(Topic) -> - case emqx_topic:words(Topic) of + case words(Topic) of [W = <<"$", _/bytes>> | Rest] -> % NOTE % This will effectively skip attempts to match special topics to `#` or `+/...`. @@ -168,3 +169,14 @@ get_topic({Filter, _ID}) -> -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> ets:lookup_element(Tab, K, 2). + +%% + +-spec words(emqx_types:topic()) -> [word()]. +words(Topic) when is_binary(Topic) -> + [word(W) || W <- emqx_topic:tokens(Topic)]. + +-spec word(binary()) -> word(). +word(<<"+">>) -> '+'; +word(<<"#">>) -> '#'; +word(Bin) -> Bin. From 48a50c9137a89fbe6fd55a94dec8987ff5822b97 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 16:24:13 +0400 Subject: [PATCH 04/20] fix(topicidx): fix missing matches when 'a/b' and 'a/b/#' both exist Thanks to @HJianBo for spotting this issue. The approach to fix it is different though: we try to keep the "recurrency branch factor" to a minimum, instead introducing new condition for the case when filter does not match, but iteration with `ets:next/2` is not yet finished for the prefix. Co-Authored-By: JianBo He --- apps/emqx/src/emqx_topic_index.erl | 43 ++++++++++++----------- apps/emqx/test/emqx_topic_index_SUITE.erl | 18 ++++++++++ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 48c685f04..29d8694af 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -59,10 +59,14 @@ match(Topic, Tab) -> match(Words, RPrefix, Tab) -> Prefix = lists:reverse(RPrefix), - K = ets:next(Tab, {Prefix, {}}), - case match_filter(Prefix, K, Words == []) of + match(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Tab). + +match(K, Prefix, Words, RPrefix, Tab) -> + case match_next(Prefix, K, Words) of true -> K; + skip -> + match(ets:next(Tab, K), Prefix, Words, RPrefix, Tab); stop -> false; Matched -> @@ -100,9 +104,11 @@ matches(Words, RPrefix, Acc, Tab) -> matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). matches(K, Prefix, Words, RPrefix, Acc, Tab) -> - case match_filter(Prefix, K, Words == []) of + case match_next(Prefix, K, Words) of true -> matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab); + skip -> + matches(ets:next(Tab, K), Prefix, Words, RPrefix, Acc, Tab); stop -> Acc; Matched -> @@ -122,29 +128,26 @@ match_add(K = {_Filter, ID}, Acc = #{}) -> match_add(K, Acc) -> [K | Acc]. -match_filter(Prefix, {Filter, _ID}, NotPrefix) -> - case match_filter(Prefix, Filter) of - exact -> - % NOTE: exact match is `true` only if we match whole topic, not prefix - NotPrefix; - Match -> - Match - end; -match_filter(_, '$end_of_table', _) -> +match_next(Prefix, {Filter, _ID}, Suffix) -> + match_filter(Prefix, Filter, Suffix); +match_next(_, '$end_of_table', _) -> stop. -match_filter([], []) -> - exact; -match_filter([], ['#']) -> +match_filter([], [], []) -> + true; +match_filter([], [], _Suffix) -> + % NOTE: we matched the prefix, but there may be more matches next + skip; +match_filter([], ['#'], _Suffix) -> % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` true; -match_filter([], ['+' | _]) -> +match_filter([], ['+' | _], _Suffix) -> plus; -match_filter([], [_H | _]) -> +match_filter([], [_H | _], _Suffix) -> false; -match_filter([H | T1], [H | T2]) -> - match_filter(T1, T2); -match_filter([H1 | _], [H2 | _]) when H2 > H1 -> +match_filter([H | T1], [H | T2], Suffix) -> + match_filter(T1, T2, Suffix); +match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 -> % NOTE: we're strictly past the prefix, no need to continue stop. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index b5faba4d9..75e1adaf6 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -144,6 +144,24 @@ t_match_unique(_) -> [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] ). +t_match_wildcards(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b">>, id1, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/#">>, id2, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/#">>, id3, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c">>, id4, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/+">>, id5, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/d">>, id6, <<>>, Tab), + emqx_topic_index:insert(<<"a/+/+">>, id7, <<>>, Tab), + emqx_topic_index:insert(<<"a/+/#">>, id8, <<>>, Tab), + + Records = [id(M) || M <- matches(<<"a/b/c">>, Tab)], + ?assertEqual([id2, id3, id4, id5, id7, id8], lists:sort(Records)), + + Records1 = [id(M) || M <- matches(<<"a/b">>, Tab)], + ?assertEqual([id1, id2, id3, id8], lists:sort(Records1)), + ok. + match(T, Tab) -> emqx_topic_index:match(T, Tab). From 0c7bdbdab45ec2f9ecb5dc3f4f390afc5eae60f4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 18:49:07 +0400 Subject: [PATCH 05/20] test(topicidx): add property test Co-Authored-By: JianBo He --- apps/emqx/test/emqx_topic_index_SUITE.erl | 174 ++++++++++++++++++++-- 1 file changed, 159 insertions(+), 15 deletions(-) diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 75e1adaf6..f2b263a5a 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -19,8 +19,11 @@ -compile(export_all). -compile(nowarn_export_all). +-include_lib("proper/include/proper.hrl"). -include_lib("eunit/include/eunit.hrl"). +-import(emqx_proper_types, [scaled/2]). + all() -> emqx_common_test_helpers:all(?MODULE). @@ -144,23 +147,122 @@ t_match_unique(_) -> [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] ). -t_match_wildcards(_) -> - Tab = emqx_topic_index:new(), - emqx_topic_index:insert(<<"a/b">>, id1, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/#">>, id2, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/#">>, id3, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/c">>, id4, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/+">>, id5, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/d">>, id6, <<>>, Tab), - emqx_topic_index:insert(<<"a/+/+">>, id7, <<>>, Tab), - emqx_topic_index:insert(<<"a/+/#">>, id8, <<>>, Tab), +t_match_wildcard_edge_cases(_) -> + CommonTopics = [ + <<"a/b">>, + <<"a/b/#">>, + <<"a/b/#">>, + <<"a/b/c">>, + <<"a/b/+">>, + <<"a/b/d">>, + <<"a/+/+">>, + <<"a/+/#">> + ], + Datasets = + [ + %% Topics, TopicName, Results + {CommonTopics, <<"a/b/c">>, [2, 3, 4, 5, 7, 8]}, + {CommonTopics, <<"a/b">>, [1, 2, 3, 8]}, + {[<<"+/b/c">>, <<"/">>], <<"a/b/c">>, [1]}, + {[<<"#">>, <<"/">>], <<"a">>, [1]}, + {[<<"/">>, <<"+">>], <<"a">>, [2]} + ], + F = fun({Topics, TopicName, Expected}) -> + Tab = emqx_topic_index:new(), + _ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], + Results = [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + ?assertEqual( + Expected, + Results, + #{ + "Base topics" => Topics, + "Topic name" => TopicName + } + ) + end, + lists:foreach(F, Datasets). - Records = [id(M) || M <- matches(<<"a/b/c">>, Tab)], - ?assertEqual([id2, id3, id4, id5, id7, id8], lists:sort(Records)), +t_prop_matches(_) -> + ?assert( + proper:quickcheck( + topic_matches_prop(), + [{max_size, 100}, {numtests, 100}] + ) + ), + Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]], + ct:pal("Statistics: ~p", [maps:from_list(Statistics)]). - Records1 = [id(M) || M <- matches(<<"a/b">>, Tab)], - ?assertEqual([id1, id2, id3, id8], lists:sort(Records1)), - ok. +topic_matches_prop() -> + ?FORALL( + % Generate a longer list of topics and a shorter list of topic filter patterns. + #{ + topics := TTopics, + patterns := Pats + }, + emqx_proper_types:fixedmap(#{ + % NOTE + % Beware adding non-empty contraint, proper will have a hard time with `topic_t/1` + % for some reason. + topics => scaled(4, list(topic_t([1, 2, 3, 4]))), + patterns => list(topic_filter_pattern_t()) + }), + begin + Tab = emqx_topic_index:new(), + Topics = [emqx_topic:join(T) || T <- TTopics], + % Produce topic filters from generated topics and patterns. + % Number of filters is equal to the number of patterns, most of the time. + Filters = lists:enumerate(mk_filters(Pats, TTopics)), + _ = [emqx_topic_index:insert(F, N, <<>>, Tab) || {N, F} <- Filters], + % Gather some basic statistics + _ = account(filters, length(Filters)), + _ = account(topics, NTopics = length(Topics)), + _ = account(maxhits, NTopics * NTopics), + % Verify that matching each topic against index returns the same results as + % matching it against the list of filters one by one. + lists:all( + fun(Topic) -> + Ids1 = [id(M) || M <- emqx_topic_index:matches(Topic, Tab, [unique])], + Ids2 = lists:filtermap( + fun({N, F}) -> + case emqx_topic:match(Topic, F) of + true -> {true, N}; + false -> false + end + end, + Filters + ), + % Account a number of matches to compute hitrate later + _ = account(matches, length(Ids1)), + case (Ids2 -- Ids1) ++ (Ids2 -- Ids1) of + [] -> + true; + [_ | _] = _Differences -> + ct:pal( + "Topic name: ~p~n" + "Index results: ~p~n" + "Topic match results:: ~p~n", + [Topic, Ids1, Ids2] + ), + false + end + end, + Topics + ) + end + ). + +mk_filters([Pat | PRest], [Topic | TRest]) -> + [emqx_topic:join(mk_topic_filter(Pat, Topic)) | mk_filters(PRest, TRest)]; +mk_filters(_, _) -> + []. + +account(Counter, N) -> + put({?MODULE, Counter}, account(Counter) + N). + +account(Counter) -> + emqx_maybe:define(get({?MODULE, Counter}), 0). + +%% match(T, Tab) -> emqx_topic_index:match(T, Tab). @@ -173,3 +275,45 @@ id(Match) -> topic(Match) -> emqx_topic_index:get_topic(Match). + +%% + +topic_t(EntropyWeights) -> + EWLast = lists:last(EntropyWeights), + ?LET(L, scaled(1 / 4, list(EWLast)), begin + EWs = lists:sublist(EntropyWeights ++ L, length(L)), + ?SIZED(S, [oneof([topic_level_t(S * EW), topic_level_fixed_t()]) || EW <- EWs]) + end). + +topic_level_t(Entropy) -> + S = floor(1 + math:log2(Entropy) / 4), + ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). + +topic_level_fixed_t() -> + oneof([ + <<"foo">>, + <<"bar">>, + <<"baz">>, + <<"xyzzy">> + ]). + +topic_filter_pattern_t() -> + list(topic_level_pattern_t()). + +topic_level_pattern_t() -> + frequency([ + {5, level}, + {2, '+'}, + {1, '#'} + ]). + +mk_topic_filter([], _) -> + []; +mk_topic_filter(_, []) -> + []; +mk_topic_filter(['#' | _], _) -> + ['#']; +mk_topic_filter(['+' | Rest], [_ | Levels]) -> + ['+' | mk_topic_filter(Rest, Levels)]; +mk_topic_filter([level | Rest], [L | Levels]) -> + [L | mk_topic_filter(Rest, Levels)]. From fd0986071c082170fa154ce6d7695805ad513515 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 4 Aug 2023 19:27:43 +0400 Subject: [PATCH 06/20] perf(topicidx): implement fast-forwarding prefixes This should give less `ets:next/2` calls in general and much less when index has relatively small number of long non-wildcard topics. --- apps/emqx/src/emqx_topic_index.erl | 20 ++++++++++++++++---- apps/emqx/test/emqx_topic_index_SUITE.erl | 10 ++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 29d8694af..44e88e659 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -73,7 +73,13 @@ match(K, Prefix, Words, RPrefix, Tab) -> match_rest(Matched, Words, RPrefix, Tab) end. -match_rest(false, [W | Rest], RPrefix, Tab) -> +match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) -> + % NOTE + % Fast-forward through identical words in the topic and the last key suffixes. + % This should save us a few redundant `ets:next` calls at the cost of slightly + % more complex match patterns. + match_rest(SLast, Rest, [W1 | RPrefix], Tab); +match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> case match(Rest, ['+' | RPrefix], Tab) of @@ -115,7 +121,13 @@ matches(K, Prefix, Words, RPrefix, Acc, Tab) -> matches_rest(Matched, Words, RPrefix, Acc, Tab) end. -matches_rest(false, [W | Rest], RPrefix, Acc, Tab) -> +matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) -> + % NOTE + % Fast-forward through identical words in the topic and the last key suffixes. + % This should save us a few redundant `ets:next` calls at the cost of slightly + % more complex match patterns. + matches_rest(SLast, Rest, [W1 | RPrefix], Acc, Tab); +matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> matches(Rest, [W | RPrefix], Acc, Tab); matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), @@ -143,8 +155,8 @@ match_filter([], ['#'], _Suffix) -> true; match_filter([], ['+' | _], _Suffix) -> plus; -match_filter([], [_H | _], _Suffix) -> - false; +match_filter([], [_H | _] = Rest, _Suffix) -> + Rest; match_filter([H | T1], [H | T2], Suffix) -> match_filter(T1, T2, Suffix); match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 -> diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index f2b263a5a..80ca536b4 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -133,6 +133,16 @@ t_match7(_) -> emqx_topic_index:insert(W, t_match7, <<>>, Tab), ?assertEqual(W, topic(match(T, Tab))). +t_match_fast_forward(_) -> + Tab = emqx_topic_index:new(), + emqx_topic_index:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab), + emqx_topic_index:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab), + emqx_topic_index:insert(<<"a/b/c/+">>, id3, <<>>, Tab), + % dbg:tracer(), + % dbg:p(all, c), + % dbg:tpl({ets, next, '_'}, x), + ?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). + t_match_unique(_) -> Tab = emqx_topic_index:new(), emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), From 9a249e4b01fc9ab717a1ab02c0a5db255fa87989 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 13:11:25 +0400 Subject: [PATCH 07/20] test(topicidx): increase test coverage --- apps/emqx/test/emqx_topic_index_SUITE.erl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 80ca536b4..ade98acec 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -141,6 +141,7 @@ t_match_fast_forward(_) -> % dbg:tracer(), % dbg:p(all, c), % dbg:tpl({ets, next, '_'}, x), + ?assertEqual(id1, id(match(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))), ?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). t_match_unique(_) -> @@ -180,14 +181,15 @@ t_match_wildcard_edge_cases(_) -> F = fun({Topics, TopicName, Expected}) -> Tab = emqx_topic_index:new(), _ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], - Results = [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + ?assertEqual( + lists:last(Expected), + id(emqx_topic_index:match(TopicName, Tab)), + #{"Base topics" => Topics, "Topic name" => TopicName} + ), ?assertEqual( Expected, - Results, - #{ - "Base topics" => Topics, - "Topic name" => TopicName - } + [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + #{"Base topics" => Topics, "Topic name" => TopicName} ) end, lists:foreach(F, Datasets). From 47dfba4341ea312967e33dd5b21dd549b7319ea8 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 20 Jul 2023 14:49:10 +0200 Subject: [PATCH 08/20] perf(ruleeng): employ `emqx_topic_index` to speed up topic matching --- apps/emqx_rule_engine/include/rule_engine.hrl | 1 + .../emqx_rule_engine/src/emqx_rule_engine.erl | 68 +++++++++++++------ .../src/emqx_rule_engine_app.erl | 1 + 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/apps/emqx_rule_engine/include/rule_engine.hrl b/apps/emqx_rule_engine/include/rule_engine.hrl index b2a6a549e..7df5d9941 100644 --- a/apps/emqx_rule_engine/include/rule_engine.hrl +++ b/apps/emqx_rule_engine/include/rule_engine.hrl @@ -109,6 +109,7 @@ %% Tables -define(RULE_TAB, emqx_rule_engine). +-define(RULE_TOPIC_INDEX, emqx_rule_engine_topic_index). %% Allowed sql function provider modules -define(DEFAULT_SQL_FUNC_PROVIDER, emqx_rule_funcs). diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.erl b/apps/emqx_rule_engine/src/emqx_rule_engine.erl index 66c82d3a1..41d1ed433 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.erl @@ -176,7 +176,7 @@ create_rule(Params) -> create_rule(Params = #{id := RuleId}, CreatedAt) when is_binary(RuleId) -> case get_rule(RuleId) of - not_found -> parse_and_insert(Params, CreatedAt); + not_found -> with_parsed_rule(Params, CreatedAt, fun insert_rule/1); {ok, _} -> {error, already_exists} end. @@ -185,18 +185,27 @@ update_rule(Params = #{id := RuleId}) when is_binary(RuleId) -> case get_rule(RuleId) of not_found -> {error, not_found}; - {ok, #{created_at := CreatedAt}} -> - parse_and_insert(Params, CreatedAt) + {ok, RulePrev = #{created_at := CreatedAt}} -> + with_parsed_rule(Params, CreatedAt, fun(Rule) -> update_rule(Rule, RulePrev) end) end. -spec delete_rule(RuleId :: rule_id()) -> ok. delete_rule(RuleId) when is_binary(RuleId) -> - gen_server:call(?RULE_ENGINE, {delete_rule, RuleId}, ?T_CALL). + case get_rule(RuleId) of + not_found -> + ok; + {ok, Rule} -> + gen_server:call(?RULE_ENGINE, {delete_rule, Rule}, ?T_CALL) + end. -spec insert_rule(Rule :: rule()) -> ok. insert_rule(Rule) -> gen_server:call(?RULE_ENGINE, {insert_rule, Rule}, ?T_CALL). +-spec update_rule(Rule :: rule(), RulePrev :: rule()) -> ok. +update_rule(Rule, RulePrev) -> + gen_server:call(?RULE_ENGINE, {update_rule, Rule, RulePrev}, ?T_CALL). + %%---------------------------------------------------------------------------------------- %% Rule Management %%---------------------------------------------------------------------------------------- @@ -216,9 +225,8 @@ get_rules_ordered_by_ts() -> -spec get_rules_for_topic(Topic :: binary()) -> [rule()]. get_rules_for_topic(Topic) -> [ - Rule - || Rule = #{from := From} <- get_rules(), - emqx_topic:match_any(Topic, From) + emqx_topic_index:get_record(M, ?RULE_TOPIC_INDEX) + || M <- emqx_topic_index:matches(Topic, ?RULE_TOPIC_INDEX, [unique]) ]. -spec get_rules_with_same_event(Topic :: binary()) -> [rule()]. @@ -411,10 +419,17 @@ init([]) -> {ok, #{}}. handle_call({insert_rule, Rule}, _From, State) -> - do_insert_rule(Rule), + ok = do_insert_rule(Rule), + ok = do_update_rule_index(Rule), + {reply, ok, State}; +handle_call({update_rule, Rule, RulePrev}, _From, State) -> + ok = do_delete_rule_index(RulePrev), + ok = do_insert_rule(Rule), + ok = do_update_rule_index(Rule), {reply, ok, State}; handle_call({delete_rule, Rule}, _From, State) -> - do_delete_rule(Rule), + ok = do_delete_rule_index(Rule), + ok = do_delete_rule(Rule), {reply, ok, State}; handle_call(Req, _From, State) -> ?SLOG(error, #{msg => "unexpected_call", request => Req}), @@ -438,7 +453,7 @@ code_change(_OldVsn, State, _Extra) -> %% Internal Functions %%---------------------------------------------------------------------------------------- -parse_and_insert(Params = #{id := RuleId, sql := Sql, actions := Actions}, CreatedAt) -> +with_parsed_rule(Params = #{id := RuleId, sql := Sql, actions := Actions}, CreatedAt, Fun) -> case emqx_rule_sqlparser:parse(Sql) of {ok, Select} -> Rule = #{ @@ -459,7 +474,7 @@ parse_and_insert(Params = #{id := RuleId, sql := Sql, actions := Actions}, Creat conditions => emqx_rule_sqlparser:select_where(Select) %% -- calculated fields end }, - ok = insert_rule(Rule), + ok = Fun(Rule), {ok, Rule}; {error, Reason} -> {error, Reason} @@ -471,16 +486,27 @@ do_insert_rule(#{id := Id} = Rule) -> true = ets:insert(?RULE_TAB, {Id, maps:remove(id, Rule)}), ok. -do_delete_rule(RuleId) -> - case get_rule(RuleId) of - {ok, Rule} -> - ok = unload_hooks_for_rule(Rule), - ok = clear_metrics_for_rule(RuleId), - true = ets:delete(?RULE_TAB, RuleId), - ok; - not_found -> - ok - end. +do_delete_rule(#{id := Id} = Rule) -> + ok = unload_hooks_for_rule(Rule), + ok = clear_metrics_for_rule(Id), + true = ets:delete(?RULE_TAB, Id), + ok. + +do_update_rule_index(#{id := Id, from := From} = Rule) -> + ok = lists:foreach( + fun(Topic) -> + true = emqx_topic_index:insert(Topic, Id, Rule, ?RULE_TOPIC_INDEX) + end, + From + ). + +do_delete_rule_index(#{id := Id, from := From}) -> + ok = lists:foreach( + fun(Topic) -> + true = emqx_topic_index:delete(Topic, Id, ?RULE_TOPIC_INDEX) + end, + From + ). parse_actions(Actions) -> [do_parse_action(Act) || Act <- Actions]. diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl b/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl index d8b031bdd..28515cb1a 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_engine_app.erl @@ -26,6 +26,7 @@ start(_Type, _Args) -> _ = ets:new(?RULE_TAB, [named_table, public, ordered_set, {read_concurrency, true}]), + _ = ets:new(?RULE_TOPIC_INDEX, [named_table, public, ordered_set, {read_concurrency, true}]), ok = emqx_rule_events:reload(), SupRet = emqx_rule_engine_sup:start_link(), ok = emqx_rule_engine:load_rules(), From fe9477f92ea972b6d5b003658c2d4a957679f907 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 14:00:23 +0400 Subject: [PATCH 09/20] chore: bump applications versions * emqx_rule_engine 5.0.23 --- apps/emqx_rule_engine/src/emqx_rule_engine.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src index 09d57a4f9..e6d00bcae 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src @@ -2,7 +2,7 @@ {application, emqx_rule_engine, [ {description, "EMQX Rule Engine"}, % strict semver, bump manually! - {vsn, "5.0.22"}, + {vsn, "5.0.23"}, {modules, []}, {registered, [emqx_rule_engine_sup, emqx_rule_engine]}, {applications, [kernel, stdlib, rulesql, getopt, emqx_ctl, uuid]}, From d302aaae4cde883a290a4875e21b5e7891a611a3 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 14 Aug 2023 15:24:24 +0400 Subject: [PATCH 10/20] chore: add changelog entry --- changes/ce/perf-11396.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/perf-11396.en.md diff --git a/changes/ce/perf-11396.en.md b/changes/ce/perf-11396.en.md new file mode 100644 index 000000000..fd8df9a9d --- /dev/null +++ b/changes/ce/perf-11396.en.md @@ -0,0 +1 @@ +Introduce topic index for the rule engine runtime that significantly improves the performance of EMQX with a non-trivial number of rules consuming messages matching different topic filters. From e39bbf4c495c2a16485a1e9e646a2845a2651603 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 15 Aug 2023 16:55:48 +0400 Subject: [PATCH 11/20] chore(topicidx): add more descriptive comments and specs To (hopefully) better illustrate what is happening there. --- apps/emqx/src/emqx_topic_index.erl | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 44e88e659..09e19a9f7 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -43,15 +43,27 @@ -type key(ID) :: {[word()], {ID}}. -type match(ID) :: key(ID). +%% @doc Create a new ETS table suitable for topic index. +%% Usable mostly for testing purposes. +-spec new() -> ets:table(). new() -> ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). +%% @doc Insert a new entry into the index that associates given topic filter to given +%% record ID, and attaches arbitrary record to the entry. This allows users to choose +%% between regular and "materialized" indexes, for example. +-spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true. insert(Filter, ID, Record, Tab) -> ets:insert(Tab, {{words(Filter), {ID}}, Record}). +%% @doc Delete an entry from the index that associates given topic filter to given +%% record ID. Deleting non-existing entry is not an error. +-spec delete(emqx_types:topic(), _ID, ets:table()) -> true. delete(Filter, ID, Tab) -> ets:delete(Tab, {words(Filter), {ID}}). +%% @doc Match given topic against the index and return the first match, or `false` if +%% no match is found. -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> {Words, RPrefix} = match_init(Topic), @@ -82,6 +94,10 @@ match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) -> match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) -> match(Rest, [W | RPrefix], Tab); match_rest(plus, [W | Rest], RPrefix, Tab) -> + % NOTE + % There's '+' in the key suffix, meaning we should consider 2 alternatives: + % 1. Match the rest of the topic as if there was '+' in the current position. + % 2. Skip this key and try to match the topic as it is. case match(Rest, ['+' | RPrefix], Tab) of Match = {_, _} -> Match; @@ -91,6 +107,8 @@ match_rest(plus, [W | Rest], RPrefix, Tab) -> match_rest(_, [], _RPrefix, _Tab) -> false. +%% @doc Match given topic against the index and return _all_ matches. +%% If `unique` option is given, return only unique matches by record ID. -spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)]. matches(Topic, Tab, Opts) -> {Words, RPrefix} = match_init(Topic), @@ -130,12 +148,18 @@ matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> matches(Rest, [W | RPrefix], Acc, Tab); matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + % NOTE + % There's '+' in the key suffix, meaning we should accumulate all matches from + % each of 2 branches: + % 1. Match the rest of the topic as if there was '+' in the current position. + % 2. Skip this key and try to match the topic as it is. NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), matches(Rest, [W | RPrefix], NAcc, Tab); matches_rest(_, [], _RPrefix, Acc, _Tab) -> Acc. match_add(K = {_Filter, ID}, Acc = #{}) -> + % NOTE: ensuring uniqueness by record ID Acc#{ID => K}; match_add(K, Acc) -> [K | Acc]. @@ -146,6 +170,7 @@ match_next(_, '$end_of_table', _) -> stop. match_filter([], [], []) -> + % NOTE: we matched the topic exactly true; match_filter([], [], _Suffix) -> % NOTE: we matched the prefix, but there may be more matches next @@ -173,14 +198,18 @@ match_init(Topic) -> {Words, []} end. +%% @doc Extract record ID from the match. -spec get_id(match(ID)) -> ID. get_id({_Filter, {ID}}) -> ID. +%% @doc Extract topic (or topic filter) from the match. -spec get_topic(match(_ID)) -> emqx_types:topic(). get_topic({Filter, _ID}) -> emqx_topic:join(Filter). +%% @doc Fetch the record associated with the match. +%% NOTE: Only really useful for ETS tables where the record ID is the first element. -spec get_record(match(_ID), ets:table()) -> _Record. get_record(K, Tab) -> ets:lookup_element(Tab, K, 2). @@ -189,6 +218,10 @@ get_record(K, Tab) -> -spec words(emqx_types:topic()) -> [word()]. words(Topic) when is_binary(Topic) -> + % NOTE + % This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty + % tokens to ''. This is needed to keep ordering of words consistent with what + % `match_filter/3` expects. [word(W) || W <- emqx_topic:tokens(Topic)]. -spec word(binary()) -> word(). From ba956ebe880d0d399274b2ae4123479ffd01a1a7 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 15 Aug 2023 11:52:35 -0300 Subject: [PATCH 12/20] fix(gcp_consumer): handle 403 responses Fixes https://emqx.atlassian.net/browse/EMQX-10736 --- .../src/emqx_bridge_gcp_pubsub.app.src | 2 +- .../src/emqx_bridge_gcp_pubsub_client.erl | 2 +- ...emqx_bridge_gcp_pubsub_consumer_worker.erl | 27 +++- .../emqx_bridge_gcp_pubsub_impl_consumer.erl | 57 +++++--- .../emqx_bridge_gcp_pubsub_consumer_SUITE.erl | 135 +++++++++++++++++- 5 files changed, 193 insertions(+), 30 deletions(-) diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src index 9faf65860..c7dcea5c0 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_gcp_pubsub, [ {description, "EMQX Enterprise GCP Pub/Sub Bridge"}, - {vsn, "0.1.6"}, + {vsn, "0.1.7"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_client.erl b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_client.erl index cb4aa853c..eeceb0c43 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_client.erl +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_client.erl @@ -205,7 +205,7 @@ get_topic(Topic, ConnectorState) -> Path = <<"/v1/projects/", ProjectId/binary, "/topics/", Topic/binary>>, Body = <<>>, PreparedRequest = {prepared_request, {Method, Path, Body}}, - query_sync(PreparedRequest, ConnectorState). + ?MODULE:query_sync(PreparedRequest, ConnectorState). %%------------------------------------------------------------------------------------------------- %% Helper fns diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_consumer_worker.erl b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_consumer_worker.erl index ddceb4a11..d984b42ed 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_consumer_worker.erl +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_consumer_worker.erl @@ -217,7 +217,9 @@ handle_continue(?ensure_subscription, State0) -> {noreply, State0, {continue, ?ensure_subscription}}; not_found -> %% there's nothing much to do if the topic suddenly doesn't exist anymore. - {stop, {error, topic_not_found}, State0} + {stop, {error, topic_not_found}, State0}; + permission_denied -> + {stop, {error, permission_denied}, State0} end; handle_continue(?patch_subscription, State0) -> ?tp(gcp_pubsub_consumer_worker_patch_subscription_enter, #{}), @@ -291,14 +293,17 @@ handle_info(Msg, State0) -> }), {noreply, State0}. -terminate({error, topic_not_found} = _Reason, State) -> +terminate({error, Reason}, State) when + Reason =:= topic_not_found; + Reason =:= permission_denied +-> #{ instance_id := InstanceId, topic := _Topic } = State, optvar:unset(?OPTVAR_SUB_OK(self())), - emqx_bridge_gcp_pubsub_impl_consumer:mark_topic_as_nonexistent(InstanceId), - ?tp(gcp_pubsub_consumer_worker_terminate, #{reason => _Reason, topic => _Topic}), + emqx_bridge_gcp_pubsub_impl_consumer:mark_as_unhealthy(InstanceId, Reason), + ?tp(gcp_pubsub_consumer_worker_terminate, #{reason => {error, Reason}, topic => _Topic}), ok; terminate(_Reason, _State) -> optvar:unset(?OPTVAR_SUB_OK(self())), @@ -329,7 +334,8 @@ ensure_pull_timer(State = #{pull_timer := TRef}) when is_reference(TRef) -> ensure_pull_timer(State = #{pull_retry_interval := PullRetryInterval}) -> State#{pull_timer := emqx_utils:start_timer(PullRetryInterval, pull)}. --spec ensure_subscription_exists(state()) -> continue | retry | not_found | already_exists. +-spec ensure_subscription_exists(state()) -> + continue | retry | not_found | permission_denied | already_exists. ensure_subscription_exists(State) -> ?tp(gcp_pubsub_consumer_worker_create_subscription_enter, #{}), #{ @@ -367,6 +373,17 @@ ensure_subscription_exists(State) -> } ), not_found; + {error, #{status_code := 403}} -> + %% permission denied + ?tp( + warning, + "gcp_pubsub_consumer_worker_permission_denied", + #{ + instance_id => InstanceId, + topic => Topic + } + ), + permission_denied; {ok, #{status_code := 200}} -> ?tp( debug, diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_impl_consumer.erl b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_impl_consumer.erl index 74ee941ec..998a95a48 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_impl_consumer.erl +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub_impl_consumer.erl @@ -17,9 +17,9 @@ %% health check API -export([ - mark_topic_as_nonexistent/1, - unset_nonexistent_topic/1, - is_nonexistent_topic/1 + mark_as_unhealthy/2, + clear_unhealthy/1, + check_if_unhealthy/1 ]). -include_lib("emqx/include/logger.hrl"). @@ -47,11 +47,15 @@ -define(AUTO_RECONNECT_S, 2). -define(DEFAULT_FORGET_INTERVAL, timer:seconds(60)). --define(OPTVAR_TOPIC_NOT_FOUND(INSTANCE_ID), {?MODULE, topic_not_found, INSTANCE_ID}). +-define(OPTVAR_UNHEALTHY(INSTANCE_ID), {?MODULE, topic_not_found, INSTANCE_ID}). -define(TOPIC_MESSAGE, "GCP PubSub topics are invalid. Please check the logs, check if the " "topics exist in GCP and if the service account has permissions to use them." ). +-define(PERMISSION_MESSAGE, + "Permission denied while verifying topic existence. Please check that the " + "provided service account has the correct permissions configured." +). %%------------------------------------------------------------------------------------------------- %% `emqx_resource' API @@ -77,7 +81,7 @@ on_start(InstanceId, Config0) -> -spec on_stop(resource_id(), state()) -> ok | {error, term()}. on_stop(InstanceId, _State) -> ?tp(gcp_pubsub_consumer_stop_enter, #{}), - unset_nonexistent_topic(InstanceId), + clear_unhealthy(InstanceId), ok = stop_consumers(InstanceId), emqx_bridge_gcp_pubsub_client:stop(InstanceId). @@ -85,10 +89,12 @@ on_stop(InstanceId, _State) -> on_get_status(InstanceId, State) -> %% We need to check this flag separately because the workers might be gone when we %% check them. - case is_nonexistent_topic(InstanceId) of - true -> + case check_if_unhealthy(InstanceId) of + {error, topic_not_found} -> {disconnected, State, {unhealthy_target, ?TOPIC_MESSAGE}}; - false -> + {error, permission_denied} -> + {disconnected, State, {unhealthy_target, ?PERMISSION_MESSAGE}}; + ok -> #{client := Client} = State, check_workers(InstanceId, Client) end. @@ -97,24 +103,24 @@ on_get_status(InstanceId, State) -> %% Health check API (signalled by consumer worker) %%------------------------------------------------------------------------------------------------- --spec mark_topic_as_nonexistent(resource_id()) -> ok. -mark_topic_as_nonexistent(InstanceId) -> - optvar:set(?OPTVAR_TOPIC_NOT_FOUND(InstanceId), true), +-spec mark_as_unhealthy(resource_id(), topic_not_found | permission_denied) -> ok. +mark_as_unhealthy(InstanceId, Reason) -> + optvar:set(?OPTVAR_UNHEALTHY(InstanceId), Reason), ok. --spec unset_nonexistent_topic(resource_id()) -> ok. -unset_nonexistent_topic(InstanceId) -> - optvar:unset(?OPTVAR_TOPIC_NOT_FOUND(InstanceId)), - ?tp(gcp_pubsub_consumer_unset_nonexistent_topic, #{}), +-spec clear_unhealthy(resource_id()) -> ok. +clear_unhealthy(InstanceId) -> + optvar:unset(?OPTVAR_UNHEALTHY(InstanceId)), + ?tp(gcp_pubsub_consumer_clear_unhealthy, #{}), ok. --spec is_nonexistent_topic(resource_id()) -> boolean(). -is_nonexistent_topic(InstanceId) -> - case optvar:peek(?OPTVAR_TOPIC_NOT_FOUND(InstanceId)) of - {ok, true} -> - true; - _ -> - false +-spec check_if_unhealthy(resource_id()) -> ok | {error, topic_not_found | permission_denied}. +check_if_unhealthy(InstanceId) -> + case optvar:peek(?OPTVAR_UNHEALTHY(InstanceId)) of + {ok, Reason} -> + {error, Reason}; + undefined -> + ok end. %%------------------------------------------------------------------------------------------------- @@ -153,6 +159,11 @@ start_consumers(InstanceId, Client, Config) -> throw( {unhealthy_target, ?TOPIC_MESSAGE} ); + {error, permission_denied} -> + _ = emqx_bridge_gcp_pubsub_client:stop(InstanceId), + throw( + {unhealthy_target, ?PERMISSION_MESSAGE} + ); {error, _} -> %% connection might be down; we'll have to check topic existence during health %% check, or the workers will kill themselves when they realized there's no @@ -229,6 +240,8 @@ check_for_topic_existence(Topic, Client) -> ok; {error, #{status_code := 404}} -> {error, not_found}; + {error, #{status_code := 403}} -> + {error, permission_denied}; {error, Reason} -> ?tp(warning, "gcp_pubsub_consumer_check_topic_error", #{reason => Reason}), {error, Reason} diff --git a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl index 8cb0ef2f9..681e5fed7 100644 --- a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl +++ b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl @@ -760,6 +760,64 @@ prop_acked_ids_eventually_forgotten(Trace) -> ), ok. +permission_denied_response() -> + Link = + <<"https://console.developers.google.com/project/9999/apiui/credential">>, + {error, #{ + status_code => 403, + headers => + [ + {<<"vary">>, <<"X-Origin">>}, + {<<"vary">>, <<"Referer">>}, + {<<"content-type">>, <<"application/json; charset=UTF-8">>}, + {<<"date">>, <<"Tue, 15 Aug 2023 13:59:09 GMT">>}, + {<<"server">>, <<"ESF">>}, + {<<"cache-control">>, <<"private">>}, + {<<"x-xss-protection">>, <<"0">>}, + {<<"x-frame-options">>, <<"SAMEORIGIN">>}, + {<<"x-content-type-options">>, <<"nosniff">>}, + {<<"alt-svc">>, <<"h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000">>}, + {<<"accept-ranges">>, <<"none">>}, + {<<"vary">>, <<"Origin,Accept-Encoding">>}, + {<<"transfer-encoding">>, <<"chunked">>} + ], + body => emqx_utils_json:encode( + #{ + <<"error">> => + #{ + <<"code">> => 403, + <<"details">> => + [ + #{ + <<"@type">> => <<"type.googleapis.com/google.rpc.Help">>, + <<"links">> => + [ + #{ + <<"description">> => + <<"Google developer console API key">>, + <<"url">> => + Link + } + ] + }, + #{ + <<"@type">> => <<"type.googleapis.com/google.rpc.ErrorInfo">>, + <<"domain">> => <<"googleapis.com">>, + <<"metadata">> => + #{ + <<"consumer">> => <<"projects/9999">>, + <<"service">> => <<"pubsub.googleapis.com">> + }, + <<"reason">> => <<"CONSUMER_INVALID">> + } + ], + <<"message">> => <<"Project #9999 has been deleted.">>, + <<"status">> => <<"PERMISSION_DENIED">> + } + } + ) + }}. + %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ @@ -785,7 +843,7 @@ t_start_stop(Config) -> prop_client_stopped(), prop_workers_stopped(PubSubTopic), fun(Trace) -> - ?assertMatch([_], ?of_kind(gcp_pubsub_consumer_unset_nonexistent_topic, Trace)), + ?assertMatch([_], ?of_kind(gcp_pubsub_consumer_clear_unhealthy, Trace)), ok end ] @@ -1992,6 +2050,81 @@ t_get_subscription(Config) -> ), ok. +t_permission_denied_topic_check(Config) -> + [#{pubsub_topic := PubSubTopic}] = ?config(topic_mapping, Config), + ResourceId = resource_id(Config), + ?check_trace( + begin + %% the emulator does not check any credentials + emqx_common_test_helpers:with_mock( + emqx_bridge_gcp_pubsub_client, + query_sync, + fun(PreparedRequest = {prepared_request, {Method, Path, _Body}}, Client) -> + RE = iolist_to_binary(["/topics/", PubSubTopic, "$"]), + case {Method =:= get, re:run(Path, RE)} of + {true, {match, _}} -> + permission_denied_response(); + _ -> + meck:passthrough([PreparedRequest, Client]) + end + end, + fun() -> + {{ok, _}, {ok, _}} = + ?wait_async_action( + create_bridge(Config), + #{?snk_kind := gcp_pubsub_stop}, + 5_000 + ), + ?assertMatch( + {ok, disconnected}, + emqx_resource_manager:health_check(ResourceId) + ), + ?assertMatch( + {ok, _Group, #{error := {unhealthy_target, "Permission denied" ++ _}}}, + emqx_resource_manager:lookup_cached(ResourceId) + ), + ok + end + ), + ok + end, + [] + ), + ok. + +t_permission_denied_worker(Config) -> + ?check_trace( + begin + emqx_common_test_helpers:with_mock( + emqx_bridge_gcp_pubsub_client, + query_sync, + fun(PreparedRequest = {prepared_request, {Method, _Path, _Body}}, Client) -> + case Method =:= put of + true -> + permission_denied_response(); + false -> + meck:passthrough([PreparedRequest, Client]) + end + end, + fun() -> + {{ok, _}, {ok, _}} = + ?wait_async_action( + create_bridge( + Config + ), + #{?snk_kind := gcp_pubsub_consumer_worker_terminate}, + 10_000 + ), + + ok + end + ), + ok + end, + [] + ), + ok. + t_cluster_subscription(Config) -> [ #{ From 4e80d669b005d709ff7cb8ca29c9cc564f058238 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 15 Aug 2023 17:03:24 -0300 Subject: [PATCH 13/20] fix(influxdb_bridge): avoid double-parsing write syntax during probe Fixes https://emqx.atlassian.net/browse/EMQX-10771 --- .../src/emqx_bridge_influxdb.app.src | 2 +- apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl | 3 +++ .../src/emqx_bridge_influxdb_connector.erl | 4 +++- .../test/emqx_bridge_influxdb_SUITE.erl | 10 ++++++++++ changes/ee/fix-11453.en.md | 1 + 5 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 changes/ee/fix-11453.en.md diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.app.src b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.app.src index a612c225b..2a0eef72e 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.app.src +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_influxdb, [ {description, "EMQX Enterprise InfluxDB Bridge"}, - {vsn, "0.1.3"}, + {vsn, "0.1.4"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl index b178f77e0..47eeecb4e 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl @@ -168,6 +168,9 @@ write_syntax(format) -> write_syntax(_) -> undefined. +to_influx_lines(Lines = [#{} | _]) -> + %% already parsed/converted (e.g.: bridge_probe, after hocon_tconf:check_plain) + Lines; to_influx_lines(RawLines) -> try influx_lines(str(RawLines), []) diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl index be5ed6b1c..b39d46b59 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl @@ -66,7 +66,9 @@ on_start(InstId, Config) -> on_stop(InstId, _State) -> case emqx_resource:get_allocated_resources(InstId) of #{?influx_client := Client} -> - influxdb:stop_client(Client); + Res = influxdb:stop_client(Client), + ?tp(influxdb_client_stopped, #{instance_id => InstId}), + Res; _ -> ok end. diff --git a/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl b/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl index 3976d187a..c0d63002b 100644 --- a/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl +++ b/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl @@ -124,6 +124,9 @@ init_per_group(InfluxDBType, Config0) when {influxdb_config, InfluxDBConfig}, {influxdb_config_string, ConfigString}, {ehttpc_pool_name, EHttpcPoolName}, + {bridge_type, influxdb_api_v1}, + {bridge_name, Name}, + {bridge_config, InfluxDBConfig}, {influxdb_name, Name} | Config ]; @@ -193,6 +196,9 @@ init_per_group(InfluxDBType, Config0) when {influxdb_config, InfluxDBConfig}, {influxdb_config_string, ConfigString}, {ehttpc_pool_name, EHttpcPoolName}, + {bridge_type, influxdb_api_v2}, + {bridge_name, Name}, + {bridge_config, InfluxDBConfig}, {influxdb_name, Name} | Config ]; @@ -570,6 +576,10 @@ t_start_ok(Config) -> ), ok. +t_start_stop(Config) -> + ok = emqx_bridge_testlib:t_start_stop(Config, influxdb_client_stopped), + ok. + t_start_already_started(Config) -> Type = influxdb_type_bin(?config(influxdb_type, Config)), Name = ?config(influxdb_name, Config), diff --git a/changes/ee/fix-11453.en.md b/changes/ee/fix-11453.en.md new file mode 100644 index 000000000..428f51d5b --- /dev/null +++ b/changes/ee/fix-11453.en.md @@ -0,0 +1 @@ +Fixed an issue which would yield false negatives when testing the connectivity of InfluxDB bridges. From ffca5812293da368f7878340aa06eba0552df8a6 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 16 Aug 2023 11:05:33 -0300 Subject: [PATCH 14/20] feat(kafka): add option to configure health check interval Fixes https://emqx.atlassian.net/browse/EMQX-10781 --- .../src/emqx_bridge_kafka.app.src | 2 +- apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl | 14 +++++++++++--- .../test/emqx_bridge_kafka_impl_producer_SUITE.erl | 1 - .../test/emqx_bridge_kafka_tests.erl | 6 ++++++ changes/ee/feat-11459.en.md | 1 + 5 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 changes/ee/feat-11459.en.md diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src index 3792409c6..55b02560b 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_bridge_kafka, [ {description, "EMQX Enterprise Kafka Bridge"}, - {vsn, "0.1.7"}, + {vsn, "0.1.8"}, {registered, [emqx_bridge_kafka_consumer_sup]}, {applications, [ kernel, diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl index 544c95b85..6b3f3cd64 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl @@ -268,7 +268,8 @@ fields(producer_opts) -> required => true, desc => ?DESC(producer_kafka_opts), validator => fun producer_strategy_key_validator/1 - })} + })}, + {resource_opts, mk(ref(resource_opts), #{default => #{}})} ]; fields(producer_kafka_opts) -> [ @@ -425,7 +426,8 @@ fields(consumer_opts) -> {value_encoding_mode, mk(enum([none, base64]), #{ default => none, desc => ?DESC(consumer_value_encoding_mode) - })} + })}, + {resource_opts, mk(ref(resource_opts), #{default => #{}})} ]; fields(consumer_topic_mapping) -> [ @@ -460,10 +462,16 @@ fields(consumer_kafka_opts) -> emqx_schema:timeout_duration_s(), #{default => <<"5s">>, desc => ?DESC(consumer_offset_commit_interval_seconds)} )} - ]. + ]; +fields(resource_opts) -> + SupportedFields = [health_check_interval], + CreationOpts = emqx_resource_schema:create_opts(_Overrides = []), + lists:filter(fun({Field, _}) -> lists:member(Field, SupportedFields) end, CreationOpts). desc("config") -> ?DESC("desc_config"); +desc(resource_opts) -> + ?DESC(emqx_resource_schema, "resource_opts"); desc("get_" ++ Type) when Type =:= "consumer"; Type =:= "producer" -> ["Configuration for Kafka using `GET` method."]; desc("put_" ++ Type) when Type =:= "consumer"; Type =:= "producer" -> diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_producer_SUITE.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_producer_SUITE.erl index 31cd4c66a..d93b6dd7d 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_producer_SUITE.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_producer_SUITE.erl @@ -596,7 +596,6 @@ t_send_message_with_headers(Config) -> }, KafkaMsg ), - ?assertMatch(#kafka_message{key = BinTime}, KafkaMsg), %% TODO: refactor those into init/end per testcase ok = ?PRODUCER:on_stop(ResourceId, State), ?assertEqual([], supervisor:which_children(wolff_client_sup)), diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl index 367423cd4..f476ded39 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl @@ -306,6 +306,9 @@ kafka_producer_new_hocon() -> " sndbuf = \"1024KB\"\n" " }\n" " ssl {enable = false, verify = \"verify_peer\"}\n" + " resource_opts {\n" + " health_check_interval = 10s\n" + " }\n" " }\n" "}\n" "". @@ -351,5 +354,8 @@ bridges.kafka_consumer.my_consumer { verify = verify_none server_name_indication = \"auto\" } + resource_opts { + health_check_interval = 10s + } } """. diff --git a/changes/ee/feat-11459.en.md b/changes/ee/feat-11459.en.md new file mode 100644 index 000000000..88b2047c4 --- /dev/null +++ b/changes/ee/feat-11459.en.md @@ -0,0 +1 @@ +Added the option to configure health check interval for Kafka bridges. From 5d79823891f32ec2fc6333bfdbc9e091e218c769 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 17 Aug 2023 01:13:19 +0400 Subject: [PATCH 15/20] perf(topicidx): preserve next key on the stack to reuse later This should further optimize the number of `ets:next/2` calls required for a single match query. --- apps/emqx/src/emqx_topic_index.erl | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index 09e19a9f7..a6f662f74 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -127,6 +127,17 @@ matches(Words, RPrefix, Acc, Tab) -> Prefix = lists:reverse(RPrefix), matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). +matches(Words, RPrefix, K = {Filter, _}, Acc, Tab) -> + Prefix = lists:reverse(RPrefix), + case Prefix > Filter of + true -> + % NOTE: Prefix already greater than the last key seen, need to `ets:next/2`. + matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab); + false -> + % NOTE: Prefix is still less than or equal to the last key seen, reuse it. + matches(K, Prefix, Words, RPrefix, Acc, Tab) + end. + matches(K, Prefix, Words, RPrefix, Acc, Tab) -> case match_next(Prefix, K, Words) of true -> @@ -136,26 +147,27 @@ matches(K, Prefix, Words, RPrefix, Acc, Tab) -> stop -> Acc; Matched -> - matches_rest(Matched, Words, RPrefix, Acc, Tab) + % NOTE: Prserve next key on the stack to save on `ets:next/2` calls. + matches_rest(Matched, Words, RPrefix, K, Acc, Tab) end. -matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Acc, Tab) -> +matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, K, Acc, Tab) -> % NOTE % Fast-forward through identical words in the topic and the last key suffixes. % This should save us a few redundant `ets:next` calls at the cost of slightly % more complex match patterns. - matches_rest(SLast, Rest, [W1 | RPrefix], Acc, Tab); -matches_rest(SLast, [W | Rest], RPrefix, Acc, Tab) when is_list(SLast) -> - matches(Rest, [W | RPrefix], Acc, Tab); -matches_rest(plus, [W | Rest], RPrefix, Acc, Tab) -> + matches_rest(SLast, Rest, [W1 | RPrefix], K, Acc, Tab); +matches_rest(SLast, [W | Rest], RPrefix, K, Acc, Tab) when is_list(SLast) -> + matches(Rest, [W | RPrefix], K, Acc, Tab); +matches_rest(plus, [W | Rest], RPrefix, K, Acc, Tab) -> % NOTE % There's '+' in the key suffix, meaning we should accumulate all matches from % each of 2 branches: % 1. Match the rest of the topic as if there was '+' in the current position. % 2. Skip this key and try to match the topic as it is. - NAcc = matches(Rest, ['+' | RPrefix], Acc, Tab), - matches(Rest, [W | RPrefix], NAcc, Tab); -matches_rest(_, [], _RPrefix, Acc, _Tab) -> + NAcc = matches(Rest, ['+' | RPrefix], K, Acc, Tab), + matches(Rest, [W | RPrefix], K, NAcc, Tab); +matches_rest(_, [], _RPrefix, _K, Acc, _Tab) -> Acc. match_add(K = {_Filter, ID}, Acc = #{}) -> From 7bfad01e9a35f4790d5951022c0a12204e2fa95e Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Wed, 16 Aug 2023 21:38:26 +0800 Subject: [PATCH 16/20] chore: upgrade dashboard to e1.2.0-beta.4 for ee --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 037d33cea..2f3d7067c 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ endif # Dashboard version # from https://github.com/emqx/emqx-dashboard5 export EMQX_DASHBOARD_VERSION ?= v1.3.2 -export EMQX_EE_DASHBOARD_VERSION ?= e1.1.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.2.0-beta.4 # `:=` should be used here, otherwise the `$(shell ...)` will be executed every time when the variable is used # In make 4.4+, for backward-compatibility the value from the original environment is used. From 7bad7d68de305fd8149618feb9cee57ad9909f70 Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Thu, 17 Aug 2023 18:28:58 +0800 Subject: [PATCH 17/20] ci: change element path in ui test case --- scripts/ui-tests/dashboard_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index 4b93262b1..91a7264ec 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -62,12 +62,13 @@ def test_log(driver, login, dashboard_url): ensure_current_url(driver, dest_url) title = wait_title(driver) assert "Logging" == title.text - label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[./label/span[text()='Enable Log Handler']]") + + label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Enable Log Handler')]") assert driver.find_elements(By.ID, label.get_attribute("for")) - label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[./label/span[text()='Log Level']]") + label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Log Level')]") assert driver.find_elements(By.ID, label.get_attribute("for")) - label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[./label/span[text()='Log Formatter']]") + label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Log Formatter')]") assert driver.find_elements(By.ID, label.get_attribute("for")) - label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[./label/span[text()='Time Offset']]") + label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Time Offset')]") assert driver.find_elements(By.ID, label.get_attribute("for")) From bd2155198ae6f560e98c01f6039b6316c102dd20 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Thu, 17 Aug 2023 17:21:55 +0300 Subject: [PATCH 18/20] ci: wait for hstore container when starting hstream server --- .../docker-compose-hstreamdb.yaml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.ci/docker-compose-file/docker-compose-hstreamdb.yaml b/.ci/docker-compose-file/docker-compose-hstreamdb.yaml index f3c4dbd4c..bf367e408 100644 --- a/.ci/docker-compose-file/docker-compose-hstreamdb.yaml +++ b/.ci/docker-compose-file/docker-compose-hstreamdb.yaml @@ -5,8 +5,10 @@ services: image: hstreamdb/hstream:v0.15.0 container_name: hstreamdb depends_on: - - zookeeper - - hstore + zookeeper: + condition: service_started + hstore: + condition: service_healthy # ports: # - "127.0.0.1:6570:6570" expose: @@ -53,7 +55,14 @@ services: --use-tcp --tcp-host $$(hostname -I | awk '{print $$1}') \ --user-admin-port 6440 \ --param enable-dscp-reflection=false \ - --no-interactive + --no-interactive \ + > /data/store/hstore.log 2>&1 + healthcheck: + test: ["CMD", "grep", "LogDevice Cluster running", "/data/store/hstore.log"] + interval: 10s + timeout: 10s + retries: 60 + start_period: 60s zookeeper: image: zookeeper From a0c2fe8cc121bf90fd340e0be877862c221b1f28 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 17 Aug 2023 09:57:00 +0200 Subject: [PATCH 19/20] chore: e5.2.0-alpha.3 --- apps/emqx/include/emqx_release.hrl | 2 +- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index e0601cc7f..c7fa97be7 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -35,7 +35,7 @@ -define(EMQX_RELEASE_CE, "5.1.5-build.3"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.2.0-alpha.1"). +-define(EMQX_RELEASE_EE, "5.2.0-alpha.3"). %% The HTTP API version -define(EMQX_API_VERSION, "5.0"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index 971817e10..575c6b354 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.2.0-alpha.1 +version: 5.2.0-alpha.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.2.0-alpha.1 +appVersion: 5.2.0-alpha.3 From fa62931aff852ca4fc9186583f6c0eae79d89675 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Thu, 17 Aug 2023 19:04:19 +0300 Subject: [PATCH 20/20] chore(ci): fix zookeeper version for hstreamdb --- .ci/docker-compose-file/.env | 2 ++ .ci/docker-compose-file/docker-compose-hstreamdb.yaml | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.ci/docker-compose-file/.env b/.ci/docker-compose-file/.env index e99a6d13f..b7033caae 100644 --- a/.ci/docker-compose-file/.env +++ b/.ci/docker-compose-file/.env @@ -10,6 +10,8 @@ CASSANDRA_TAG=3.11.6 MINIO_TAG=RELEASE.2023-03-20T20-16-18Z OPENTS_TAG=9aa7f88 KINESIS_TAG=2.1 +HSTREAMDB_TAG=v0.15.0 +HSTREAMDB_ZK_TAG=3.8.1 MS_IMAGE_ADDR=mcr.microsoft.com/mssql/server SQLSERVER_TAG=2019-CU19-ubuntu-20.04 diff --git a/.ci/docker-compose-file/docker-compose-hstreamdb.yaml b/.ci/docker-compose-file/docker-compose-hstreamdb.yaml index bf367e408..d42fd9fa2 100644 --- a/.ci/docker-compose-file/docker-compose-hstreamdb.yaml +++ b/.ci/docker-compose-file/docker-compose-hstreamdb.yaml @@ -2,7 +2,7 @@ version: "3.5" services: hserver: - image: hstreamdb/hstream:v0.15.0 + image: hstreamdb/hstream:${HSTREAMDB_TAG} container_name: hstreamdb depends_on: zookeeper: @@ -39,7 +39,7 @@ services: --io-tasks-network emqx_bridge hstore: - image: hstreamdb/hstream:v0.15.0 + image: hstreamdb/hstream:${HSTREAMDB_TAG} networks: - emqx_bridge volumes: @@ -65,7 +65,7 @@ services: start_period: 60s zookeeper: - image: zookeeper + image: zookeeper:${HSTREAMDB_ZK_TAG} expose: - 2181 networks: