feat(index): add topic index facility

Somewhat similar to `emqx_trie` in design and logic, yet built on
top of a single, potentially pre-existing table.
This commit is contained in:
Andrew Mayorov 2023-07-18 23:11:04 +02:00
parent 7852af896c
commit 8feda315f6
No known key found for this signature in database
GPG Key ID: 2837C62ACFBFED5D
2 changed files with 299 additions and 0 deletions

View File

@ -0,0 +1,156 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Topic index for matching topics to topic filters.
%%
%% Works on top of ETS ordered_set table. Keys are parsed topic filters
%% with record ID appended to the end, wrapped in a tuple to disambiguate from
%% topic filter words. Existing table may be used if existing keys will not
%% collide with index keys.
%%
%% Designed to effectively answer questions like:
%% 1. Does any topic filter match given topic?
%% 2. Which records are associated with topic filters matching given topic?
%%
%% Questions like these are _only slightly_ less effective:
%% 1. Which topic filters match given topic?
%% 2. Which record IDs are associated with topic filters matching given topic?
-module(emqx_topic_index).
-export([new/0]).
-export([insert/4]).
-export([delete/3]).
-export([match/2]).
-export([matches/2]).
-export([get_id/1]).
-export([get_topic/1]).
-export([get_record/2]).
-type key(ID) :: [binary() | '+' | '#' | {ID}].
-type match(ID) :: key(ID).
new() ->
ets:new(?MODULE, [public, ordered_set, {write_concurrency, true}]).
insert(Filter, ID, Record, Tab) ->
ets:insert(Tab, {emqx_topic:words(Filter) ++ [{ID}], Record}).
delete(Filter, ID, Tab) ->
ets:delete(Tab, emqx_topic:words(Filter) ++ [{ID}]).
-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false.
match(Topic, Tab) ->
{Words, RPrefix} = match_init(Topic),
match(Words, RPrefix, Tab).
match(Words, RPrefix, Tab) ->
Prefix = lists:reverse(RPrefix),
K = ets:next(Tab, Prefix),
case match_filter(Prefix, K, Words =/= []) of
true ->
K;
stop ->
false;
Matched ->
match_rest(Matched, Words, RPrefix, Tab)
end.
match_rest(false, [W | Rest], RPrefix, Tab) ->
match(Rest, [W | RPrefix], Tab);
match_rest(plus, [W | Rest], RPrefix, Tab) ->
case match(Rest, ['+' | RPrefix], Tab) of
Match when is_list(Match) ->
Match;
false ->
match(Rest, [W | RPrefix], Tab)
end;
match_rest(_, [], _RPrefix, _Tab) ->
false.
-spec matches(emqx_types:topic(), ets:table()) -> [match(_ID)].
matches(Topic, Tab) ->
{Words, RPrefix} = match_init(Topic),
matches(Words, RPrefix, Tab).
matches(Words, RPrefix, Tab) ->
Prefix = lists:reverse(RPrefix),
matches(ets:next(Tab, Prefix), Prefix, Words, RPrefix, Tab).
matches(K, Prefix, Words, RPrefix, Tab) ->
case match_filter(Prefix, K, Words =/= []) of
true ->
[K | matches(ets:next(Tab, K), Prefix, Words, RPrefix, Tab)];
stop ->
[];
Matched ->
matches_rest(Matched, Words, RPrefix, Tab)
end.
matches_rest(false, [W | Rest], RPrefix, Tab) ->
matches(Rest, [W | RPrefix], Tab);
matches_rest(plus, [W | Rest], RPrefix, Tab) ->
matches(Rest, ['+' | RPrefix], Tab) ++ matches(Rest, [W | RPrefix], Tab);
matches_rest(_, [], _RPrefix, _Tab) ->
[].
match_filter([], [{_ID}], _IsPrefix = false) ->
% NOTE: exact match is `true` only if we match whole topic, not prefix
true;
match_filter([], ['#', {_ID}], _IsPrefix) ->
% NOTE: naturally, '#' < '+', so this is already optimal for `match/2`
true;
match_filter([], ['+' | _], _) ->
plus;
match_filter([], [_H | _], _) ->
false;
match_filter([H | T1], [H | T2], IsPrefix) ->
match_filter(T1, T2, IsPrefix);
match_filter([H1 | _], [H2 | _], _) when H2 > H1 ->
% NOTE: we're strictly past the prefix, no need to continue
stop;
match_filter(_, '$end_of_table', _) ->
stop.
match_init(Topic) ->
case emqx_topic:words(Topic) of
[W = <<"$", _/bytes>> | Rest] ->
% NOTE
% This will effectively skip attempts to match special topics to `#` or `+/...`.
{Rest, [W]};
Words ->
{Words, []}
end.
-spec get_id(match(ID)) -> ID.
get_id([{ID}]) ->
ID;
get_id([_ | Rest]) ->
get_id(Rest).
-spec get_topic(match(_ID)) -> emqx_types:topic().
get_topic(K) ->
emqx_topic:join(cut_topic(K)).
cut_topic([{_ID}]) ->
[];
cut_topic([W | Rest]) ->
[W | cut_topic(Rest)].
-spec get_record(match(_ID), ets:table()) -> _Record.
get_record(K, Tab) ->
ets:lookup_element(Tab, K, 2).

View File

@ -0,0 +1,143 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_topic_index_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
all() ->
emqx_common_test_helpers:all(?MODULE).
t_insert(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab),
?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))),
?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))).
t_match(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab),
?assertMatch(
[<<"sensor/#">>, <<"sensor/+/#">>],
[topic(M) || M <- matches(<<"sensor/1">>, Tab)]
).
t_match2(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab),
?assertEqual(
[<<"#">>, <<"+/#">>, <<"+/+/#">>],
[topic(M) || M <- matches(<<"a/b/c">>, Tab)]
),
?assertEqual(
false,
emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab)
).
t_match3(_) ->
Tab = emqx_topic_index:new(),
Records = [
{<<"d/#">>, t_match3_1},
{<<"a/b/+">>, t_match3_2},
{<<"a/#">>, t_match3_3},
{<<"#">>, t_match3_4},
{<<"$SYS/#">>, t_match3_sys}
],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
Records
),
Matched = matches(<<"a/b/c">>, Tab),
case length(Matched) of
3 -> ok;
_ -> error({unexpected, Matched})
end,
?assertEqual(
t_match3_sys,
id(match(<<"$SYS/a/b/c">>, Tab))
).
t_match4(_) ->
Tab = emqx_topic_index:new(),
Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
Records
),
?assertEqual(
[<<"/#">>, <<"/+">>],
[topic(M) || M <- matches(<<"/">>, Tab)]
),
?assertEqual(
[<<"/#">>, <<"/+/a/b/c">>],
[topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)]
).
t_match5(_) ->
Tab = emqx_topic_index:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
Records = [
{<<"#">>, t_match5_1},
{<<T/binary, "/#">>, t_match5_2},
{<<T/binary, "/+">>, t_match5_3}
],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
Records
),
?assertEqual(
[<<"#">>, <<T/binary, "/#">>],
[topic(M) || M <- matches(T, Tab)]
),
?assertEqual(
[<<"#">>, <<T/binary, "/#">>, <<T/binary, "/+">>],
[topic(M) || M <- matches(<<T/binary, "/1">>, Tab)]
).
t_match6(_) ->
Tab = emqx_topic_index:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>,
emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab),
?assertEqual(ID, id(match(T, Tab))).
t_match7(_) ->
Tab = emqx_topic_index:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>,
emqx_topic_index:insert(W, t_match7, <<>>, Tab),
?assertEqual(W, topic(match(T, Tab))).
match(T, Tab) ->
emqx_topic_index:match(T, Tab).
matches(T, Tab) ->
lists:sort(emqx_topic_index:matches(T, Tab)).
id(Match) ->
emqx_topic_index:get_id(Match).
topic(Match) ->
emqx_topic_index:get_topic(Match).