Merge pull request #11236 from HJianBo/perf-speedup-the-empty-conds-query

perf(http): improve the speed of clients querying in default parameters
This commit is contained in:
JianBo He 2023-07-11 19:30:41 +08:00 committed by GitHub
commit 515b414d99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 9 deletions

View File

@ -29,7 +29,9 @@
%% first_next query APIs
-export([
node_query/6,
node_query/7,
cluster_query/5,
cluster_query/6,
b2i/1
]).
@ -57,6 +59,18 @@
fun((node(), term()) -> term())
| fun((term()) -> term()).
-type query_options() :: #{
%% Whether to use `ets:info/2` to get the total number of rows when the query conditions are
%% empty. It can significantly improve the speed of the query when the table stores a large
%% amount of data.
%%
%% However, the total number of rows may be inaccurate when the table stores data of
%% multiple schemas, e.g. Built-in Authorization.
%%
%% Default: false
fast_total_counting => boolean()
}.
-type query_return() :: #{meta := map(), data := [term()]}.
-export([do_query/2, apply_total_query/1]).
@ -114,13 +128,25 @@ limit(Params) when is_map(Params) ->
format_result_fun()
) -> {error, page_limit_invalid} | {error, atom(), term()} | query_return().
%% Same as node_query/7, called with an empty options map (`#{}`).
node_query(Node, Tab, QString, QSchema, MsFun, FmtFun) ->
node_query(Node, Tab, QString, QSchema, MsFun, FmtFun, #{}).
-spec node_query(
node(),
atom(),
query_params(),
query_schema(),
query_to_match_spec_fun(),
format_result_fun(),
query_options()
) -> {error, page_limit_invalid} | {error, atom(), term()} | query_return().
node_query(Node, Tab, QString, QSchema, MsFun, FmtFun, Options) ->
case parse_pager_params(QString) of
false ->
{error, page_limit_invalid};
Meta ->
{_CodCnt, NQString} = parse_qstring(QString, QSchema),
ResultAcc = init_query_result(),
QueryState = init_query_state(Tab, NQString, MsFun, Meta),
QueryState = init_query_state(Tab, NQString, MsFun, Meta, Options),
NResultAcc = do_node_query(
Node, QueryState, ResultAcc
),
@ -158,6 +184,17 @@ do_node_query(
format_result_fun()
) -> {error, page_limit_invalid} | {error, atom(), term()} | query_return().
%% Same as cluster_query/6, called with an empty options map (`#{}`).
cluster_query(Tab, QString, QSchema, MsFun, FmtFun) ->
cluster_query(Tab, QString, QSchema, MsFun, FmtFun, #{}).
-spec cluster_query(
atom(),
query_params(),
query_schema(),
query_to_match_spec_fun(),
format_result_fun(),
query_options()
) -> {error, page_limit_invalid} | {error, atom(), term()} | query_return().
cluster_query(Tab, QString, QSchema, MsFun, FmtFun, Options) ->
case parse_pager_params(QString) of
false ->
{error, page_limit_invalid};
@ -165,7 +202,7 @@ cluster_query(Tab, QString, QSchema, MsFun, FmtFun) ->
{_CodCnt, NQString} = parse_qstring(QString, QSchema),
Nodes = emqx:running_nodes(),
ResultAcc = init_query_result(),
QueryState = init_query_state(Tab, NQString, MsFun, Meta),
QueryState = init_query_state(Tab, NQString, MsFun, Meta, Options),
NResultAcc = do_cluster_query(
Nodes, QueryState, ResultAcc
),
@ -231,9 +268,10 @@ collect_total_from_tail_nodes(Nodes, QueryState = #{total := TotalAcc}) ->
%% table := atom(),
%% qs := {Qs, Fuzzy}, %% parsed query params
%% msfun := query_to_match_spec_fun(),
%% complete := boolean()
%% complete := boolean(),
%% options := query_options()
%% }
init_query_state(Tab, QString, MsFun, _Meta = #{page := Page, limit := Limit}) ->
init_query_state(Tab, QString, MsFun, _Meta = #{page := Page, limit := Limit}, Options) ->
#{match_spec := Ms, fuzzy_fun := FuzzyFun} = erlang:apply(MsFun, [Tab, QString]),
%% assert FuzzyFun type
_ =
@ -252,7 +290,8 @@ init_query_state(Tab, QString, MsFun, _Meta = #{page := Page, limit := Limit}) -
msfun => MsFun,
match_spec => Ms,
fuzzy_fun => FuzzyFun,
complete => false
complete => false,
options => Options
},
case counting_total_fun(QueryState) of
false ->
@ -355,6 +394,8 @@ apply_total_query(QueryState = #{table := Tab}) ->
Fun(Tab)
end.
counting_total_fun(_QueryState = #{qs := {[], []}, options := #{fast_total_counting := true}}) ->
fun(Tab) -> ets:info(Tab, size) end;
counting_total_fun(_QueryState = #{match_spec := Ms, fuzzy_fun := undefined}) ->
%% XXX: Calculating the total number of data that match a certain
%% condition under a large table is very expensive because the
@ -373,9 +414,7 @@ counting_total_fun(_QueryState = #{match_spec := Ms, fuzzy_fun := undefined}) ->
counting_total_fun(_QueryState = #{fuzzy_fun := FuzzyFun}) when FuzzyFun =/= undefined ->
%% XXX: Calculating the total number for a fuzzy searching is very very expensive
%% so it is not supported now
false;
counting_total_fun(_QueryState = #{qs := {[], []}}) ->
fun(Tab) -> ets:info(Tab, size) end.
false.
%% ResultAcc :: #{count := integer(),
%% cursor := integer(),

View File

@ -661,12 +661,14 @@ list_clients(QString) ->
Result =
case maps:get(<<"node">>, QString, undefined) of
undefined ->
Options = #{fast_total_counting => true},
emqx_mgmt_api:cluster_query(
?CHAN_INFO_TAB,
QString,
?CLIENT_QSCHEMA,
fun ?MODULE:qs2ms/2,
fun ?MODULE:format_channel_info/2
fun ?MODULE:format_channel_info/2,
Options
);
Node0 ->
case emqx_utils:safe_to_existing_atom(Node0) of

View File

@ -0,0 +1 @@
Improve the speed of querying clients through the HTTP API `/clients` endpoint when default parameters are used