From b76b6fbe636cfc14acac5d9c9e909daaa31327e8 Mon Sep 17 00:00:00 2001 From: zmstone Date: Sat, 13 Apr 2024 00:11:57 +0200 Subject: [PATCH] feat(variform): initialize client_attrs with variform Moved regular expression extraction as a variform function. --- apps/emqx/src/emqx_channel.erl | 155 +++-------------- apps/emqx/src/emqx_schema.erl | 34 +++- apps/emqx/test/emqx_client_SUITE.erl | 24 +-- apps/emqx/test/emqx_listeners_SUITE.erl | 12 +- .../test/emqx_authz/emqx_authz_SUITE.erl | 14 +- apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 62 +++---- apps/emqx_utils/src/emqx_variform.erl | 159 +++++++++++++----- ...variform_str.erl => emqx_variform_bif.erl} | 39 ++++- .../test/emqx_variform_bif_tests.erl | 59 +++++++ apps/emqx_utils/test/emqx_variform_tests.erl | 50 ++++-- .../ce/{feat-12750.en.md => feat-12872.en.md} | 4 +- rel/i18n/emqx_schema.hocon | 51 +++--- scripts/spellcheck/dicts/emqx.txt | 1 + 13 files changed, 375 insertions(+), 289 deletions(-) rename apps/emqx_utils/src/{emqx_variform_str.erl => emqx_variform_bif.erl} (90%) create mode 100644 apps/emqx_utils/test/emqx_variform_bif_tests.erl rename changes/ce/{feat-12750.en.md => feat-12872.en.md} (93%) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index a0fbae441..efb5133bc 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -251,7 +251,7 @@ init( MP -> MP end, ListenerId = emqx_listeners:listener_id(Type, Listener), - ClientInfo0 = set_peercert_infos( + ClientInfo = set_peercert_infos( Peercert, #{ zone => Zone, @@ -269,7 +269,6 @@ init( }, Zone ), - ClientInfo = initialize_client_attrs_from_cert(ClientInfo0, Peercert), {NClientInfo, NConnInfo} = take_ws_cookie(ClientInfo, ConnInfo), #channel{ conninfo = NConnInfo, @@ -1586,60 +1585,6 @@ enrich_client(ConnPkt, Channel = #channel{clientinfo = ClientInfo}) -> {error, ReasonCode, Channel#channel{clientinfo = NClientInfo}} end. 
-initialize_client_attrs_from_cert(#{zone := Zone} = ClientInfo, Peercert) -> - Inits = get_client_attrs_init_config(Zone), - lists:foldl( - fun(Init, Acc) -> - do_initialize_client_attrs_from_cert(Init, Acc, Peercert) - end, - ClientInfo, - Inits - ). - -do_initialize_client_attrs_from_cert( - #{ - extract_from := From, - extract_regexp := Regexp, - extract_as := AttrName - }, - ClientInfo, - Peercert -) when From =:= cn orelse From =:= dn -> - Attrs0 = maps:get(client_attrs, ClientInfo, #{}), - Attrs = - case extract_client_attr_from_cert(From, Regexp, Peercert) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_cert", - extracted_as => AttrName, - extracted_value => Value - } - ), - Attrs0#{AttrName => Value}; - _ -> - Attrs0 - end, - ClientInfo#{client_attrs => Attrs}; -do_initialize_client_attrs_from_cert(_, ClientInfo, _Peercert) -> - ClientInfo. - -extract_client_attr_from_cert(cn, Regexp, Peercert) -> - CN = esockd_peercert:common_name(Peercert), - re_extract(CN, Regexp); -extract_client_attr_from_cert(dn, Regexp, Peercert) -> - DN = esockd_peercert:subject(Peercert), - re_extract(DN, Regexp). - -re_extract(Str, Regexp) when is_binary(Str) -> - case re:run(Str, Regexp, [{capture, all_but_first, list}]) of - {match, [_ | _] = List} -> {ok, iolist_to_binary(List)}; - _ -> nomatch - end; -re_extract(_NotStr, _Regexp) -> - ignored. - set_username( #mqtt_packet_connect{username = Username}, ClientInfo = #{username := undefined} @@ -1681,33 +1626,36 @@ maybe_assign_clientid(#mqtt_packet_connect{clientid = ClientId}, ClientInfo) -> {ok, ClientInfo#{clientid => ClientId}}. get_client_attrs_init_config(Zone) -> - case get_mqtt_conf(Zone, client_attrs_init, []) of - L when is_list(L) -> L; - M when is_map(M) -> [M] - end. + get_mqtt_conf(Zone, client_attrs_init, []). 
-maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo0) -> +maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo) -> Inits = get_client_attrs_init_config(Zone), - ClientInfo = initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo0), - {ok, initialize_client_attrs_from_clientinfo(Inits, ClientInfo)}. + UserProperty = get_user_property_as_map(ConnPkt), + {ok, initialize_client_attrs(Inits, ClientInfo#{user_property => UserProperty})}. -initialize_client_attrs_from_clientinfo(Inits, ClientInfo) -> +initialize_client_attrs(Inits, ClientInfo) -> lists:foldl( - fun(Init, Acc) -> + fun(#{expression := Variform, set_as_attr := Name}, Acc) -> - Attrs = maps:get(client_attrs, ClientInfo, #{}), + Attrs = maps:get(client_attrs, Acc, #{}), % read from Acc so attrs set by earlier init rules are kept - case extract_attr_from_clientinfo(Init, ClientInfo) of + case emqx_variform:render(Variform, ClientInfo) of {ok, Value} -> - #{extract_as := Name} = Init, ?SLOG( debug, #{ - msg => "client_attr_init_from_clientinfo", - extracted_as => Name, - extracted_value => Value + msg => "client_attr_initialized", + set_as_attr => Name, + attr_value => Value } ), Acc#{client_attrs => Attrs#{Name => Value}}; - _ -> + {error, Reason} -> + ?SLOG( + warning, + #{ + msg => "client_attr_initialization_failed", + reason => Reason + } + ), Acc end end, @@ -1715,67 +1663,12 @@ initialize_client_attrs_from_clientinfo(Inits, ClientInfo) -> Inits ). -initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo) -> - lists:foldl( - fun(Init, Acc) -> - do_initialize_client_attrs_from_user_property(Init, ConnPkt, Acc) - end, - ClientInfo, - Inits - ). 
- -do_initialize_client_attrs_from_user_property( - #{ - extract_from := user_property, - extract_as := PropertyKey - }, - ConnPkt, - ClientInfo -) -> - Attrs0 = maps:get(client_attrs, ClientInfo, #{}), - Attrs = - case extract_client_attr_from_user_property(ConnPkt, PropertyKey) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_user_property", - extracted_as => PropertyKey, - extracted_value => Value - } - ), - Attrs0#{PropertyKey => Value}; - _ -> - Attrs0 - end, - ClientInfo#{client_attrs => Attrs}; -do_initialize_client_attrs_from_user_property(_, _ConnPkt, ClientInfo) -> - ClientInfo. - -extract_client_attr_from_user_property( - #mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}, PropertyKey -) -> - case lists:keyfind(PropertyKey, 1, UserProperty) of - {_, Value} -> - {ok, Value}; - _ -> - not_found - end; -extract_client_attr_from_user_property(_ConnPkt, _PropertyKey) -> - ignored. - -extract_attr_from_clientinfo(#{extract_from := clientid, extract_regexp := Regexp}, #{ - clientid := ClientId -}) -> - re_extract(ClientId, Regexp); -extract_attr_from_clientinfo(#{extract_from := username, extract_regexp := Regexp}, #{ - username := Username -}) when - Username =/= undefined +get_user_property_as_map(#mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}) when + is_list(UserProperty) -> - re_extract(Username, Regexp); -extract_attr_from_clientinfo(_Config, _CLientInfo) -> - ignored. + maps:from_list(UserProperty); +get_user_property_as_map(_) -> + #{}. 
fix_mountpoint(#{mountpoint := undefined} = ClientInfo) -> ClientInfo; diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index ef1ba9999..1dab4f42f 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1734,20 +1734,38 @@ fields(durable_storage) -> emqx_ds_schema:schema(); fields("client_attrs_init") -> [ - {extract_from, + {expression, sc( - hoconsc:enum([clientid, username, cn, dn, user_property]), - #{desc => ?DESC("client_attrs_init_extract_from")} + typerefl:alias("string", any()), + #{ + desc => ?DESC("client_attrs_init_expression"), + converter => fun compile_variform/2 + } )}, - {extract_regexp, sc(binary(), #{desc => ?DESC("client_attrs_init_extract_regexp")})}, - {extract_as, + {set_as_attr, sc(binary(), #{ - default => <<"alias">>, - desc => ?DESC("client_attrs_init_extract_as"), + desc => ?DESC("client_attrs_init_set_as_attr"), validator => fun restricted_string/1 })} ]. +compile_variform(undefined, _Opts) -> + undefined; +compile_variform(Expression, #{make_serializable := true}) -> + case is_binary(Expression) of + true -> + Expression; + false -> + emqx_variform:decompile(Expression) + end; +compile_variform(Expression, _Opts) -> + case emqx_variform:compile(Expression) of + {ok, Compiled} -> + Compiled; + {error, Reason} -> + throw(#{expression => Expression, reason => Reason}) + end. 
+ restricted_string(Str) -> case emqx_utils:is_restricted_str(Str) of true -> ok; @@ -3552,7 +3570,7 @@ mqtt_general() -> )}, {"client_attrs_init", sc( - hoconsc:union([hoconsc:array(ref("client_attrs_init")), ref("client_attrs_init")]), + hoconsc:array(ref("client_attrs_init")), #{ default => [], desc => ?DESC("client_attrs_init") diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index ba38d92ff..f0afe6195 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -395,13 +395,14 @@ t_certdn_as_alias(_) -> test_cert_extraction_as_alias(Which) -> %% extract the first two chars - Re = <<"^(..).*$">>, ClientId = iolist_to_binary(["ClientIdFor_", atom_to_list(Which)]), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => Which, - extract_regexp => Re, - extract_as => <<"alias">> - }), + {ok, Compiled} = emqx_variform:compile("substr(" ++ atom_to_list(Which) ++ ",0,2)"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"alias">> + } + ]), SslConf = emqx_common_test_helpers:client_mtls('tlsv1.2'), {ok, Client} = emqtt:start_link([ {clientid, ClientId}, {port, 8883}, {ssl, true}, {ssl_opts, SslConf} @@ -416,10 +417,13 @@ test_cert_extraction_as_alias(Which) -> t_client_attr_from_user_property(_Config) -> ClientId = atom_to_binary(?FUNCTION_NAME), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => user_property, - extract_as => <<"group">> - }), + {ok, Compiled} = emqx_variform:compile("user_property.group"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"group">> + } + ]), SslConf = emqx_common_test_helpers:client_mtls('tlsv1.3'), {ok, Client} = emqtt:start_link([ {clientid, ClientId}, diff --git a/apps/emqx/test/emqx_listeners_SUITE.erl b/apps/emqx/test/emqx_listeners_SUITE.erl index 
d49b5f893..ba84699c6 100644 --- a/apps/emqx/test/emqx_listeners_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_SUITE.erl @@ -150,11 +150,13 @@ t_client_attr_as_mountpoint(_Config) -> <<"limiter">> => #{}, <<"mountpoint">> => <<"groups/${client_attrs.ns}/">> }, - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => clientid, - extract_regexp => <<"^(.+)-.+$">>, - extract_as => <<"ns">> - }), + {ok, Compiled} = emqx_variform:compile("nth(1,tokens(clientid,'-'))"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"ns">> + } + ]), emqx_logger:set_log_level(debug), with_listener(tcp, attr_as_moutpoint, ListenerConf, fun() -> {ok, Client} = emqtt:start_link(#{ diff --git a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl index 37ac27a9b..70dd0bbb6 100644 --- a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl @@ -557,12 +557,14 @@ t_publish_last_will_testament_denied_topic(_Config) -> t_alias_prefix(_Config) -> {ok, _} = emqx_authz:update(?CMD_REPLACE, [?SOURCE_FILE_CLIENT_ATTR]), - ExtractSuffix = <<"^.*-(.*)$">>, - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => clientid, - extract_regexp => ExtractSuffix, - extract_as => <<"alias">> - }), + %% '^.*-(.*)$': extract the suffix after the last '-' + {ok, Compiled} = emqx_variform:compile("concat(regex_extract(clientid,'^.*-(.*)$'))"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"alias">> + } + ]), ClientId = <<"org1-name2">>, SubTopic = <<"name2/#">>, SubTopicNotAllowed = <<"name3/#">>, diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index 6a719c3f1..414a3d620 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ 
b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -771,66 +771,66 @@ is_array(_) -> false. %% String Funcs %%------------------------------------------------------------------------------ -coalesce(List) -> emqx_variform_str:coalesce(List). +coalesce(List) -> emqx_variform_bif:coalesce(List). -coalesce(A, B) -> emqx_variform_str:coalesce(A, B). +coalesce(A, B) -> emqx_variform_bif:coalesce(A, B). -lower(S) -> emqx_variform_str:lower(S). +lower(S) -> emqx_variform_bif:lower(S). -ltrim(S) -> emqx_variform_str:ltrim(S). +ltrim(S) -> emqx_variform_bif:ltrim(S). -reverse(S) -> emqx_variform_str:reverse(S). +reverse(S) -> emqx_variform_bif:reverse(S). -rtrim(S) -> emqx_variform_str:rtrim(S). +rtrim(S) -> emqx_variform_bif:rtrim(S). -strlen(S) -> emqx_variform_str:strlen(S). +strlen(S) -> emqx_variform_bif:strlen(S). -substr(S, Start) -> emqx_variform_str:substr(S, Start). +substr(S, Start) -> emqx_variform_bif:substr(S, Start). -substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length). +substr(S, Start, Length) -> emqx_variform_bif:substr(S, Start, Length). -trim(S) -> emqx_variform_str:trim(S). +trim(S) -> emqx_variform_bif:trim(S). -upper(S) -> emqx_variform_str:upper(S). +upper(S) -> emqx_variform_bif:upper(S). -split(S, P) -> emqx_variform_str:split(S, P). +split(S, P) -> emqx_variform_bif:split(S, P). -split(S, P, Position) -> emqx_variform_str:split(S, P, Position). +split(S, P, Position) -> emqx_variform_bif:split(S, P, Position). -tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators). +tokens(S, Separators) -> emqx_variform_bif:tokens(S, Separators). -tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF). +tokens(S, Separators, NoCRLF) -> emqx_variform_bif:tokens(S, Separators, NoCRLF). -concat(S1, S2) -> emqx_variform_str:concat(S1, S2). +concat(S1, S2) -> emqx_variform_bif:concat(S1, S2). -concat(List) -> emqx_variform_str:concat(List). +concat(List) -> emqx_variform_bif:concat(List). 
-sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args). +sprintf_s(Format, Args) -> emqx_variform_bif:sprintf_s(Format, Args). -pad(S, Len) -> emqx_variform_str:pad(S, Len). +pad(S, Len) -> emqx_variform_bif:pad(S, Len). -pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position). +pad(S, Len, Position) -> emqx_variform_bif:pad(S, Len, Position). -pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char). +pad(S, Len, Position, Char) -> emqx_variform_bif:pad(S, Len, Position, Char). -replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr). +replace(SrcStr, Pattern, RepStr) -> emqx_variform_bif:replace(SrcStr, Pattern, RepStr). replace(SrcStr, Pattern, RepStr, Position) -> - emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position). + emqx_variform_bif:replace(SrcStr, Pattern, RepStr, Position). -regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE). +regex_match(Str, RE) -> emqx_variform_bif:regex_match(Str, RE). -regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr). +regex_replace(SrcStr, RE, RepStr) -> emqx_variform_bif:regex_replace(SrcStr, RE, RepStr). -ascii(Char) -> emqx_variform_str:ascii(Char). +ascii(Char) -> emqx_variform_bif:ascii(Char). -find(S, P) -> emqx_variform_str:find(S, P). +find(S, P) -> emqx_variform_bif:find(S, P). -find(S, P, Position) -> emqx_variform_str:find(S, P, Position). +find(S, P, Position) -> emqx_variform_bif:find(S, P, Position). -join_to_string(Str) -> emqx_variform_str:join_to_string(Str). +join_to_string(Str) -> emqx_variform_bif:join_to_string(Str). -join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List). +join_to_string(Sep, List) -> emqx_variform_bif:join_to_string(Sep, List). join_to_sql_values_string(List) -> QuotedList = @@ -878,7 +878,7 @@ jq(FilterProgram, JSONBin) -> ]) ). -unescape(Str) -> emqx_variform_str:unescape(Str). +unescape(Str) -> emqx_variform_bif:unescape(Str). 
%%------------------------------------------------------------------------------ %% Array Funcs diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 0a26f7480..09a673851 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -28,14 +28,35 @@ erase_allowed_module/1, erase_allowed_modules/1 ]). + -export([render/2, render/3]). +-export([compile/1, decompile/1]). + +-export_type([compiled/0]). + +-type compiled() :: #{expr := string(), form := term()}. +-define(BIF_MOD, emqx_variform_bif). +-define(IS_ALLOWED_MOD(M), + (M =:= ?BIF_MOD orelse + M =:= lists orelse + M =:= maps) +). + +-define(COALESCE_BADARG, + throw(#{ + reason => coalesce_badarg, + explain => + "must be an array, or a call to a function which returns an array, " + "for example: coalesce([a,b,c]) or coalesce(tokens(var,','))" + }) +). %% @doc Render a variform expression with bindings. %% A variform expression is a template string which supports variable substitution %% and function calls. %% %% The function calls are in the form of `module.function(arg1, arg2, ...)` where `module` -%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str` module. +%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_bif` module. %% Both module and function must be existing atoms, and only whitelisted functions are allowed. %% %% A function arg can be a constant string or a number. @@ -49,18 +70,54 @@ %% %% For unresolved variables, empty string (but not "undefined") is used. %% In case of runtime exeption, an error is returned. +%% If an unbound variable is referenced, an error is returned. -spec render(string(), map()) -> {ok, binary()} | {error, term()}. render(Expression, Bindings) -> render(Expression, Bindings, #{}). 
-render(Expression, Bindings, Opts) when is_binary(Expression) -> - render(unicode:characters_to_list(Expression), Bindings, Opts); +render(#{form := Form}, Bindings, Opts) -> + eval_as_string(Form, Bindings, Opts); render(Expression, Bindings, Opts) -> + case compile(Expression) of + {ok, Compiled} -> + render(Compiled, Bindings, Opts); + {error, Reason} -> + {error, Reason} + end. + +eval_as_string(Expr, Bindings, _Opts) -> + try + {ok, return_str(eval(Expr, Bindings, #{}))} + catch + throw:Reason -> + {error, Reason}; + C:E:S -> + {error, #{exception => C, reason => E, stack_trace => S}} + end. + +%% Force the expression to return a binary string. +return_str(Str) when is_binary(Str) -> Str; +return_str(Num) when is_integer(Num) -> integer_to_binary(Num); +return_str(Num) when is_float(Num) -> float_to_binary(Num, [{decimals, 10}, compact]); +return_str(Other) -> + throw(#{ + reason => bad_return, + expected => string, + got => Other + }). + +%% @doc Compile a variform expression. +-spec compile(string() | binary() | compiled()) -> {ok, compiled()} | {error, any()}. +compile(#{form := _} = Compiled) -> + {ok, Compiled}; +compile(Expression) when is_binary(Expression) -> + compile(unicode:characters_to_list(Expression)); +compile(Expression) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> case emqx_variform_parser:parse(Tokens) of - {ok, Expr} -> - eval_as_string(Expr, Bindings, Opts); + {ok, Form} -> + {ok, #{expr => Expression, form => Form}}; {error, {_, emqx_variform_parser, Msg}} -> %% syntax error {error, lists:flatten(Msg)}; @@ -71,40 +128,59 @@ render(Expression, Bindings, Opts) -> {error, Reason} end. -eval_as_string(Expr, Bindings, _Opts) -> - try - {ok, str(eval(Expr, Bindings))} - catch - throw:Reason -> - {error, Reason}; - C:E:S -> - {error, #{exception => C, reason => E, stack_trace => S}} - end. +decompile(#{expr := Expression}) -> + Expression; +decompile(Expression) -> + Expression. 
-eval({str, Str}, _Bindings) -> - str(Str); -eval({integer, Num}, _Bindings) -> +eval({str, Str}, _Bindings, _Opts) -> + unicode:characters_to_binary(Str); +eval({integer, Num}, _Bindings, _Opts) -> Num; -eval({float, Num}, _Bindings) -> +eval({float, Num}, _Bindings, _Opts) -> Num; -eval({array, Args}, Bindings) -> - eval(Args, Bindings); -eval({call, FuncNameStr, Args}, Bindings) -> +eval({array, Args}, Bindings, Opts) -> + eval_loop(Args, Bindings, Opts); +eval({call, FuncNameStr, Args}, Bindings, Opts) -> {Mod, Fun} = resolve_func_name(FuncNameStr), ok = assert_func_exported(Mod, Fun, length(Args)), - call(Mod, Fun, eval(Args, Bindings)); -eval({var, VarName}, Bindings) -> - resolve_var_value(VarName, Bindings); -eval([Arg | Args], Bindings) -> - [eval(Arg, Bindings) | eval(Args, Bindings)]; -eval([], _Bindings) -> - []. + case {Mod, Fun} of + {?BIF_MOD, coalesce} -> + eval_coalesce(Args, Bindings, Opts); + _ -> + call(Mod, Fun, eval_loop(Args, Bindings, Opts)) + end; +eval({var, VarName}, Bindings, Opts) -> + resolve_var_value(VarName, Bindings, Opts). + +eval_loop([], _, _) -> []; +eval_loop([H | T], Bindings, Opts) -> [eval(H, Bindings, Opts) | eval_loop(T, Bindings, Opts)]. + +%% coalesce treats var_unbound exception as empty string '' +eval_coalesce([{array, Args}], Bindings, Opts) -> + NewArgs = [lists:map(fun(Arg) -> try_eval(Arg, Bindings, Opts) end, Args)], + call(?BIF_MOD, coalesce, NewArgs); +eval_coalesce([Arg], Bindings, Opts) -> + case try_eval(Arg, Bindings, Opts) of + List when is_list(List) -> + call(?BIF_MOD, coalesce, [List]); % call/3 applies its last argument as the argument list, so wrap List to invoke coalesce/1 + <<>> -> + <<>>; + _ -> + ?COALESCE_BADARG + end; +eval_coalesce(_Args, _Bindings, _Opts) -> + ?COALESCE_BADARG. + +try_eval(Arg, Bindings, Opts) -> + try + eval(Arg, Bindings, Opts) + catch + throw:#{reason := var_unbound} -> + <<>> + end. %% Some functions accept arbitrary number of arguments but implemented as /1. 
-call(emqx_variform_str, concat, Args) -> - str(emqx_variform_str:concat(Args)); -call(emqx_variform_str, coalesce, Args) -> - str(emqx_variform_str:coalesce(Args)); call(Mod, Fun, Args) -> erlang:apply(Mod, Fun, Args). @@ -144,23 +220,23 @@ resolve_func_name(FuncNameStr) -> function => Fun }) end, - {emqx_variform_str, FuncName}; + {?BIF_MOD, FuncName}; _ -> throw(#{reason => invalid_function_reference, function => FuncNameStr}) end. -resolve_var_value(VarName, Bindings) -> +%% _Opts can be extended in the future. For example, unbound var as 'undefined' +resolve_var_value(VarName, Bindings, _Opts) -> case emqx_template:lookup_var(split(VarName), Bindings) of {ok, Value} -> Value; {error, _Reason} -> - <<>> + throw(#{ + var_name => VarName, + reason => var_unbound + }) end. -assert_func_exported(emqx_variform_str, concat, _Arity) -> - ok; -assert_func_exported(emqx_variform_str, coalesce, _Arity) -> - ok; assert_func_exported(Mod, Fun, Arity) -> ok = try_load(Mod), case erlang:function_exported(Mod, Fun, Arity) of @@ -187,7 +263,7 @@ try_load(Mod) -> ok end. -assert_module_allowed(emqx_variform_str) -> +assert_module_allowed(Mod) when ?IS_ALLOWED_MOD(Mod) -> ok; assert_module_allowed(Mod) -> Allowed = get_allowed_modules(), @@ -220,8 +296,5 @@ erase_allowed_modules(Modules) when is_list(Modules) -> get_allowed_modules() -> persistent_term:get({emqx_variform, allowed_modules}, []). -str(Value) -> - emqx_utils_conv:bin(Value). - split(VarName) -> lists:map(fun erlang:iolist_to_binary/1, string:tokens(VarName, ".")). diff --git a/apps/emqx_utils/src/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_bif.erl similarity index 90% rename from apps/emqx_utils/src/emqx_variform_str.erl rename to apps/emqx_utils/src/emqx_variform_bif.erl index a53e1e216..fe5cb2369 100644 --- a/apps/emqx_utils/src/emqx_variform_str.erl +++ b/apps/emqx_utils/src/emqx_variform_bif.erl @@ -14,13 +14,11 @@ %% limitations under the License. 
%%-------------------------------------------------------------------- -%% Predefined functions string templating --module(emqx_variform_str). +%% Predefined functions for variform expressions. +-module(emqx_variform_bif). %% String Funcs -export([ - coalesce/1, - coalesce/2, lower/1, ltrim/1, ltrim/2, @@ -47,15 +45,22 @@ replace/4, regex_match/2, regex_replace/3, + regex_extract/2, ascii/1, find/2, find/3, join_to_string/1, join_to_string/2, unescape/1, - nth/2 + any_to_str/1 ]). +%% Array functions +-export([nth/2]). + +%% Control functions +-export([coalesce/1, coalesce/2]). + -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). %%------------------------------------------------------------------------------ @@ -143,8 +148,10 @@ tokens(S, Separators, <<"nocrlf">>) -> concat(S1, S2) -> concat([S1, S2]). +%% @doc Concatenate a list of strings. +%% NOTE: it converts non-string elements to Erlang term literals for backward compatibility concat(List) -> - unicode:characters_to_binary(lists:map(fun str/1, List), unicode). + unicode:characters_to_binary(lists:map(fun any_to_str/1, List), unicode). sprintf_s(Format, Args) when is_list(Args) -> erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). @@ -190,6 +197,22 @@ regex_match(Str, RE) -> regex_replace(SrcStr, RE, RepStr) -> re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). +%% @doc Searches the string Str for patterns specified by Regexp. +%% If matches are found, it returns a list of all captured groups from these matches. +%% If no matches are found or there are no groups captured, it returns an empty list. +%% This function can be used to extract parts of a string based on a regular expression, +%% excluding the complete match itself. 
+%% Examples: +%% ("Number: 12345", "(\\d+)") -> [<<"12345">>] +%% ("Hello, world!", "(\\w+).*\\s(\\w+)") -> [<<"Hello">>, <<"world">>] +%% ("No numbers here!", "(\\d+)") -> [] +%% ("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})") -> [<<"2021">>, <<"05">>, <<"20">>] +regex_extract(Str, Regexp) -> + case re:run(Str, Regexp, [{capture, all_but_first, list}]) of + {match, [_ | _] = L} -> lists:map(fun erlang:iolist_to_binary/1, L); + _ -> [] + end. + ascii(Char) when is_binary(Char) -> [FirstC | _] = binary_to_list(Char), FirstC. @@ -212,7 +235,7 @@ join_to_string(List) when is_list(List) -> join_to_string(<<", ">>, List). join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> - iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). + iolist_to_binary(lists:join(Sep, [any_to_str(Item) || Item <- List])). unescape(Bin) when is_binary(Bin) -> UnicodeList = unicode:characters_to_list(Bin, utf8), @@ -364,5 +387,5 @@ is_hex_digit(_) -> false. %% Data Type Conversion Funcs %%------------------------------------------------------------------------------ -str(Data) -> +any_to_str(Data) -> emqx_utils_conv:bin(Data). diff --git a/apps/emqx_utils/test/emqx_variform_bif_tests.erl b/apps/emqx_utils/test/emqx_variform_bif_tests.erl new file mode 100644 index 000000000..b74f6fcac --- /dev/null +++ b/apps/emqx_utils/test/emqx_variform_bif_tests.erl @@ -0,0 +1,59 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Most of the functions are tested as rule-engine string funcs +-module(emqx_variform_bif_tests). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). + +regex_extract_test_() -> + [ + ?_assertEqual([<<"12345">>], regex_extract("Order number: 12345", "(\\d+)")), + ?_assertEqual( + [<<"Hello">>, <<"world">>], regex_extract("Hello, world!", "(\\w+).*\s(\\w+)") + ), + ?_assertEqual([], regex_extract("No numbers here!", "(\\d+)")), + ?_assertEqual( + [<<"2021">>, <<"05">>, <<"20">>], + regex_extract("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})") + ), + ?_assertEqual([<<"Hello">>], regex_extract("Hello, world!", "(Hello)")), + ?_assertEqual( + [<<"12">>, <<"34">>], regex_extract("Items: 12, Price: 34", "(\\d+).*\s(\\d+)") + ), + ?_assertEqual( + [<<"john.doe@example.com">>], + regex_extract("Contact: john.doe@example.com", "([\\w\\.]+@[\\w\\.]+)") + ), + ?_assertEqual([], regex_extract("Just some text, nothing more.", "([A-Z]\\d{3})")), + ?_assertEqual( + [<<"admin">>, <<"1234">>], + regex_extract("User: admin, Pass: 1234", "User: (\\w+), Pass: (\\d+)") + ), + ?_assertEqual([], regex_extract("", "(\\d+)")), + ?_assertEqual([], regex_extract("$$$###!!!", "(\\d+)")), + ?_assertEqual([<<"23.1">>], regex_extract("Erlang 23.1 version", "(\\d+\\.\\d+)")), + ?_assertEqual( + [<<"192.168.1.1">>], + regex_extract("Server IP: 192.168.1.1 at port 8080", "(\\d+\\.\\d+\\.\\d+\\.\\d+)") + ) + ]. + +regex_extract(Str, RegEx) -> + emqx_variform_bif:regex_extract(Str, RegEx). 
diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl index 72fbf2637..91da471c9 100644 --- a/apps/emqx_utils/test/emqx_variform_tests.erl +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -27,14 +27,16 @@ redner_test_() -> [ {"direct var reference", fun() -> ?assertEqual({ok, <<"1">>}, render("a", #{a => 1})) end}, {"concat strings", fun() -> - ?assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{})) + ?assertEqual({ok, <<"a,b">>}, render("concat(['a',',','b'])", #{})) + end}, + {"concat empty string", fun() -> + ?assertEqual({ok, <<"">>}, render("concat([''])", #{})) end}, - {"concat empty string", fun() -> ?assertEqual({ok, <<"">>}, render("concat('')", #{})) end}, {"tokens 1st", fun() -> ?assertEqual({ok, <<"a">>}, render("nth(1,tokens(var, ','))", #{var => <<"a,b">>})) end}, - {"unknown var as empty str", fun() -> - ?assertEqual({ok, <<>>}, render("var", #{})) + {"unknown var return error", fun() -> + ?assertMatch({error, #{reason := var_unbound}}, render("var", #{})) end}, {"out of range nth index", fun() -> ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>})) @@ -97,7 +99,7 @@ unknown_func_test_() -> {"unknown function in a known module", fun() -> ?assertMatch( {error, #{reason := unknown_variform_function}}, - render("emqx_variform_str.nonexistingatom__(a)", #{}) + render("emqx_variform_bif.nonexistingatom__(a)", #{}) ) end}, {"invalid func reference", fun() -> @@ -133,19 +135,39 @@ inject_allowed_module_test() -> coalesce_test_() -> [ - {"coalesce first", fun() -> - ?assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{})) + {"first", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce(['a','b'])", #{})) end}, - {"coalesce second", fun() -> - ?assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{})) + {"second", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(['', 'b'])", #{})) end}, - {"coalesce first var", fun() -> - ?assertEqual({ok, <<"a">>}, 
render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>})) + {"first var", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce([a,b])", #{a => <<"a">>, b => <<"b">>})) end}, - {"coalesce second var", fun() -> - ?assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>})) + {"second var", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce([a,b])", #{b => <<"b">>})) end}, - {"coalesce empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a,b)", #{})) end} + {"empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce([a,b])", #{})) end}, + {"arg from other func", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(tokens(a,','))", #{a => <<",,b,c">>})) + end}, + {"var unbound", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a)", #{})) end}, + {"var unbound in call", fun() -> + ?assertEqual({ok, <<>>}, render("coalesce(concat(a))", #{})) + end}, + {"var unbound in calls", fun() -> + ?assertEqual({ok, <<"c">>}, render("coalesce([any_to_str(a),any_to_str(b),'c'])", #{})) + end}, + {"badarg", fun() -> + ?assertMatch( + {error, #{reason := coalesce_badarg}}, render("coalesce(a,b)", #{a => 1, b => 2}) + ) + end}, + {"badarg from return", fun() -> + ?assertMatch( + {error, #{reason := coalesce_badarg}}, render("coalesce(any_to_str(a))", #{a => 1}) + ) + end} ]. syntax_error_test_() -> diff --git a/changes/ce/feat-12750.en.md b/changes/ce/feat-12872.en.md similarity index 93% rename from changes/ce/feat-12750.en.md rename to changes/ce/feat-12872.en.md index d0a70e6fc..dfc799bb2 100644 --- a/changes/ce/feat-12750.en.md +++ b/changes/ce/feat-12872.en.md @@ -7,8 +7,8 @@ an MQTT connection. ### Initialization of `client_attrs` -- The `client_attrs` fields can be initially populated based on the configuration from one of the - following sources: +- The `client_attrs` fields can be initially populated from one of the + following `clientinfo` fields: - `cn`: The common name from the TLS client's certificate. 
- `dn`: The distinguished name from the TLS client's certificate, that is, the certificate "Subject". - `clientid`: The MQTT client ID provided by the client. diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index 90fbfeefc..0868bf93c 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -1575,48 +1575,37 @@ client_attrs_init { label: "Client Attributes Initialization" desc: """~ Specify how to initialize client attributes. - This config accepts one initialization rule, or a list of rules. - Client attributes can be initialized as `client_attrs.NAME`, - where `NAME` is the name of the attribute specified in the config `extract_as`. + Each client attribute can be initialized as `client_attrs.{NAME}`, + where `{NAME}` is the name of the attribute specified in the config field `set_as_attr`. The initialized client attribute will be stored in the `client_attrs` property with the specified name, and can be used as a placeholder in a template for authentication and authorization. - For example, use `${client_attrs.alias}` to render an HTTP POST body when `extract_as = alias`, + For example, use `${client_attrs.alias}` to render an HTTP POST body when `set_as_attr = alias`, or render listener config `moutpoint = devices/${client_attrs.alias}/` to initialize a per-client topic namespace.""" } -client_attrs_init_extract_from { - label: "Client Property to Extract Attribute" - desc: """~ - Specify from which client property the client attribute should be extracted. - - Supported values: - - `clientid`: Extract from the client ID. - - `username`: Extract from the username. - - `cn`: Extract from the Common Name (CN) field of the client certificate. - - `dn`: Extract from the Distinguished Name (DN) field of the client certificate. - - `user_property`: Extract from the user property sent in the MQTT v5 `CONNECT` packet. - In this case, `extract_regexp` is not applicable, and `extract_as` should be the user property key. 
- - NOTE: this extraction happens **after** `clientid` or `username` is initialized - from `peer_cert_as_clientid` or `peer_cert_as_username` config.""" -} - -client_attrs_init_extract_regexp { +client_attrs_init_expression { label: "Client Attribute Extraction Regular Expression" desc: """~ - The regular expression to extract a client attribute from the client property specified by `client_attrs_init.extract_from` config. - The expression should match the entire client property value, and capturing groups are concatenated to make the client attribute. - For example if the client attribute is the first part of the client ID delimited by a dash, the regular expression would be `^(.+?)-.*$`. - Note that failure to match the regular expression will result in the client attribute being absent but not an empty string. - Note also that currently only printable ASCII characters are allowed as input for the regular expression extraction.""" + A one-line expression to evaluate a set of predefined string functions (like in the rule engine SQL statements). + The expression accepts direct variable reference, or one function call with nested calls for its arguments, + but it does not provide user-defined variable binding or user-defined functions; only the pre-bound variables can be referenced. + For example, to extract the prefix of client ID delimited by a dot: `nth(1, tokens(clientid, '.'))`. + + The pre-bound variables are: + - `cn`: Client's TLS certificate common name. + - `dn`: Client's TLS certificate distinguished name (the subject). + - `clientid`: MQTT Client ID. + - `username`: MQTT Client's username. + - `user_property.{NAME}`: User properties in the CONNECT packet. + + You can read more about variform expressions in EMQX docs.""" } -client_attrs_init_extract_as { +client_attrs_init_set_as_attr { label: "Name The Extracted Attribute" desc: """~ - The name of the client attribute extracted from the client property specified by `client_attrs_init.extract_from` config.
- The extracted attribute will be stored in the `client_attrs` property with this name. - In case `extract_from = user_property`, this should be the key of the user property.""" + The name of the client attribute extracted from the client data. + The extracted attribute will be stored in the `client_attrs` property with this name.""" } } diff --git a/scripts/spellcheck/dicts/emqx.txt b/scripts/spellcheck/dicts/emqx.txt index 7e8fed96f..c7c266434 100644 --- a/scripts/spellcheck/dicts/emqx.txt +++ b/scripts/spellcheck/dicts/emqx.txt @@ -259,6 +259,7 @@ uplink url utc util +variform ver vm vsn