From 9bf65a415b17f635f2a86487920bc5a0ac8df580 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 27 Mar 2024 17:34:18 +0100 Subject: [PATCH 1/8] feat(variform): add a variable transformer --- .gitignore | 2 ++ .../src/variform/emqx_variform_parser.yrl | 16 ++++++++++++ apps/emqx/src/variform/emqx_variform_scan.xrl | 26 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 apps/emqx/src/variform/emqx_variform_parser.yrl create mode 100644 apps/emqx/src/variform/emqx_variform_scan.xrl diff --git a/.gitignore b/.gitignore index 5e91d4bc5..a2c8b7e65 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,5 @@ rebar-git-cache.tar .docker_image_tag .emqx_docker_image_tags .git/ +apps/emqx/src/emqx_variform_parser.erl +apps/emqx/src/variform/emqx_variform_scan.erl diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx/src/variform/emqx_variform_parser.yrl new file mode 100644 index 000000000..3f2b739ba --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_parser.yrl @@ -0,0 +1,16 @@ +Nonterminals expr call_or_var args. +Terminals identifier number string '(' ')' ','. + +Rootsymbol expr. + +%% Grammar Rules +expr -> call_or_var: '$1'. + +call_or_var -> identifier '(' args ')' : {call, element(3,'$1'), '$3'}. +call_or_var -> identifier : {var, element(3, '$1')}. +args -> expr : ['$1']. +args -> args ',' expr : '$1' ++ ['$3']. + +%% Handling direct values and variables within arguments +expr -> number : {num, element(3, '$1')}. +expr -> string : {str, element(3, '$1')}. diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl b/apps/emqx/src/variform/emqx_variform_scan.xrl new file mode 100644 index 000000000..53657bad4 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_scan.xrl @@ -0,0 +1,26 @@ +Definitions. 
+%% Define regular expressions for tokens +IDENTIFIER = [a-zA-Z][a-zA-Z0-9_.]* +SQ_STRING = \'[^\']*\' +DQ_STRING = \"[^\"]*\" +NUMBER = [+-]?([0-9]+\.[0-9]+|[0-9]+) +LPAREN = \( +RPAREN = \) +COMMA = , +WHITESPACE = [\s\t\n]+ + +Rules. +%% Match function names, variable names (with ${}), strings, numbers, and structural characters +{WHITESPACE} : skip_token. +{IDENTIFIER} : {token, {identifier, TokenLine, TokenChars}}. +{SQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $')}}. +{DQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $")}}. +{NUMBER} : {token, {number, TokenLine, TokenChars}}. +{LPAREN} : {token, {'(', TokenLine}}. +{RPAREN} : {token, {')', TokenLine}}. +{COMMA} : {token, {',', TokenLine}}. + +Erlang code. + +unquote(String, Char) -> + string:trim(String, both, [Char]). From ad95473aaef0148d350996f2a2bfccc1b812f90f Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 13:59:31 +0100 Subject: [PATCH 2/8] refactor: move string functions to emqx_variform --- apps/emqx/src/variform/emqx_variform.erl | 39 ++ apps/emqx/src/variform/emqx_variform_str.erl | 353 ++++++++++++++++++ apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 271 +++----------- 3 files changed, 434 insertions(+), 229 deletions(-) create mode 100644 apps/emqx/src/variform/emqx_variform.erl create mode 100644 apps/emqx/src/variform/emqx_variform_str.erl diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl new file mode 100644 index 000000000..6f4fd6f47 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -0,0 +1,39 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Predefined functions for templating +-module(emqx_variform). + +-export([render/2]). + +render(Expression, Context) -> + case emqx_variform_scan:string(Expression) of + {ok, Tokens, _Line} -> + case emqx_variform_parser:parse(Tokens) of + {ok, Expr} -> + eval(Expr, Context); + {error, {_, emqx_variform_parser, Msg}} -> + %% syntax error + {error, lists:flatten(Msg)}; + {error, Reason} -> + {error, Reason} + end; + {error, Reason, _Line} -> + {error, Reason} + end. + +eval(Expr, _Context) -> + io:format(user, "~p~n", [Expr]). diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx/src/variform/emqx_variform_str.erl new file mode 100644 index 000000000..d94519f76 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_str.erl @@ -0,0 +1,353 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%-------------------------------------------------------------------- + +%% Predefined functions string templating +-module(emqx_variform_str). + +%% String Funcs +-export([ + coalesce/1, + coalesce/2, + lower/1, + ltrim/1, + ltrim/2, + reverse/1, + rtrim/1, + rtrim/2, + strlen/1, + substr/2, + substr/3, + trim/1, + trim/2, + upper/1, + split/2, + split/3, + concat/1, + concat/2, + tokens/2, + tokens/3, + sprintf_s/2, + pad/2, + pad/3, + pad/4, + replace/3, + replace/4, + regex_match/2, + regex_replace/3, + ascii/1, + find/2, + find/3, + join_to_string/1, + join_to_string/2, + unescape/1 +]). + +-define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). + +%%------------------------------------------------------------------------------ +%% String Funcs +%%------------------------------------------------------------------------------ + +%% @doc Return the first non-empty string (<<>>, "" and undefined are treated as empty) +coalesce(A, B) when ?IS_EMPTY(A) andalso ?IS_EMPTY(B) -> + <<>>; +coalesce(A, B) when ?IS_EMPTY(A) -> + B; +coalesce(A, _) -> + A. + +%% @doc Return the first non-empty string +coalesce([]) -> + <<>>; +coalesce([H | T]) -> + coalesce(H, coalesce(T)). + +lower(S) when is_binary(S) -> + string:lowercase(S). + +ltrim(S) when is_binary(S) -> + string:trim(S, leading). + +ltrim(S, Chars) -> + string:trim(S, leading, Chars). + +reverse(S) when is_binary(S) -> + iolist_to_binary(string:reverse(S)). + +rtrim(S) when is_binary(S) -> + string:trim(S, trailing). + +rtrim(S, Chars) when is_binary(S) -> + string:trim(S, trailing, Chars). + +strlen(S) when is_binary(S) -> + string:length(S). + +substr(S, Start) when is_binary(S), is_integer(Start) -> + string:slice(S, Start). + +substr(S, Start, Length) when + is_binary(S), + is_integer(Start), + is_integer(Length) +-> + string:slice(S, Start, Length). + +trim(S) when is_binary(S) -> + string:trim(S). + +trim(S, Chars) when is_binary(S) -> + string:trim(S, both, Chars). + +upper(S) when is_binary(S) -> + string:uppercase(S).
+ +split(S, P) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, all), R =/= <<>> andalso R =/= ""]. + +split(S, P, <<"notrim">>) -> + string:split(S, P, all); +split(S, P, <<"leading_notrim">>) -> + string:split(S, P, leading); +split(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, leading), R =/= <<>> andalso R =/= ""]; +split(S, P, <<"trailing_notrim">>) -> + string:split(S, P, trailing); +split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, trailing), R =/= <<>> andalso R =/= ""]. + +tokens(S, Separators) -> + [list_to_binary(R) || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators))]. + +tokens(S, Separators, <<"nocrlf">>) -> + [ + list_to_binary(R) + || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]]) + ]. + +%% implicitly convert args to strings, then concatenate (delegates to concat/1) +concat(S1, S2) -> + concat([S1, S2]). + +concat(List) -> + unicode:characters_to_binary(lists:map(fun str/1, List), unicode). + +sprintf_s(Format, Args) when is_list(Args) -> + erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). + +pad(S, Len) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, trailing)). + +pad(S, Len, <<"trailing">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, trailing)); +pad(S, Len, <<"both">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, both)); +pad(S, Len, <<"leading">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, leading)).
+ +pad(S, Len, <<"trailing">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, trailing, Chars)); +pad(S, Len, <<"both">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, both, Chars)); +pad(S, Len, <<"leading">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, leading, Chars)). + +replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, all)). + +replace(SrcStr, P, RepStr, <<"all">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, all)); +replace(SrcStr, P, RepStr, <<"trailing">>) when + is_binary(SrcStr), is_binary(P), is_binary(RepStr) +-> + iolist_to_binary(string:replace(SrcStr, P, RepStr, trailing)); +replace(SrcStr, P, RepStr, <<"leading">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, leading)). + +regex_match(Str, RE) -> + case re:run(Str, RE, [global, {capture, none}]) of + match -> true; + nomatch -> false + end. + +regex_replace(SrcStr, RE, RepStr) -> + re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). + +ascii(Char) when is_binary(Char) -> + [FirstC | _] = binary_to_list(Char), + FirstC. + +find(S, P) when is_binary(S), is_binary(P) -> + find_s(S, P, leading). + +find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> + find_s(S, P, trailing); +find(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> + find_s(S, P, leading). + +find_s(S, P, Dir) -> + case string:find(S, P, Dir) of + nomatch -> <<"">>; + SubStr -> SubStr + end. + +join_to_string(List) when is_list(List) -> + join_to_string(<<", ">>, List). 
+ +join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> + iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). + +unescape(Bin) when is_binary(Bin) -> + UnicodeList = unicode:characters_to_list(Bin, utf8), + UnescapedUnicodeList = unescape_string(UnicodeList), + UnescapedUTF8Bin = unicode:characters_to_binary(UnescapedUnicodeList, utf32, utf8), + case UnescapedUTF8Bin of + Out when is_binary(Out) -> + Out; + Error -> + throw({invalid_unicode_character, Error}) + end. + +unescape_string(Input) -> unescape_string(Input, []). + +unescape_string([], Acc) -> + lists:reverse(Acc); +unescape_string([$\\, $\\ | Rest], Acc) -> + unescape_string(Rest, [$\\ | Acc]); +unescape_string([$\\, $n | Rest], Acc) -> + unescape_string(Rest, [$\n | Acc]); +unescape_string([$\\, $t | Rest], Acc) -> + unescape_string(Rest, [$\t | Acc]); +unescape_string([$\\, $r | Rest], Acc) -> + unescape_string(Rest, [$\r | Acc]); +unescape_string([$\\, $b | Rest], Acc) -> + unescape_string(Rest, [$\b | Acc]); +unescape_string([$\\, $f | Rest], Acc) -> + unescape_string(Rest, [$\f | Acc]); +unescape_string([$\\, $v | Rest], Acc) -> + unescape_string(Rest, [$\v | Acc]); +unescape_string([$\\, $' | Rest], Acc) -> + unescape_string(Rest, [$\' | Acc]); +unescape_string([$\\, $" | Rest], Acc) -> + unescape_string(Rest, [$\" | Acc]); +unescape_string([$\\, $? | Rest], Acc) -> + unescape_string(Rest, [$\? 
| Acc]); +unescape_string([$\\, $a | Rest], Acc) -> + unescape_string(Rest, [$\a | Acc]); +%% Start of HEX escape code +unescape_string([$\\, $x | [$0 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$1 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$2 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$3 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$4 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$5 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$6 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$7 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$8 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$9 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$A | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$B | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$C | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$D | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$E | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$F | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$a | _] = 
HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$b | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$c | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$d | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$e | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$f | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +%% We treat all other escape sequences as not valid input to leave room for +%% extending the function to support more escape codes +unescape_string([$\\, X | _Rest], _Acc) -> + erlang:throw({unrecognized_escape_sequence, list_to_binary([$\\, X])}); +unescape_string([First | Rest], Acc) -> + unescape_string(Rest, [First | Acc]). + +unescape_handle_hex_string(HexStringStart, Acc) -> + {RemainingString, Num} = parse_hex_string(HexStringStart), + unescape_string(RemainingString, [Num | Acc]). + +parse_hex_string(SeqStartingWithHexDigit) -> + parse_hex_string(SeqStartingWithHexDigit, []). + +parse_hex_string([], Acc) -> + ReversedAcc = lists:reverse(Acc), + {[], list_to_integer(ReversedAcc, 16)}; +parse_hex_string([First | Rest] = String, Acc) -> + case is_hex_digit(First) of + true -> + parse_hex_string(Rest, [First | Acc]); + false -> + ReversedAcc = lists:reverse(Acc), + {String, list_to_integer(ReversedAcc, 16)} + end. 
+ +is_hex_digit($0) -> true; +is_hex_digit($1) -> true; +is_hex_digit($2) -> true; +is_hex_digit($3) -> true; +is_hex_digit($4) -> true; +is_hex_digit($5) -> true; +is_hex_digit($6) -> true; +is_hex_digit($7) -> true; +is_hex_digit($8) -> true; +is_hex_digit($9) -> true; +is_hex_digit($A) -> true; +is_hex_digit($B) -> true; +is_hex_digit($C) -> true; +is_hex_digit($D) -> true; +is_hex_digit($E) -> true; +is_hex_digit($F) -> true; +is_hex_digit($a) -> true; +is_hex_digit($b) -> true; +is_hex_digit($c) -> true; +is_hex_digit($d) -> true; +is_hex_digit($e) -> true; +is_hex_digit($f) -> true; +is_hex_digit(_) -> false. + +%%------------------------------------------------------------------------------ +%% Data Type Conversion Funcs +%%------------------------------------------------------------------------------ + +str(Data) -> + emqx_utils_conv:bin(Data). diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index ac7f66597..ea8e192d4 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -132,6 +132,8 @@ %% String Funcs -export([ + coalesce/1, + coalesce/2, lower/1, ltrim/1, reverse/1, @@ -768,130 +770,67 @@ is_array(_) -> false. %% String Funcs %%------------------------------------------------------------------------------ -lower(S) when is_binary(S) -> - string:lowercase(S). +coalesce(List) -> emqx_variform_str:coalesce(List). -ltrim(S) when is_binary(S) -> - string:trim(S, leading). +coalesce(A, B) -> emqx_variform_str:coalesce(A, B). -reverse(S) when is_binary(S) -> - iolist_to_binary(string:reverse(S)). +lower(S) -> emqx_variform_str:lower(S). -rtrim(S) when is_binary(S) -> - string:trim(S, trailing). +ltrim(S) -> emqx_variform_str:ltrim(S). -strlen(S) when is_binary(S) -> - string:length(S). +reverse(S) -> emqx_variform_str:reverse(S). -substr(S, Start) when is_binary(S), is_integer(Start) -> - string:slice(S, Start). 
+rtrim(S) -> emqx_variform_str:rtrim(S). -substr(S, Start, Length) when - is_binary(S), - is_integer(Start), - is_integer(Length) --> - string:slice(S, Start, Length). +strlen(S) -> emqx_variform_str:strlen(S). -trim(S) when is_binary(S) -> - string:trim(S). +substr(S, Start) -> emqx_variform_str:substr(S, Start). -upper(S) when is_binary(S) -> - string:uppercase(S). +substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length). -split(S, P) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, all), R =/= <<>> andalso R =/= ""]. +trim(S) -> emqx_variform_str:trim(S). -split(S, P, <<"notrim">>) -> - string:split(S, P, all); -split(S, P, <<"leading_notrim">>) -> - string:split(S, P, leading); -split(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, leading), R =/= <<>> andalso R =/= ""]; -split(S, P, <<"trailing_notrim">>) -> - string:split(S, P, trailing); -split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, trailing), R =/= <<>> andalso R =/= ""]. +upper(S) -> emqx_variform_str:upper(S). -tokens(S, Separators) -> - [list_to_binary(R) || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators))]. +split(S, P) -> emqx_variform_str:split(S, P). -tokens(S, Separators, <<"nocrlf">>) -> - [ - list_to_binary(R) - || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]]) - ]. +split(S, P, Position) -> emqx_variform_str:split(S, P, Position). -%% implicit convert args to strings, and then do concatenation -concat(S1, S2) -> - unicode:characters_to_binary([str(S1), str(S2)], unicode). +tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators). -sprintf_s(Format, Args) when is_list(Args) -> - erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). +tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF). 
-pad(S, Len) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, trailing)). +concat(S1, S2) -> emqx_variform_str:concat(S1, S2). -pad(S, Len, <<"trailing">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, trailing)); -pad(S, Len, <<"both">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, both)); -pad(S, Len, <<"leading">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, leading)). +concat(List) -> emqx_variform_str:concat(List). -pad(S, Len, <<"trailing">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, trailing, Chars)); -pad(S, Len, <<"both">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, both, Chars)); -pad(S, Len, <<"leading">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, leading, Chars)). +sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args). -replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, all)). +pad(S, Len) -> emqx_variform_str:pad(S, Len). -replace(SrcStr, P, RepStr, <<"all">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, all)); -replace(SrcStr, P, RepStr, <<"trailing">>) when - is_binary(SrcStr), is_binary(P), is_binary(RepStr) --> - iolist_to_binary(string:replace(SrcStr, P, RepStr, trailing)); -replace(SrcStr, P, RepStr, <<"leading">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, leading)). +pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position). 
-regex_match(Str, RE) -> - case re:run(Str, RE, [global, {capture, none}]) of - match -> true; - nomatch -> false - end. +pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char). -regex_replace(SrcStr, RE, RepStr) -> - re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). +replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr). -ascii(Char) when is_binary(Char) -> - [FirstC | _] = binary_to_list(Char), - FirstC. +replace(SrcStr, Pattern, RepStr, Position) -> + emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position). -find(S, P) when is_binary(S), is_binary(P) -> - find_s(S, P, leading). +regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE). -find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> - find_s(S, P, trailing); -find(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> - find_s(S, P, leading). +regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr). -find_s(S, P, Dir) -> - case string:find(S, P, Dir) of - nomatch -> <<"">>; - SubStr -> SubStr - end. +ascii(Char) -> emqx_variform_str:ascii(Char). + +find(S, P) -> emqx_variform_str:find(S, P). + +find(S, P, Position) -> emqx_variform_str:find(S, P, Position). + +join_to_string(Str) -> emqx_variform_str:join_to_string(Str). + +join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List). -join_to_string(List) when is_list(List) -> - join_to_string(<<", ">>, List). -join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> - iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). join_to_sql_values_string(List) -> QuotedList = [ @@ -938,137 +877,7 @@ jq(FilterProgram, JSONBin) -> ]) ). 
-unescape(Bin) when is_binary(Bin) -> - UnicodeList = unicode:characters_to_list(Bin, utf8), - UnescapedUnicodeList = unescape_string(UnicodeList), - UnescapedUTF8Bin = unicode:characters_to_binary(UnescapedUnicodeList, utf32, utf8), - case UnescapedUTF8Bin of - Out when is_binary(Out) -> - Out; - Error -> - throw({invalid_unicode_character, Error}) - end. - -unescape_string(Input) -> unescape_string(Input, []). - -unescape_string([], Acc) -> - lists:reverse(Acc); -unescape_string([$\\, $\\ | Rest], Acc) -> - unescape_string(Rest, [$\\ | Acc]); -unescape_string([$\\, $n | Rest], Acc) -> - unescape_string(Rest, [$\n | Acc]); -unescape_string([$\\, $t | Rest], Acc) -> - unescape_string(Rest, [$\t | Acc]); -unescape_string([$\\, $r | Rest], Acc) -> - unescape_string(Rest, [$\r | Acc]); -unescape_string([$\\, $b | Rest], Acc) -> - unescape_string(Rest, [$\b | Acc]); -unescape_string([$\\, $f | Rest], Acc) -> - unescape_string(Rest, [$\f | Acc]); -unescape_string([$\\, $v | Rest], Acc) -> - unescape_string(Rest, [$\v | Acc]); -unescape_string([$\\, $' | Rest], Acc) -> - unescape_string(Rest, [$\' | Acc]); -unescape_string([$\\, $" | Rest], Acc) -> - unescape_string(Rest, [$\" | Acc]); -unescape_string([$\\, $? | Rest], Acc) -> - unescape_string(Rest, [$\? 
| Acc]); -unescape_string([$\\, $a | Rest], Acc) -> - unescape_string(Rest, [$\a | Acc]); -%% Start of HEX escape code -unescape_string([$\\, $x | [$0 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$1 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$2 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$3 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$4 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$5 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$6 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$7 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$8 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$9 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$A | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$B | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$C | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$D | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$E | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$F | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$a | _] = 
HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$b | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$c | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$d | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$e | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$f | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -%% We treat all other escape sequences as not valid input to leave room for -%% extending the function to support more escape codes -unescape_string([$\\, X | _Rest], _Acc) -> - erlang:throw({unrecognized_escape_sequence, list_to_binary([$\\, X])}); -unescape_string([First | Rest], Acc) -> - unescape_string(Rest, [First | Acc]). - -unescape_handle_hex_string(HexStringStart, Acc) -> - {RemainingString, Num} = parse_hex_string(HexStringStart), - unescape_string(RemainingString, [Num | Acc]). - -parse_hex_string(SeqStartingWithHexDigit) -> - parse_hex_string(SeqStartingWithHexDigit, []). - -parse_hex_string([], Acc) -> - ReversedAcc = lists:reverse(Acc), - {[], list_to_integer(ReversedAcc, 16)}; -parse_hex_string([First | Rest] = String, Acc) -> - case is_hex_digit(First) of - true -> - parse_hex_string(Rest, [First | Acc]); - false -> - ReversedAcc = lists:reverse(Acc), - {String, list_to_integer(ReversedAcc, 16)} - end. 
- -is_hex_digit($0) -> true; -is_hex_digit($1) -> true; -is_hex_digit($2) -> true; -is_hex_digit($3) -> true; -is_hex_digit($4) -> true; -is_hex_digit($5) -> true; -is_hex_digit($6) -> true; -is_hex_digit($7) -> true; -is_hex_digit($8) -> true; -is_hex_digit($9) -> true; -is_hex_digit($A) -> true; -is_hex_digit($B) -> true; -is_hex_digit($C) -> true; -is_hex_digit($D) -> true; -is_hex_digit($E) -> true; -is_hex_digit($F) -> true; -is_hex_digit($a) -> true; -is_hex_digit($b) -> true; -is_hex_digit($c) -> true; -is_hex_digit($d) -> true; -is_hex_digit($e) -> true; -is_hex_digit($f) -> true; -is_hex_digit(_) -> false. +unescape(Str) -> emqx_variform_str:unescape(Str). %%------------------------------------------------------------------------------ %% Array Funcs @@ -1095,6 +904,10 @@ last(List) when is_list(List) -> contains(Elm, List) when is_list(List) -> lists:member(Elm, List). +%%------------------------------------------------------------------------------ +%% Map Funcs +%%------------------------------------------------------------------------------ + map_new() -> #{}. From 5f26e4ed5e66de9d2193329e5738bf230201bf5b Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 18:03:03 +0100 Subject: [PATCH 3/8] feat(variform): implement variform engine --- apps/emqx/src/variform/emqx_variform.erl | 158 +++++++++++++++++- apps/emqx/src/variform/emqx_variform_str.erl | 10 +- apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 1 + 3 files changed, 158 insertions(+), 11 deletions(-) diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl index 6f4fd6f47..51108885c 100644 --- a/apps/emqx/src/variform/emqx_variform.erl +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -14,17 +14,46 @@ %% limitations under the License. %%-------------------------------------------------------------------- -%% Predefined functions for templating +%% @doc This module provides a single-line expression string rendering engine. 
+%% A predefined set of functions is allowed to be called in the expressions. +%% Only simple string expressions are supported, and no control flow is allowed. +%% However, with the help from the functions, some control flow can be achieved. +%% For example, the `coalesce` function can be used to provide a default value, +%% or used to choose the first non-empty value from a list of variables. -module(emqx_variform). --export([render/2]). +-export([inject_allowed_modules/1]). +-export([render/2, render/3]). -render(Expression, Context) -> +%% @doc Render a variform expression with bindings. +%% A variform expression is a template string which supports variable substitution +%% and function calls. +%% +%% The function calls are in the form of `module.function(arg1, arg2, ...)` where `module` +%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str` module. +%% Both module and function must be existing atoms, and only whitelisted functions are allowed. +%% +%% A function arg can be a constant string or a number. +%% Strings can be quoted with single quotes or double quotes, without support of escape characters. +%% If some special characters are needed, the function `unescape' can be used to convert an escaped string +%% to raw bytes. +%% For example, to get the first line of a multi-line string, the expression can be +%% `coalesce(tokens(variable_name, unescape("\n")))'. +%% +%% The bindings argument is a map of variables to their values. +%% +%% For unresolved variables, empty string (but not "undefined") is used. +%% In case of runtime exception, an error is returned. +-spec render(string(), map()) -> {ok, binary()} | {error, term()}. +render(Expression, Bindings) -> + render(Expression, Bindings, #{}).
+ +render(Expression, Bindings, Opts) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> case emqx_variform_parser:parse(Tokens) of {ok, Expr} -> - eval(Expr, Context); + eval_as_string(Expr, Bindings, Opts); {error, {_, emqx_variform_parser, Msg}} -> %% syntax error {error, lists:flatten(Msg)}; @@ -35,5 +64,122 @@ render(Expression, Context) -> {error, Reason} end. -eval(Expr, _Context) -> - io:format(user, "~p~n", [Expr]). +eval_as_string(Expr, Bindings, _Opts) -> + try + {ok, iolist_to_binary(eval(Expr, Bindings))} + catch + throw:Reason -> + {error, Reason}; + C:E:S -> + {error, #{exception => C, reason => E, stack_trace => S}} + end. + +eval({str, Str}, _Bindings) -> + str(Str); +eval({num, Num}, _Bindings) -> + str(Num); +eval({call, FuncNameStr, Args}, Bindings) -> + {Mod, Fun} = resolve_func_name(FuncNameStr), + ok = assert_func_exported(Mod, Fun, length(Args)), + call(Mod, Fun, eval(Args, Bindings)); +eval({var, VarName}, Bindings) -> + resolve_var_value(VarName, Bindings); +eval([Arg | Args], Bindings) -> + [eval(Arg, Bindings) | eval(Args, Bindings)]; +eval([], _Bindings) -> + []. + +%% Some functions accept arbitrary number of arguments but implemented as /1. +call(emqx_variform_str, concat, Args) -> + str(emqx_variform_str:concat(Args)); +call(emqx_variform_str, coalesce, Args) -> + str(emqx_variform_str:coalesce(Args)); +call(Mod, Fun, Args) -> + str(erlang:apply(Mod, Fun, Args)). 
+ +resolve_func_name(FuncNameStr) -> + case string:tokens(FuncNameStr, ".") of + [Mod0, Fun0] -> + Mod = + try + list_to_existing_atom(Mod0) + catch + error:badarg -> + throw(#{unknown_module => Mod0}) + end, + ok = assert_module_allowed(Mod), + Fun = + try + list_to_existing_atom(Fun0) + catch + error:badarg -> + throw(#{unknown_function => Fun0}) + end, + {Mod, Fun}; + [Fun] -> + FuncName = + try + list_to_existing_atom(Fun) + catch + error:badarg -> + throw(#{ + reason => "unknown_variform_function", + function => Fun + }) + end, + {emqx_variform_str, FuncName} + end. + +resolve_var_value(VarName, Bindings) -> + case emqx_template:lookup_var(split(VarName), Bindings) of + {ok, Value} -> + str(Value); + {error, _Reason} -> + <<>> + end. + +assert_func_exported(emqx_variform_str, concat, _Arity) -> + ok; +assert_func_exported(emqx_variform_str, coalesce, _Arity) -> + ok; +assert_func_exported(Mod, Fun, Arity) -> + _ = Mod:module_info(md5), + case erlang:function_exported(Mod, Fun, Arity) of + true -> + ok; + false -> + throw(#{ + reason => "unknown_variform_function", + module => Mod, + function => Fun, + arity => Arity + }) + end. + +assert_module_allowed(emqx_variform_str) -> + ok; +assert_module_allowed(Mod) -> + Allowed = get_allowed_modules(), + case lists:member(Mod, Allowed) of + true -> + ok; + false -> + throw(#{ + reason => "unallowed_veriform_module", + module => Mod + }) + end. + +inject_allowed_modules(Modules) -> + Allowed0 = get_allowed_modules(), + Allowed = lists:usort(Allowed0 ++ Modules), + persistent_term:put({emqx_variform, allowed_modules}, Allowed). + +get_allowed_modules() -> + persistent_term:get({emqx_variform, allowed_modules}, []). + +str(Value) -> + emqx_utils_conv:bin(Value). + +split(VarName) -> + lists:map(fun erlang:iolist_to_binary/1, string:tokens(VarName, ".")). 
diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx/src/variform/emqx_variform_str.erl index d94519f76..7b8e2e742 100644 --- a/apps/emqx/src/variform/emqx_variform_str.erl +++ b/apps/emqx/src/variform/emqx_variform_str.erl @@ -64,10 +64,10 @@ %% @doc Return the first non-empty string coalesce(A, B) when ?IS_EMPTY(A) andalso ?IS_EMPTY(B) -> <<>>; -coalesce(A, _) when is_binary(A) -> - A; -coalesce(_, B) -> - B. +coalesce(A, B) when ?IS_EMPTY(A) -> + B; +coalesce(A, _B) -> + A. %% @doc Return the first non-empty string coalesce([]) -> @@ -140,7 +140,7 @@ tokens(S, Separators, <<"nocrlf">>) -> %% implicit convert args to strings, and then do concatenation concat(S1, S2) -> - concat([S1, S2], unicode). + concat([S1, S2]). concat(List) -> unicode:characters_to_binary(lists:map(fun str/1, List), unicode). diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index ea8e192d4..6a719c3f1 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -145,6 +145,7 @@ upper/1, split/2, split/3, + concat/1, concat/2, tokens/2, tokens/3, From bfca3ebc71a81583cc1cf7df795e284ff66dfc79 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 19:30:23 +0100 Subject: [PATCH 4/8] feat(variform): support array syntax '[' and ']' --- apps/emqx/src/variform/emqx_variform.erl | 2 + .../src/variform/emqx_variform_parser.yrl | 42 ++++++++++++++----- apps/emqx/src/variform/emqx_variform_scan.xrl | 4 ++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl index 51108885c..95ea1e1ce 100644 --- a/apps/emqx/src/variform/emqx_variform.erl +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -78,6 +78,8 @@ eval({str, Str}, _Bindings) -> str(Str); eval({num, Num}, _Bindings) -> str(Num); +eval({array, Args}, Bindings) -> + eval(Args, Bindings); eval({call, FuncNameStr, Args}, 
Bindings) -> {Mod, Fun} = resolve_func_name(FuncNameStr), ok = assert_func_exported(Mod, Fun, length(Args)), diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx/src/variform/emqx_variform_parser.yrl index 3f2b739ba..8a8a03a4d 100644 --- a/apps/emqx/src/variform/emqx_variform_parser.yrl +++ b/apps/emqx/src/variform/emqx_variform_parser.yrl @@ -1,16 +1,38 @@ -Nonterminals expr call_or_var args. -Terminals identifier number string '(' ')' ','. +Nonterminals + expr + call_or_var + array + args + arg. -Rootsymbol expr. +Terminals + identifier + number + string + '(' ')' + ',' '[' ']'. + +Rootsymbol + expr. %% Grammar Rules -expr -> call_or_var: '$1'. -call_or_var -> identifier '(' args ')' : {call, element(3,'$1'), '$3'}. +%% Root expression: function call or variable +expr -> call_or_var : '$1'. + +%% Function call or variable +call_or_var -> identifier '(' args ')' : {call, element(3, '$1'), '$3'}. call_or_var -> identifier : {var, element(3, '$1')}. -args -> expr : ['$1']. -args -> args ',' expr : '$1' ++ ['$3']. -%% Handling direct values and variables within arguments -expr -> number : {num, element(3, '$1')}. -expr -> string : {str, element(3, '$1')}. +%% Array is like a arg list, but with square brackets +array -> '[' args ']' : {array, '$2'}. + +%% Argument handling +args -> arg : ['$1']. +args -> args ',' arg : '$1' ++ ['$3']. + +%% Arguments can be expressions, arrays, numbers, or strings +arg -> expr : '$1'. +arg -> array : '$1'. +arg -> number : {num, element(3, '$1')}. +arg -> string : {str, element(3, '$1')}. diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl b/apps/emqx/src/variform/emqx_variform_scan.xrl index 53657bad4..29a45ef92 100644 --- a/apps/emqx/src/variform/emqx_variform_scan.xrl +++ b/apps/emqx/src/variform/emqx_variform_scan.xrl @@ -6,6 +6,8 @@ DQ_STRING = \"[^\"]*\" NUMBER = [+-]?(\\d+\\.\\d+|[0-9]+) LPAREN = \( RPAREN = \) +LBRACKET = \[ +RBRACKET = \] COMMA = , WHITESPACE = [\s\t\n]+ @@ -18,6 +20,8 @@ Rules. 
{NUMBER} : {token, {number, TokenLine, TokenChars}}. {LPAREN} : {token, {'(', TokenLine}}. {RPAREN} : {token, {')', TokenLine}}. +{LBRACKET} : {token, {'[', TokenLine}}. +{RBRACKET} : {token, {']', TokenLine}}. {COMMA} : {token, {',', TokenLine}}. Erlang code. From 0e79b543cf51fcc83da27e2dd0ff4fcb7e69def4 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 4 Apr 2024 11:10:56 +0200 Subject: [PATCH 5/8] refactor: move variform to emqx_utils --- .gitignore | 4 ++-- apps/{emqx/src/variform => emqx_utils/src}/emqx_variform.erl | 0 .../src/variform => emqx_utils/src}/emqx_variform_parser.yrl | 0 .../src/variform => emqx_utils/src}/emqx_variform_scan.xrl | 0 .../src/variform => emqx_utils/src}/emqx_variform_str.erl | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform.erl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_parser.yrl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_scan.xrl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_str.erl (100%) diff --git a/.gitignore b/.gitignore index a2c8b7e65..d5338d5c4 100644 --- a/.gitignore +++ b/.gitignore @@ -76,5 +76,5 @@ rebar-git-cache.tar .docker_image_tag .emqx_docker_image_tags .git/ -apps/emqx/src/emqx_variform_parser.erl -apps/emqx/src/variform/emqx_variform_scan.erl +apps/emqx_utils/src/emqx_variform_parser.erl +apps/emqx_utils/src/emqx_variform_scan.erl diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl similarity index 100% rename from apps/emqx/src/variform/emqx_variform.erl rename to apps/emqx_utils/src/emqx_variform.erl diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx_utils/src/emqx_variform_parser.yrl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_parser.yrl rename to apps/emqx_utils/src/emqx_variform_parser.yrl diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl 
b/apps/emqx_utils/src/emqx_variform_scan.xrl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_scan.xrl rename to apps/emqx_utils/src/emqx_variform_scan.xrl diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_str.erl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_str.erl rename to apps/emqx_utils/src/emqx_variform_str.erl From bf12efac6dba235a46e6446acbaef357144f120e Mon Sep 17 00:00:00 2001 From: zmstone Date: Mon, 8 Apr 2024 20:18:01 +0200 Subject: [PATCH 6/8] fix(variform): add basic tests --- apps/emqx_utils/src/emqx_variform.erl | 49 +++++-- apps/emqx_utils/src/emqx_variform_str.erl | 17 ++- apps/emqx_utils/test/emqx_variform_tests.erl | 129 +++++++++++++++++++ 3 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 apps/emqx_utils/test/emqx_variform_tests.erl diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 95ea1e1ce..25825ea9f 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -22,7 +22,12 @@ %% or used to choose the first non-empty value from a list of variables. -module(emqx_variform). --export([inject_allowed_modules/1]). +-export([ + inject_allowed_module/1, + inject_allowed_modules/1, + erase_allowed_module/1, + erase_allowed_modules/1 +]). -export([render/2, render/3]). %% @doc Render a variform expression with bindings. @@ -48,6 +53,8 @@ render(Expression, Bindings) -> render(Expression, Bindings, #{}). 
+render(Expression, Bindings, Opts) when is_binary(Expression) -> + render(unicode:characters_to_list(Expression), Bindings, Opts); render(Expression, Bindings, Opts) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> @@ -66,7 +73,7 @@ render(Expression, Bindings, Opts) -> eval_as_string(Expr, Bindings, _Opts) -> try - {ok, iolist_to_binary(eval(Expr, Bindings))} + {ok, str(eval(Expr, Bindings))} catch throw:Reason -> {error, Reason}; @@ -97,7 +104,7 @@ call(emqx_variform_str, concat, Args) -> call(emqx_variform_str, coalesce, Args) -> str(emqx_variform_str:coalesce(Args)); call(Mod, Fun, Args) -> - str(erlang:apply(Mod, Fun, Args)). + erlang:apply(Mod, Fun, Args). resolve_func_name(FuncNameStr) -> case string:tokens(FuncNameStr, ".") of @@ -107,7 +114,10 @@ resolve_func_name(FuncNameStr) -> list_to_existing_atom(Mod0) catch error:badarg -> - throw(#{unknown_module => Mod0}) + throw(#{ + reason => unknown_variform_module, + module => Mod0 + }) end, ok = assert_module_allowed(Mod), Fun = @@ -115,7 +125,10 @@ resolve_func_name(FuncNameStr) -> list_to_existing_atom(Fun0) catch error:badarg -> - throw(#{unknown_function => Fun0}) + throw(#{ + reason => unknown_variform_function, + function => Fun0 + }) end, {Mod, Fun}; [Fun] -> @@ -125,11 +138,13 @@ resolve_func_name(FuncNameStr) -> catch error:badarg -> throw(#{ - reason => "unknown_variform_function", + reason => unknown_variform_function, function => Fun }) end, - {emqx_variform_str, FuncName} + {emqx_variform_str, FuncName}; + _ -> + throw(#{reason => invalid_function_reference, function => FuncNameStr}) end. 
resolve_var_value(VarName, Bindings) -> @@ -145,13 +160,14 @@ assert_func_exported(emqx_variform_str, concat, _Arity) -> assert_func_exported(emqx_variform_str, coalesce, _Arity) -> ok; assert_func_exported(Mod, Fun, Arity) -> + %% ensure beam loaded _ = Mod:module_info(md5), case erlang:function_exported(Mod, Fun, Arity) of true -> ok; false -> throw(#{ - reason => "unknown_variform_function", + reason => unknown_variform_function, module => Mod, function => Fun, arity => Arity @@ -167,16 +183,27 @@ assert_module_allowed(Mod) -> ok; false -> throw(#{ - reason => "unallowed_veriform_module", + reason => unallowed_veriform_module, module => Mod }) end. -inject_allowed_modules(Modules) -> +inject_allowed_module(Module) when is_atom(Module) -> + inject_allowed_modules([Module]). + +inject_allowed_modules(Modules) when is_list(Modules) -> Allowed0 = get_allowed_modules(), Allowed = lists:usort(Allowed0 ++ Modules), persistent_term:put({emqx_variform, allowed_modules}, Allowed). +erase_allowed_module(Module) when is_atom(Module) -> + erase_allowed_modules([Module]). + +erase_allowed_modules(Modules) when is_list(Modules) -> + Allowed0 = get_allowed_modules(), + Allowed = Allowed0 -- Modules, + persistent_term:put({emqx_variform, allowed_modules}, Allowed). + get_allowed_modules() -> persistent_term:get({emqx_variform, allowed_modules}, []). diff --git a/apps/emqx_utils/src/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_str.erl index 7b8e2e742..a53e1e216 100644 --- a/apps/emqx_utils/src/emqx_variform_str.erl +++ b/apps/emqx_utils/src/emqx_variform_str.erl @@ -52,7 +52,8 @@ find/3, join_to_string/1, join_to_string/2, - unescape/1 + unescape/1, + nth/2 ]). -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). @@ -224,6 +225,20 @@ unescape(Bin) when is_binary(Bin) -> throw({invalid_unicode_character, Error}) end. 
+nth(N, List) when (is_list(N) orelse is_binary(N)) andalso is_list(List) -> + try binary_to_integer(iolist_to_binary(N)) of + N1 -> + nth(N1, List) + catch + _:_ -> + throw(#{reason => invalid_argument, func => nth, index => N}) + end; +nth(N, List) when is_integer(N) andalso is_list(List) -> + case length(List) of + L when L < N -> <<>>; + _ -> lists:nth(N, List) + end. + unescape_string(Input) -> unescape_string(Input, []). unescape_string([], Acc) -> diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl new file mode 100644 index 000000000..da26a383d --- /dev/null +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -0,0 +1,129 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_variform_tests). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). + +-define(SYNTAX_ERROR, {error, "syntax error before:" ++ _}). 
+ +redner_test_() -> + [ + {"direct var reference", fun() -> ?assertEqual({ok, <<"1">>}, render("a", #{a => 1})) end}, + {"concat strings", fun() -> + ?assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{})) + end}, + {"concat empty string", fun() -> ?assertEqual({ok, <<"">>}, render("concat('')", #{})) end}, + {"tokens 1st", fun() -> + ?assertEqual({ok, <<"a">>}, render("nth(1,tokens(var, ','))", #{var => <<"a,b">>})) + end}, + {"unknown var as empty str", fun() -> + ?assertEqual({ok, <<>>}, render("var", #{})) + end}, + {"out of range nth index", fun() -> + ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>})) + end}, + {"not a index number for nth", fun() -> + ?assertMatch( + {error, #{reason := invalid_argument, func := nth, index := <<"notnum">>}}, + render("nth('notnum', tokens(var, ','))", #{var => <<"a">>}) + ) + end} + ]. + +unknown_func_test_() -> + [ + {"unknown function", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_function}}, + render("nonexistingatom__(a)", #{}) + ) + end}, + {"unknown module", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_module}}, + render("nonexistingatom__.nonexistingatom__(a)", #{}) + ) + end}, + {"unknown function in a known module", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_function}}, + render("emqx_variform_str.nonexistingatom__(a)", #{}) + ) + end}, + {"invalid func reference", fun() -> + ?assertMatch( + {error, #{reason := invalid_function_reference, function := "a.b.c"}}, + render("a.b.c(var)", #{}) + ) + end} + ]. + +concat(L) -> iolist_to_binary(L). 
+ +inject_allowed_module_test() -> + try + emqx_variform:inject_allowed_module(?MODULE), + ?assertEqual({ok, <<"ab">>}, render(atom_to_list(?MODULE) ++ ".concat(['a','b'])", #{})), + ?assertMatch( + {error, #{ + reason := unknown_variform_function, + module := ?MODULE, + function := concat, + arity := 2 + }}, + render(atom_to_list(?MODULE) ++ ".concat('a','b')", #{}) + ), + ?assertMatch( + {error, #{reason := unallowed_veriform_module, module := emqx}}, + render("emqx.concat('a','b')", #{}) + ) + after + emqx_variform:erase_allowed_module(?MODULE) + end. + +coalesce_test_() -> + [ + {"coalesce first", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{})) + end}, + {"coalesce second", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{})) + end}, + {"coalesce first var", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>})) + end}, + {"coalesce second var", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>})) + end}, + {"coalesce empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a,b)", #{})) end} + ]. + +syntax_error_test_() -> + [ + {"empty expression", fun() -> ?assertMatch(?SYNTAX_ERROR, render("", #{})) end}, + {"const string single quote", fun() -> ?assertMatch(?SYNTAX_ERROR, render("'a'", #{})) end}, + {"const string double quote", fun() -> + ?assertMatch(?SYNTAX_ERROR, render(<<"\"a\"">>, #{})) + end}, + {"no arity", fun() -> ?assertMatch(?SYNTAX_ERROR, render("concat()", #{})) end} + ]. + +render(Expression, Bindings) -> + emqx_variform:render(Expression, Bindings). 
From 41677eb7855015c1e3a68c7a5726fd0d8eae0eb6 Mon Sep 17 00:00:00 2001 From: zmstone Date: Mon, 8 Apr 2024 21:25:58 +0200 Subject: [PATCH 7/8] refactor: make elvis happy --- apps/emqx_utils/src/emqx_variform.erl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 25825ea9f..834d22750 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -160,8 +160,7 @@ assert_func_exported(emqx_variform_str, concat, _Arity) -> assert_func_exported(emqx_variform_str, coalesce, _Arity) -> ok; assert_func_exported(Mod, Fun, Arity) -> - %% ensure beam loaded - _ = Mod:module_info(md5), + ok = try_load(Mod), case erlang:function_exported(Mod, Fun, Arity) of true -> ok; @@ -174,6 +173,18 @@ assert_func_exported(Mod, Fun, Arity) -> }) end. +%% best effort to load the module because it might not be loaded as a part of the release modules +%% e.g. from a plugin. +%% do not call code server, just try to call a function in the module. +try_load(Mod) -> + try + _ = erlang:apply(Mod, module_info, [md5]), + ok + catch + _:_ -> + ok + end. + assert_module_allowed(emqx_variform_str) -> ok; assert_module_allowed(Mod) -> From 53b78086ed810453900e5e740944ed6c42a4b91d Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 9 Apr 2024 09:34:05 +0200 Subject: [PATCH 8/8] chore: fix xref checks --- apps/emqx_utils/src/emqx_variform_parser.yrl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/emqx_utils/src/emqx_variform_parser.yrl b/apps/emqx_utils/src/emqx_variform_parser.yrl index 8a8a03a4d..508ef46d0 100644 --- a/apps/emqx_utils/src/emqx_variform_parser.yrl +++ b/apps/emqx_utils/src/emqx_variform_parser.yrl @@ -36,3 +36,8 @@ arg -> expr : '$1'. arg -> array : '$1'. arg -> number : {num, element(3, '$1')}. arg -> string : {str, element(3, '$1')}. + +Erlang code. + +%% mute xref warning +-export([return_error/2]).