Merge pull request #12851 from zmstone/0327-feat-add-emqx_variform

emqx_variform for string substitution and transform
This commit is contained in:
Zaiming (Stone) Shi 2024-04-10 10:18:40 +02:00 committed by GitHub
commit 2fea651d1d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 840 additions and 229 deletions

2
.gitignore vendored
View File

@ -76,3 +76,5 @@ rebar-git-cache.tar
.docker_image_tag .docker_image_tag
.emqx_docker_image_tags .emqx_docker_image_tags
.git/ .git/
apps/emqx_utils/src/emqx_variform_parser.erl
apps/emqx_utils/src/emqx_variform_scan.erl

View File

@ -132,6 +132,8 @@
%% String Funcs %% String Funcs
-export([ -export([
coalesce/1,
coalesce/2,
lower/1, lower/1,
ltrim/1, ltrim/1,
reverse/1, reverse/1,
@ -143,6 +145,7 @@
upper/1, upper/1,
split/2, split/2,
split/3, split/3,
concat/1,
concat/2, concat/2,
tokens/2, tokens/2,
tokens/3, tokens/3,
@ -768,130 +771,67 @@ is_array(_) -> false.
%% String Funcs %% String Funcs
%%------------------------------------------------------------------------------ %%------------------------------------------------------------------------------
lower(S) when is_binary(S) -> coalesce(List) -> emqx_variform_str:coalesce(List).
string:lowercase(S).
ltrim(S) when is_binary(S) -> coalesce(A, B) -> emqx_variform_str:coalesce(A, B).
string:trim(S, leading).
reverse(S) when is_binary(S) -> lower(S) -> emqx_variform_str:lower(S).
iolist_to_binary(string:reverse(S)).
rtrim(S) when is_binary(S) -> ltrim(S) -> emqx_variform_str:ltrim(S).
string:trim(S, trailing).
strlen(S) when is_binary(S) -> reverse(S) -> emqx_variform_str:reverse(S).
string:length(S).
substr(S, Start) when is_binary(S), is_integer(Start) -> rtrim(S) -> emqx_variform_str:rtrim(S).
string:slice(S, Start).
substr(S, Start, Length) when strlen(S) -> emqx_variform_str:strlen(S).
is_binary(S),
is_integer(Start),
is_integer(Length)
->
string:slice(S, Start, Length).
trim(S) when is_binary(S) -> substr(S, Start) -> emqx_variform_str:substr(S, Start).
string:trim(S).
upper(S) when is_binary(S) -> substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length).
string:uppercase(S).
split(S, P) when is_binary(S), is_binary(P) -> trim(S) -> emqx_variform_str:trim(S).
[R || R <- string:split(S, P, all), R =/= <<>> andalso R =/= ""].
split(S, P, <<"notrim">>) -> upper(S) -> emqx_variform_str:upper(S).
string:split(S, P, all);
split(S, P, <<"leading_notrim">>) ->
string:split(S, P, leading);
split(S, P, <<"leading">>) when is_binary(S), is_binary(P) ->
[R || R <- string:split(S, P, leading), R =/= <<>> andalso R =/= ""];
split(S, P, <<"trailing_notrim">>) ->
string:split(S, P, trailing);
split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) ->
[R || R <- string:split(S, P, trailing), R =/= <<>> andalso R =/= ""].
tokens(S, Separators) -> split(S, P) -> emqx_variform_str:split(S, P).
[list_to_binary(R) || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators))].
tokens(S, Separators, <<"nocrlf">>) -> split(S, P, Position) -> emqx_variform_str:split(S, P, Position).
[
list_to_binary(R)
|| R <- string:lexemes(binary_to_list(S), binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]])
].
%% implicit convert args to strings, and then do concatenation tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators).
concat(S1, S2) ->
unicode:characters_to_binary([str(S1), str(S2)], unicode).
sprintf_s(Format, Args) when is_list(Args) -> tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF).
erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)).
pad(S, Len) when is_binary(S), is_integer(Len) -> concat(S1, S2) -> emqx_variform_str:concat(S1, S2).
iolist_to_binary(string:pad(S, Len, trailing)).
pad(S, Len, <<"trailing">>) when is_binary(S), is_integer(Len) -> concat(List) -> emqx_variform_str:concat(List).
iolist_to_binary(string:pad(S, Len, trailing));
pad(S, Len, <<"both">>) when is_binary(S), is_integer(Len) ->
iolist_to_binary(string:pad(S, Len, both));
pad(S, Len, <<"leading">>) when is_binary(S), is_integer(Len) ->
iolist_to_binary(string:pad(S, Len, leading)).
pad(S, Len, <<"trailing">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args).
Chars = unicode:characters_to_list(Char, utf8),
iolist_to_binary(string:pad(S, Len, trailing, Chars));
pad(S, Len, <<"both">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) ->
Chars = unicode:characters_to_list(Char, utf8),
iolist_to_binary(string:pad(S, Len, both, Chars));
pad(S, Len, <<"leading">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) ->
Chars = unicode:characters_to_list(Char, utf8),
iolist_to_binary(string:pad(S, Len, leading, Chars)).
replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> pad(S, Len) -> emqx_variform_str:pad(S, Len).
iolist_to_binary(string:replace(SrcStr, P, RepStr, all)).
replace(SrcStr, P, RepStr, <<"all">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position).
iolist_to_binary(string:replace(SrcStr, P, RepStr, all));
replace(SrcStr, P, RepStr, <<"trailing">>) when
is_binary(SrcStr), is_binary(P), is_binary(RepStr)
->
iolist_to_binary(string:replace(SrcStr, P, RepStr, trailing));
replace(SrcStr, P, RepStr, <<"leading">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) ->
iolist_to_binary(string:replace(SrcStr, P, RepStr, leading)).
regex_match(Str, RE) -> pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char).
case re:run(Str, RE, [global, {capture, none}]) of
match -> true;
nomatch -> false
end.
regex_replace(SrcStr, RE, RepStr) -> replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr).
re:replace(SrcStr, RE, RepStr, [global, {return, binary}]).
ascii(Char) when is_binary(Char) -> replace(SrcStr, Pattern, RepStr, Position) ->
[FirstC | _] = binary_to_list(Char), emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position).
FirstC.
find(S, P) when is_binary(S), is_binary(P) -> regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE).
find_s(S, P, leading).
find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr).
find_s(S, P, trailing);
find(S, P, <<"leading">>) when is_binary(S), is_binary(P) ->
find_s(S, P, leading).
find_s(S, P, Dir) -> ascii(Char) -> emqx_variform_str:ascii(Char).
case string:find(S, P, Dir) of
nomatch -> <<"">>; find(S, P) -> emqx_variform_str:find(S, P).
SubStr -> SubStr
end. find(S, P, Position) -> emqx_variform_str:find(S, P, Position).
join_to_string(Str) -> emqx_variform_str:join_to_string(Str).
join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List).
join_to_string(List) when is_list(List) ->
join_to_string(<<", ">>, List).
join_to_string(Sep, List) when is_list(List), is_binary(Sep) ->
iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])).
join_to_sql_values_string(List) -> join_to_sql_values_string(List) ->
QuotedList = QuotedList =
[ [
@ -938,137 +878,7 @@ jq(FilterProgram, JSONBin) ->
]) ])
). ).
unescape(Bin) when is_binary(Bin) -> unescape(Str) -> emqx_variform_str:unescape(Str).
UnicodeList = unicode:characters_to_list(Bin, utf8),
UnescapedUnicodeList = unescape_string(UnicodeList),
UnescapedUTF8Bin = unicode:characters_to_binary(UnescapedUnicodeList, utf32, utf8),
case UnescapedUTF8Bin of
Out when is_binary(Out) ->
Out;
Error ->
throw({invalid_unicode_character, Error})
end.
unescape_string(Input) -> unescape_string(Input, []).
unescape_string([], Acc) ->
lists:reverse(Acc);
unescape_string([$\\, $\\ | Rest], Acc) ->
unescape_string(Rest, [$\\ | Acc]);
unescape_string([$\\, $n | Rest], Acc) ->
unescape_string(Rest, [$\n | Acc]);
unescape_string([$\\, $t | Rest], Acc) ->
unescape_string(Rest, [$\t | Acc]);
unescape_string([$\\, $r | Rest], Acc) ->
unescape_string(Rest, [$\r | Acc]);
unescape_string([$\\, $b | Rest], Acc) ->
unescape_string(Rest, [$\b | Acc]);
unescape_string([$\\, $f | Rest], Acc) ->
unescape_string(Rest, [$\f | Acc]);
unescape_string([$\\, $v | Rest], Acc) ->
unescape_string(Rest, [$\v | Acc]);
unescape_string([$\\, $' | Rest], Acc) ->
unescape_string(Rest, [$\' | Acc]);
unescape_string([$\\, $" | Rest], Acc) ->
unescape_string(Rest, [$\" | Acc]);
unescape_string([$\\, $? | Rest], Acc) ->
unescape_string(Rest, [$\? | Acc]);
unescape_string([$\\, $a | Rest], Acc) ->
unescape_string(Rest, [$\a | Acc]);
%% Start of HEX escape code
unescape_string([$\\, $x | [$0 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$1 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$2 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$3 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$4 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$5 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$6 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$7 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$8 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$9 | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$A | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$B | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$C | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$D | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$E | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$F | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$a | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$b | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$c | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$d | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$e | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
unescape_string([$\\, $x | [$f | _] = HexStringStart], Acc) ->
unescape_handle_hex_string(HexStringStart, Acc);
%% We treat all other escape sequences as not valid input to leave room for
%% extending the function to support more escape codes
unescape_string([$\\, X | _Rest], _Acc) ->
erlang:throw({unrecognized_escape_sequence, list_to_binary([$\\, X])});
unescape_string([First | Rest], Acc) ->
unescape_string(Rest, [First | Acc]).
unescape_handle_hex_string(HexStringStart, Acc) ->
{RemainingString, Num} = parse_hex_string(HexStringStart),
unescape_string(RemainingString, [Num | Acc]).
parse_hex_string(SeqStartingWithHexDigit) ->
parse_hex_string(SeqStartingWithHexDigit, []).
parse_hex_string([], Acc) ->
ReversedAcc = lists:reverse(Acc),
{[], list_to_integer(ReversedAcc, 16)};
parse_hex_string([First | Rest] = String, Acc) ->
case is_hex_digit(First) of
true ->
parse_hex_string(Rest, [First | Acc]);
false ->
ReversedAcc = lists:reverse(Acc),
{String, list_to_integer(ReversedAcc, 16)}
end.
is_hex_digit($0) -> true;
is_hex_digit($1) -> true;
is_hex_digit($2) -> true;
is_hex_digit($3) -> true;
is_hex_digit($4) -> true;
is_hex_digit($5) -> true;
is_hex_digit($6) -> true;
is_hex_digit($7) -> true;
is_hex_digit($8) -> true;
is_hex_digit($9) -> true;
is_hex_digit($A) -> true;
is_hex_digit($B) -> true;
is_hex_digit($C) -> true;
is_hex_digit($D) -> true;
is_hex_digit($E) -> true;
is_hex_digit($F) -> true;
is_hex_digit($a) -> true;
is_hex_digit($b) -> true;
is_hex_digit($c) -> true;
is_hex_digit($d) -> true;
is_hex_digit($e) -> true;
is_hex_digit($f) -> true;
is_hex_digit(_) -> false.
%%------------------------------------------------------------------------------ %%------------------------------------------------------------------------------
%% Array Funcs %% Array Funcs
@ -1095,6 +905,10 @@ last(List) when is_list(List) ->
contains(Elm, List) when is_list(List) -> contains(Elm, List) when is_list(List) ->
lists:member(Elm, List). lists:member(Elm, List).
%%------------------------------------------------------------------------------
%% Map Funcs
%%------------------------------------------------------------------------------
map_new() -> map_new() ->
#{}. #{}.

View File

@ -0,0 +1,225 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc This module provides a single-line expression string rendering engine.
%% A predefined set of functions are allowed to be called in the expressions.
%% Only simple string expressions are supported, and no control flow is allowed.
%% However, with the help from the functions, some control flow can be achieved.
%% For example, the `coalesce` function can be used to provide a default value,
%% or used to choose the first non-empty value from a list of variables.
-module(emqx_variform).
-export([
inject_allowed_module/1,
inject_allowed_modules/1,
erase_allowed_module/1,
erase_allowed_modules/1
]).
-export([render/2, render/3]).
%% @doc Render a variform expression with bindings.
%% A variform expression is a template string which supports variable substitution
%% and function calls.
%%
%% The function calls are in the form of `module.function(arg1, arg2, ...)' where `module'
%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str' module.
%% Both module and function must be existing atoms, and only whitelisted functions are allowed.
%%
%% A function arg can be a constant string or a number.
%% Strings can be quoted with single quotes or double quotes, without support of escape characters.
%% If some special characters are needed, the function `unescape' can be used to convert an
%% escaped string to raw bytes.
%% For example, to get the first line of a multi-line string, the expression can be
%% `coalesce(tokens(variable_name, unescape("\n")))'.
%%
%% The expression may be given as a character list or as a binary.
%% The bindings is a map of variables to their values.
%%
%% For unresolved variables, empty string (but not "undefined") is used.
%% In case of runtime exception, an error is returned.
-spec render(string() | binary(), map()) -> {ok, binary()} | {error, term()}.
render(Expression, Bindings) ->
    render(Expression, Bindings, #{}).
%% @doc Render a variform expression with bindings and an options map.
%% Binary expressions are decoded to a character list before scanning.
%% A binary that cannot be decoded as unicode yields an `{error, _}' tuple
%% instead of feeding the decoder's error tuple to the scanner.
%% Scanner and parser failures are returned as `{error, Reason}'.
-spec render(string() | binary(), map(), map()) -> {ok, binary()} | {error, term()}.
render(Expression, Bindings, Opts) when is_binary(Expression) ->
    case unicode:characters_to_list(Expression) of
        Chars when is_list(Chars) ->
            render(Chars, Bindings, Opts);
        _ErrorOrIncomplete ->
            %% unicode:characters_to_list/1 returns {error, _, _} or {incomplete, _, _}
            %% for input which is not valid unicode
            {error, #{reason => invalid_unicode_expression, expression => Expression}}
    end;
render(Expression, Bindings, Opts) ->
    case emqx_variform_scan:string(Expression) of
        {ok, Tokens, _Line} ->
            parse_and_eval(Tokens, Bindings, Opts);
        {error, Reason, _Line} ->
            {error, Reason}
    end.

%% Parse the scanned tokens and evaluate the resulting expression.
parse_and_eval(Tokens, Bindings, Opts) ->
    case emqx_variform_parser:parse(Tokens) of
        {ok, Expr} ->
            eval_as_string(Expr, Bindings, Opts);
        {error, {_, emqx_variform_parser, Msg}} ->
            %% syntax error
            {error, lists:flatten(Msg)};
        {error, Reason} ->
            {error, Reason}
    end.
%% Evaluate the parsed expression and convert the result to a binary.
%% Thrown terms become `{error, Thrown}'; any other exception is wrapped
%% in a map carrying its class, reason and stack trace.
eval_as_string(Expr, Bindings, _Opts) ->
    try
        Rendered = str(eval(Expr, Bindings)),
        {ok, Rendered}
    catch
        throw:Thrown ->
            {error, Thrown};
        Class:Error:Stacktrace ->
            {error, #{exception => Class, reason => Error, stack_trace => Stacktrace}}
    end.
%% Evaluate a parsed expression node, or a list of nodes, against the bindings.
%% Lists are evaluated element by element, in order.
eval([], _Bindings) ->
    [];
eval([Head | Tail], Bindings) ->
    [eval(Head, Bindings) | eval(Tail, Bindings)];
%% Variable reference
eval({var, VarName}, Bindings) ->
    resolve_var_value(VarName, Bindings);
%% Constants are converted to binaries
eval({str, Str}, _Bindings) ->
    str(Str);
eval({num, Num}, _Bindings) ->
    str(Num);
%% An array evaluates to the list of its evaluated elements
eval({array, Elements}, Bindings) ->
    eval(Elements, Bindings);
%% Function call: resolve and validate the function before evaluating its arguments
eval({call, FuncNameStr, Args}, Bindings) ->
    {Mod, Fun} = resolve_func_name(FuncNameStr),
    Arity = length(Args),
    ok = assert_func_exported(Mod, Fun, Arity),
    EvaluatedArgs = eval(Args, Bindings),
    call(Mod, Fun, EvaluatedArgs).
%% Invoke the resolved function with the evaluated arguments.
%% `concat' and `coalesce' take any number of arguments in an expression,
%% but are implemented as /1 functions receiving the argument list.
call(emqx_variform_str, concat, Args) ->
    Joined = emqx_variform_str:concat(Args),
    str(Joined);
call(emqx_variform_str, coalesce, Args) ->
    Chosen = emqx_variform_str:coalesce(Args),
    str(Chosen);
call(Mod, Fun, Args) ->
    apply(Mod, Fun, Args).
%% Resolve a function reference string into `{Module, Function}'.
%% `"mod.fun"' refers to an explicitly named (and allowed) module,
%% a bare `"fun"' refers to `emqx_variform_str'.
%% Throws a map with a `reason' key when the reference cannot be resolved.
resolve_func_name(FuncNameStr) ->
    case string:tokens(FuncNameStr, ".") of
        [ModStr, FunStr] ->
            Mod = existing_atom(ModStr, unknown_variform_module, module),
            %% the module must be allowed before the function name is looked up
            ok = assert_module_allowed(Mod),
            Fun = existing_atom(FunStr, unknown_variform_function, function),
            {Mod, Fun};
        [FunStr] ->
            Fun = existing_atom(FunStr, unknown_variform_function, function),
            {emqx_variform_str, Fun};
        _ ->
            throw(#{reason => invalid_function_reference, function => FuncNameStr})
    end.

%% Convert a string to an already existing atom (never creates new atoms),
%% or throw `#{reason => Reason, Key => Str}' when there is no such atom.
existing_atom(Str, Reason, Key) ->
    try
        list_to_existing_atom(Str)
    catch
        error:badarg ->
            throw(#{reason => Reason, Key => Str})
    end.
%% Look up a (possibly dotted) variable name in the bindings.
%% The found value is converted to a binary; a failed lookup yields an empty binary.
resolve_var_value(VarName, Bindings) ->
    Path = split(VarName),
    case emqx_template:lookup_var(Path, Bindings) of
        {error, _Reason} ->
            <<>>;
        {ok, Value} ->
            str(Value)
    end.
%% Ensure `Mod:Fun/Arity' is exported, or throw `unknown_variform_function'.
%% `concat' and `coalesce' of `emqx_variform_str' are accepted with any arity.
assert_func_exported(emqx_variform_str, concat, _Arity) ->
    ok;
assert_func_exported(emqx_variform_str, coalesce, _Arity) ->
    ok;
assert_func_exported(Mod, Fun, Arity) ->
    ok = try_load(Mod),
    erlang:function_exported(Mod, Fun, Arity) orelse
        throw(#{
            reason => unknown_variform_function,
            module => Mod,
            function => Fun,
            arity => Arity
        }),
    ok.
%% Best effort to load the module, because it might not be loaded as a part of
%% the release modules, e.g. when it comes from a plugin.
%% The code server is not called directly; calling a function of the module is
%% enough to trigger loading. Failures are deliberately ignored here.
try_load(Mod) ->
    try Mod:module_info(md5) of
        _ -> ok
    catch
        _:_ -> ok
    end.
%% Ensure the module may be called from expressions.
%% `emqx_variform_str' is always allowed; any other module must have been
%% registered in the allowed-modules list, otherwise a map is thrown.
%% NOTE(review): `unallowed_veriform_module' looks like a misspelling of
%% "variform"; it is kept as is because callers may match on this atom.
assert_module_allowed(emqx_variform_str) ->
    ok;
assert_module_allowed(Mod) ->
    lists:member(Mod, get_allowed_modules()) orelse
        throw(#{
            reason => unallowed_veriform_module,
            module => Mod
        }),
    ok.
%% @doc Allow functions of the given module to be called from expressions.
inject_allowed_module(Module) when is_atom(Module) ->
    inject_allowed_modules([Module]).

%% @doc Add modules to the allowed list (stored sorted and de-duplicated
%% in a persistent term).
inject_allowed_modules(Modules) when is_list(Modules) ->
    Merged = lists:usort(get_allowed_modules() ++ Modules),
    persistent_term:put({emqx_variform, allowed_modules}, Merged).
%% @doc Remove a single module from the allowed list.
erase_allowed_module(Module) when is_atom(Module) ->
    erase_allowed_modules([Module]).

%% @doc Remove the given modules from the allowed list and store the remainder.
erase_allowed_modules(Modules) when is_list(Modules) ->
    Remaining = get_allowed_modules() -- Modules,
    persistent_term:put({emqx_variform, allowed_modules}, Remaining).
%% Read the list of allowed modules; defaults to an empty list when nothing was stored.
get_allowed_modules() ->
    Key = {emqx_variform, allowed_modules},
    persistent_term:get(Key, []).
%% Convert a term to a binary using emqx_utils_conv:bin/1.
str(Term) ->
    emqx_utils_conv:bin(Term).
%% Split a dotted variable name into its segments, each returned as a binary.
split(VarName) ->
    [erlang:iolist_to_binary(Segment) || Segment <- string:tokens(VarName, ".")].

View File

@ -0,0 +1,43 @@
Nonterminals
expr
call_or_var
array
args
arg.
Terminals
identifier
number
string
'(' ')'
',' '[' ']'.
Rootsymbol
expr.
%% Grammar Rules
%% Each rule's action builds a tagged tuple describing the parsed node.
%% `element(3, Token)' takes the third element of a scanner token,
%% which is where the scanner stores the token's characters.
%% Root expression: function call or variable
expr -> call_or_var : '$1'.
%% Function call or variable
%% An identifier followed by a parenthesised argument list is a call,
%% a bare identifier is a variable reference.
call_or_var -> identifier '(' args ')' : {call, element(3, '$1'), '$3'}.
call_or_var -> identifier : {var, element(3, '$1')}.
%% Array is like a arg list, but with square brackets
array -> '[' args ']' : {array, '$2'}.
%% Argument handling
%% An argument list holds at least one argument, so neither `f()' nor `[]' is accepted.
args -> arg : ['$1'].
args -> args ',' arg : '$1' ++ ['$3'].
%% Arguments can be expressions, arrays, numbers, or strings
arg -> expr : '$1'.
arg -> array : '$1'.
arg -> number : {num, element(3, '$1')}.
arg -> string : {str, element(3, '$1')}.
Erlang code.
%% mute xref warning
-export([return_error/2]).

View File

@ -0,0 +1,30 @@
Definitions.
%% Define regular expressions for tokens
IDENTIFIER = [a-zA-Z][a-zA-Z0-9_.]*
SQ_STRING = \'[^\']*\'
DQ_STRING = \"[^\"]*\"
%% Optionally signed decimal (digits '.' digits) or integer.
%% Digits are spelled as [0-9]: leex has no digit class, and a doubled
%% backslash would match a literal backslash character instead.
NUMBER = [+-]?([0-9]+\.[0-9]+|[0-9]+)
LPAREN = \(
RPAREN = \)
LBRACKET = \[
RBRACKET = \]
COMMA = ,
WHITESPACE = [\s\t\n]+
Rules.
%% Match identifiers (function and variable names), strings, numbers, and structural characters
{WHITESPACE} : skip_token.
{IDENTIFIER} : {token, {identifier, TokenLine, TokenChars}}.
{SQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $')}}.
{DQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $")}}.
{NUMBER} : {token, {number, TokenLine, TokenChars}}.
{LPAREN} : {token, {'(', TokenLine}}.
{RPAREN} : {token, {')', TokenLine}}.
{LBRACKET} : {token, {'[', TokenLine}}.
{RBRACKET} : {token, {']', TokenLine}}.
{COMMA} : {token, {',', TokenLine}}.
Erlang code.
%% Remove the given quote character from both ends of the token characters.
unquote(Quoted, QuoteChar) ->
    string:trim(Quoted, both, [QuoteChar]).

View File

@ -0,0 +1,368 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Predefined functions for string templating
-module(emqx_variform_str).
%% String Funcs
-export([
coalesce/1,
coalesce/2,
lower/1,
ltrim/1,
ltrim/2,
reverse/1,
rtrim/1,
rtrim/2,
strlen/1,
substr/2,
substr/3,
trim/1,
trim/2,
upper/1,
split/2,
split/3,
concat/1,
concat/2,
tokens/2,
tokens/3,
sprintf_s/2,
pad/2,
pad/3,
pad/4,
replace/3,
replace/4,
regex_match/2,
regex_replace/3,
ascii/1,
find/2,
find/3,
join_to_string/1,
join_to_string/2,
unescape/1,
nth/2
]).
-define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)).
%%------------------------------------------------------------------------------
%% String Funcs
%%------------------------------------------------------------------------------
%% @doc Return the first of the two arguments which is not empty
%% (empty meaning `<<>>', `""' or `undefined'), or `<<>>' when both are empty.
coalesce(A, _B) when not ?IS_EMPTY(A) ->
    A;
coalesce(_A, B) when not ?IS_EMPTY(B) ->
    B;
coalesce(_A, _B) ->
    <<>>.
%% @doc Return the first non-empty element of the list
%% (empty meaning `<<>>', `""' or `undefined'), or `<<>>' when there is none.
%% The scan stops at the first non-empty element and runs in constant stack space.
coalesce([]) ->
    <<>>;
coalesce([H | T]) when ?IS_EMPTY(H) ->
    coalesce(T);
coalesce([H | _]) ->
    H.
%% @doc Convert a binary string to lowercase.
lower(<<_/binary>> = Str) ->
    string:lowercase(Str).
%% @doc Remove leading whitespace from a binary string.
ltrim(S) when is_binary(S) ->
    string:trim(S, leading).

%% @doc Remove the leading characters which are listed in `Chars'.
%% `Chars' may be a binary (each of its characters is a character to strip)
%% or a list as accepted by string:trim/3.
ltrim(S, Chars) when is_binary(Chars) ->
    %% string:trim/3 only accepts a list of separators
    ltrim(S, unicode:characters_to_list(Chars));
ltrim(S, Chars) ->
    string:trim(S, leading, Chars).
%% @doc Reverse a binary string.
%% string:reverse/1 returns a list of code points (grapheme clusters), so the
%% result is encoded back with unicode:characters_to_binary/1; iolist_to_binary/1
%% would fail on any character above 255.
reverse(S) when is_binary(S) ->
    unicode:characters_to_binary(string:reverse(S)).
%% @doc Remove trailing whitespace from a binary string.
rtrim(S) when is_binary(S) ->
    string:trim(S, trailing).

%% @doc Remove the trailing characters which are listed in `Chars'.
%% `Chars' may be a binary (each of its characters is a character to strip)
%% or a list as accepted by string:trim/3.
rtrim(S, Chars) when is_binary(S), is_binary(Chars) ->
    %% string:trim/3 only accepts a list of separators
    rtrim(S, unicode:characters_to_list(Chars));
rtrim(S, Chars) when is_binary(S) ->
    string:trim(S, trailing, Chars).
%% @doc Return the length of a binary string as computed by string:length/1.
strlen(<<_/binary>> = Str) ->
    string:length(Str).
%% @doc Return the part of the string starting at the zero-based position `Start'.
substr(<<_/binary>> = Str, Start) when is_integer(Start) ->
    string:slice(Str, Start).

%% @doc Return at most `Length' characters of the string, starting at the
%% zero-based position `Start'.
substr(<<_/binary>> = Str, Start, Length) when is_integer(Start), is_integer(Length) ->
    string:slice(Str, Start, Length).
%% @doc Remove leading and trailing whitespace from a binary string.
trim(S) when is_binary(S) ->
    string:trim(S).

%% @doc Remove the leading and trailing characters which are listed in `Chars'.
%% `Chars' may be a binary (each of its characters is a character to strip)
%% or a list as accepted by string:trim/3.
trim(S, Chars) when is_binary(S), is_binary(Chars) ->
    %% string:trim/3 only accepts a list of separators
    trim(S, unicode:characters_to_list(Chars));
trim(S, Chars) when is_binary(S) ->
    string:trim(S, both, Chars).
%% @doc Convert a binary string to uppercase.
upper(<<_/binary>> = Str) ->
    string:uppercase(Str).
%% @doc Split `S' at every occurrence of the pattern `P', dropping empty parts.
split(S, P) when is_binary(S), is_binary(P) ->
    drop_empty(string:split(S, P, all)).

%% @doc Split `S' at the pattern `P' according to the given mode:
%% `notrim' splits at every occurrence, `leading' / `trailing' split only at
%% the first / last occurrence. The `*_notrim' modes keep empty parts, the
%% other modes drop them.
split(S, P, <<"notrim">>) ->
    string:split(S, P, all);
split(S, P, <<"leading_notrim">>) ->
    string:split(S, P, leading);
split(S, P, <<"leading">>) when is_binary(S), is_binary(P) ->
    drop_empty(string:split(S, P, leading));
split(S, P, <<"trailing_notrim">>) ->
    string:split(S, P, trailing);
split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) ->
    drop_empty(string:split(S, P, trailing)).

%% Remove empty binaries and empty lists from the split result.
drop_empty(Parts) ->
    lists:filter(fun(Part) -> Part =/= <<>> andalso Part =/= "" end, Parts).
%% @doc Split `S' into tokens, treating every byte of `Separators' as a separator.
tokens(S, Separators) ->
    lexemes_as_binaries(binary_to_list(S), binary_to_list(Separators)).

%% @doc Same as tokens/2, additionally treating CR, LF and CRLF as separators.
tokens(S, Separators, <<"nocrlf">>) ->
    Chars = binary_to_list(S),
    AllSeparators = binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]],
    lexemes_as_binaries(Chars, AllSeparators).

%% Split the character list into lexemes and convert each one back to a binary.
lexemes_as_binaries(Chars, Separators) ->
    lists:map(fun erlang:list_to_binary/1, string:lexemes(Chars, Separators)).
%% @doc Concatenate two values; both are implicitly converted to strings first.
concat(S1, S2) ->
    concat([S1, S2]).

%% @doc Concatenate all values of the list; each one is implicitly converted
%% to a string first.
concat(List) ->
    unicode:characters_to_binary([str(Item) || Item <- List], unicode).
%% @doc Format `Args' according to the io_lib:format/2 control sequences in
%% the binary `Format', returning the result as a binary.
sprintf_s(Format, Args) when is_list(Args) ->
    FormatStr = binary_to_list(Format),
    erlang:iolist_to_binary(io_lib:format(FormatStr, Args)).
%% @doc Pad the string with trailing spaces up to the length `Len'.
pad(S, Len) when is_binary(S), is_integer(Len) ->
    iolist_to_binary(string:pad(S, Len, trailing)).

%% @doc Pad the string with spaces up to the length `Len'.
%% `Position' is one of `<<"trailing">>', `<<"both">>' or `<<"leading">>'.
pad(S, Len, Position) when is_binary(S), is_integer(Len) ->
    iolist_to_binary(string:pad(S, Len, pad_direction(Position))).

%% @doc Pad the string with the character `Char' up to the length `Len'.
%% `Position' is one of `<<"trailing">>', `<<"both">>' or `<<"leading">>'.
%% The pad character may be any unicode character; the result is UTF-8 encoded.
pad(S, Len, Position, Char) when is_binary(S), is_integer(Len), is_binary(Char) ->
    Direction = pad_direction(Position),
    Chars = unicode:characters_to_list(Char, utf8),
    padded_to_binary(string:pad(S, Len, Direction, Chars)).

%% Translate the textual position into the direction atom of string:pad/3,4.
pad_direction(<<"trailing">>) -> trailing;
pad_direction(<<"both">>) -> both;
pad_direction(<<"leading">>) -> leading.

%% The padding is a list of code points, which iolist_to_binary/1 cannot encode
%% once a code point is above 255, so encode as unicode first.
%% Fall back to the plain byte-wise conversion when the string itself is not
%% valid unicode, so that such input is handled as it was before.
padded_to_binary(Padded) ->
    case unicode:characters_to_binary(Padded) of
        Bin when is_binary(Bin) -> Bin;
        _ -> iolist_to_binary(Padded)
    end.
%% @doc Replace every occurrence of the pattern `P' in `SrcStr' with `RepStr'.
replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) ->
    iolist_to_binary(string:replace(SrcStr, P, RepStr, all)).

%% @doc Replace occurrences of the pattern `P' in `SrcStr' with `RepStr'.
%% `Position' selects which occurrences are replaced: `<<"all">>',
%% `<<"trailing">>' (the last one) or `<<"leading">>' (the first one).
replace(SrcStr, P, RepStr, Position) when
    is_binary(SrcStr), is_binary(P), is_binary(RepStr)
->
    Where = replace_position(Position),
    iolist_to_binary(string:replace(SrcStr, P, RepStr, Where)).

%% Translate the textual position into the atom expected by string:replace/4.
replace_position(<<"all">>) -> all;
replace_position(<<"trailing">>) -> trailing;
replace_position(<<"leading">>) -> leading.
%% @doc Return `true' when the regular expression `RE' matches anywhere
%% in `Str', otherwise `false'.
regex_match(Str, RE) ->
    %% with `{capture, none}' re:run/3 returns just `match' or `nomatch'
    match =:= re:run(Str, RE, [global, {capture, none}]).
%% @doc Replace every match of the regular expression `RE' in `SrcStr'
%% with `RepStr', returning a binary.
regex_replace(SrcStr, RE, RepStr) ->
    Options = [global, {return, binary}],
    re:replace(SrcStr, RE, RepStr, Options).
%% @doc Return the numeric value of the first byte of the binary.
%% Fails when the binary is empty.
ascii(Char) when is_binary(Char) ->
    FirstByte = hd(binary_to_list(Char)),
    FirstByte.
%% @doc Return the rest of `S' starting at the first occurrence of the
%% pattern `P', or an empty binary when `P' does not occur.
find(S, P) when is_binary(S), is_binary(P) ->
    find_s(S, P, leading).

%% @doc Same as find/2, where `<<"leading">>' searches for the first and
%% `<<"trailing">>' for the last occurrence of `P'.
find(S, P, <<"leading">>) when is_binary(S), is_binary(P) ->
    find_s(S, P, leading);
find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) ->
    find_s(S, P, trailing).

%% Search in the given direction, mapping `nomatch' to an empty binary.
find_s(S, P, Dir) ->
    case string:find(S, P, Dir) of
        nomatch -> <<"">>;
        Found -> Found
    end.
%% @doc Join the list items into one binary, separated by `<<", ">>'.
join_to_string(List) when is_list(List) ->
    join_to_string(<<", ">>, List).

%% @doc Join the list items into one binary, separated by `Sep'.
%% Each item is converted to a string first.
join_to_string(Sep, List) when is_list(List), is_binary(Sep) ->
    Items = lists:map(fun str/1, List),
    iolist_to_binary(lists:join(Sep, Items)).
%% @doc Replace the escape sequences of a UTF-8 binary with the characters they denote.
%% Throws `{invalid_unicode_character, Error}' when the unescaped characters
%% cannot be encoded as UTF-8.
unescape(Bin) when is_binary(Bin) ->
    Unescaped = unescape_string(unicode:characters_to_list(Bin, utf8)),
    case unicode:characters_to_binary(Unescaped, utf32, utf8) of
        Out when is_binary(Out) ->
            Out;
        Error ->
            throw({invalid_unicode_character, Error})
    end.
%% @doc Return the `N'-th element (counting from 1) of the list.
%% The index may be an integer, or a string / binary holding an integer.
%% An index beyond the end of the list yields an empty binary.
%% Throws `#{reason => invalid_argument, func => nth, index => N}' when the index
%% is not a number, or is less than 1.
nth(N, List) when (is_list(N) orelse is_binary(N)) andalso is_list(List) ->
    try binary_to_integer(iolist_to_binary(N)) of
        N1 ->
            nth(N1, List)
    catch
        _:_ ->
            throw(#{reason => invalid_argument, func => nth, index => N})
    end;
nth(N, List) when is_integer(N) andalso N < 1 andalso is_list(List) ->
    %% lists:nth/2 has no clause for such an index, report it as a bad argument
    throw(#{reason => invalid_argument, func => nth, index => N});
nth(N, List) when is_integer(N) andalso is_list(List) ->
    case length(List) of
        L when L < N -> <<>>;
        _ -> lists:nth(N, List)
    end.
%% True when C is the character code of a hexadecimal digit (0-9, A-F, a-f).
-define(IS_HEX_DIGIT(C),
    (is_integer(C) andalso
        ((C >= $0 andalso C =< $9) orelse
            (C >= $A andalso C =< $F) orelse
            (C >= $a andalso C =< $f)))
).

%% Replace the escape sequences in a list of unicode code points.
%% Supported are `\\', `\n', `\t', `\r', `\b', `\f', `\v', `\'', `\"', `\?', `\a'
%% and `\x' followed by one or more hexadecimal digits.
%% Throws `{unrecognized_escape_sequence, Binary}' for any other sequence.
unescape_string(Input) -> unescape_string(Input, []).

unescape_string([], Acc) ->
    lists:reverse(Acc);
unescape_string([$\\, $\\ | Rest], Acc) ->
    unescape_string(Rest, [$\\ | Acc]);
unescape_string([$\\, $n | Rest], Acc) ->
    unescape_string(Rest, [$\n | Acc]);
unescape_string([$\\, $t | Rest], Acc) ->
    unescape_string(Rest, [$\t | Acc]);
unescape_string([$\\, $r | Rest], Acc) ->
    unescape_string(Rest, [$\r | Acc]);
unescape_string([$\\, $b | Rest], Acc) ->
    unescape_string(Rest, [$\b | Acc]);
unescape_string([$\\, $f | Rest], Acc) ->
    unescape_string(Rest, [$\f | Acc]);
unescape_string([$\\, $v | Rest], Acc) ->
    unescape_string(Rest, [$\v | Acc]);
unescape_string([$\\, $' | Rest], Acc) ->
    unescape_string(Rest, [$\' | Acc]);
unescape_string([$\\, $" | Rest], Acc) ->
    unescape_string(Rest, [$\" | Acc]);
unescape_string([$\\, $? | Rest], Acc) ->
    unescape_string(Rest, [$\? | Acc]);
unescape_string([$\\, $a | Rest], Acc) ->
    unescape_string(Rest, [$\a | Acc]);
%% Start of HEX escape code: `\x' followed by at least one hex digit
unescape_string([$\\, $x | [Digit | _] = HexStringStart], Acc) when ?IS_HEX_DIGIT(Digit) ->
    unescape_handle_hex_string(HexStringStart, Acc);
%% We treat all other escape sequences as not valid input to leave room for
%% extending the function to support more escape codes
unescape_string([$\\, X | _Rest], _Acc) ->
    %% X is a unicode code point, which list_to_binary/1 cannot encode when above 255
    erlang:throw({unrecognized_escape_sequence, unicode:characters_to_binary([$\\, X])});
unescape_string([First | Rest], Acc) ->
    unescape_string(Rest, [First | Acc]).

%% Consume the hex digits at the start of the string, push the code point
%% they denote and continue with the rest.
unescape_handle_hex_string(HexStringStart, Acc) ->
    {RemainingString, Num} = parse_hex_string(HexStringStart),
    unescape_string(RemainingString, [Num | Acc]).

%% Split off the longest prefix of hex digits and convert it to an integer.
%% Returns `{RemainingString, Integer}'.
parse_hex_string(SeqStartingWithHexDigit) ->
    {HexDigits, RemainingString} =
        lists:splitwith(fun is_hex_digit/1, SeqStartingWithHexDigit),
    {RemainingString, list_to_integer(HexDigits, 16)}.

is_hex_digit(C) when ?IS_HEX_DIGIT(C) -> true;
is_hex_digit(_) -> false.
%%------------------------------------------------------------------------------
%% Data Type Conversion Funcs
%%------------------------------------------------------------------------------
%% Convert a term to a binary using emqx_utils_conv:bin/1.
str(Term) ->
    emqx_utils_conv:bin(Term).

View File

@ -0,0 +1,129 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_variform_tests).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-define(SYNTAX_ERROR, {error, "syntax error before:" ++ _}).
%% EUnit generator for basic rendering: variable references, nested function
%% calls, unbound variables and the `nth' index edge cases.
%% NOTE(review): the name is presumably a typo of `render_test_'; it is kept
%% unchanged here.
redner_test_() ->
    [
        {"direct var reference", ?_assertEqual({ok, <<"1">>}, render("a", #{a => 1}))},
        {"concat strings",
            ?_assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{}))},
        {"concat empty string", ?_assertEqual({ok, <<"">>}, render("concat('')", #{}))},
        {"tokens 1st",
            ?_assertEqual(
                {ok, <<"a">>},
                render("nth(1,tokens(var, ','))", #{var => <<"a,b">>})
            )},
        {"unknown var as empty str", ?_assertEqual({ok, <<>>}, render("var", #{}))},
        {"out of range nth index",
            ?_assertEqual(
                {ok, <<>>},
                render("nth(2, tokens(var, ','))", #{var => <<"a">>})
            )},
        {"not a index number for nth",
            ?_assertMatch(
                {error, #{reason := invalid_argument, func := nth, index := <<"notnum">>}},
                render("nth('notnum', tokens(var, ','))", #{var => <<"a">>})
            )}
    ].
%% EUnit generator checking the error reasons returned when an expression
%% references a function or module that cannot be resolved, or uses a
%% malformed function reference.
unknown_func_test_() ->
    [
        {"unknown function",
            ?_assertMatch(
                {error, #{reason := unknown_variform_function}},
                render("nonexistingatom__(a)", #{})
            )},
        {"unknown module",
            ?_assertMatch(
                {error, #{reason := unknown_variform_module}},
                render("nonexistingatom__.nonexistingatom__(a)", #{})
            )},
        {"unknown function in a known module",
            ?_assertMatch(
                {error, #{reason := unknown_variform_function}},
                render("emqx_variform_str.nonexistingatom__(a)", #{})
            )},
        {"invalid func reference",
            ?_assertMatch(
                {error, #{reason := invalid_function_reference, function := "a.b.c"}},
                render("a.b.c(var)", #{})
            )}
    ].
%% Helper called through the variform expressions in
%% inject_allowed_module_test/0: flattens an iolist into a single binary.
concat(Parts) ->
    erlang:iolist_to_binary(Parts).
%% Registers this test module as an allowed variform module and checks that:
%%   * its concat/1 can be called from an expression,
%%   * calling it with two arguments reports an unknown function of arity 2,
%%   * a module that was not injected (`emqx') is rejected.
%% The module is always unregistered again in the `after' section.
inject_allowed_module_test() ->
    ModPrefix = atom_to_list(?MODULE),
    try
        emqx_variform:inject_allowed_module(?MODULE),
        ListArgResult = render(ModPrefix ++ ".concat(['a','b'])", #{}),
        ?assertEqual({ok, <<"ab">>}, ListArgResult),
        TwoArgsResult = render(ModPrefix ++ ".concat('a','b')", #{}),
        ?assertMatch(
            {error, #{
                reason := unknown_variform_function,
                module := ?MODULE,
                function := concat,
                arity := 2
            }},
            TwoArgsResult
        ),
        NotInjectedResult = render("emqx.concat('a','b')", #{}),
        ?assertMatch(
            {error, #{reason := unallowed_veriform_module, module := emqx}},
            NotInjectedResult
        )
    after
        emqx_variform:erase_allowed_module(?MODULE)
    end.
%% EUnit generator for `coalesce': the first non-empty argument is expected,
%% for both string literals and variables, and an empty binary when every
%% argument is empty or unbound.
coalesce_test_() ->
    [
        {"coalesce first", ?_assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{}))},
        {"coalesce second", ?_assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{}))},
        {"coalesce first var",
            ?_assertEqual(
                {ok, <<"a">>},
                render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>})
            )},
        {"coalesce second var",
            ?_assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>}))},
        {"coalesce empty", ?_assertEqual({ok, <<>>}, render("coalesce(a,b)", #{}))}
    ].
%% EUnit generator listing expressions that must be rejected with a syntax
%% error: an empty expression, a bare string constant (single or double
%% quoted) and a function call without arguments.
syntax_error_test_() ->
    [
        {"empty expression", ?_assertMatch(?SYNTAX_ERROR, render("", #{}))},
        {"const string single quote", ?_assertMatch(?SYNTAX_ERROR, render("'a'", #{}))},
        {"const string double quote",
            ?_assertMatch(?SYNTAX_ERROR, render(<<"\"a\"">>, #{}))},
        {"no arity", ?_assertMatch(?SYNTAX_ERROR, render("concat()", #{}))}
    ].
%% Test helper: evaluates `Expr' against `Vars' through emqx_variform:render/2
%% and returns its result unchanged.
render(Expr, Vars) ->
    emqx_variform:render(Expr, Vars).