Merge pull request #12671 from kjellwinblad/kjell/rule_engine/fix/unescape_rules_from_dashboard/EMQX-11847

feat(rule engine SQL): add an `unescape` function
This commit is contained in:
Zaiming (Stone) Shi 2024-03-08 17:57:31 +01:00 committed by GitHub
commit 68682c4231
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 189 additions and 1 deletions

View File

@ -161,7 +161,8 @@
join_to_string/2,
join_to_sql_values_string/1,
jq/2,
jq/3
jq/3,
unescape/1
]).
%% Map Funcs
@ -937,6 +938,138 @@ jq(FilterProgram, JSONBin) ->
])
).
%% @doc Expand backslash escape sequences in a UTF-8 encoded binary.
%%
%% The input is decoded to a list of Unicode code points, the escape
%% sequences are expanded by unescape_string/1, and the result is encoded
%% back into a UTF-8 binary.
%%
%% Throws `{invalid_unicode_character, Detail}' when the input is not valid
%% UTF-8, or when the expanded result cannot be encoded as UTF-8 (for
%% example a `\x' escape whose value is not a valid code point). `Detail' is
%% the error tuple returned by the `unicode' module. Exceptions thrown by
%% unescape_string/1 propagate unchanged.
unescape(Bin) when is_binary(Bin) ->
    case unicode:characters_to_list(Bin, utf8) of
        CodePoints when is_list(CodePoints) ->
            Unescaped = unescape_string(CodePoints),
            case unicode:characters_to_binary(Unescaped, utf32, utf8) of
                Out when is_binary(Out) ->
                    Out;
                EncodeError ->
                    throw({invalid_unicode_character, EncodeError})
            end;
        DecodeError ->
            %% Malformed or truncated UTF-8 makes characters_to_list/2 return
            %% an {error, _, _} / {incomplete, _, _} tuple instead of a list.
            %% Report it with the same tag as the encode failure rather than
            %% letting unescape_string/1 crash on a tuple argument.
            throw({invalid_unicode_character, DecodeError})
    end.
%% Expand backslash escape sequences in a list of Unicode code points and
%% return the resulting list of code points.
%%
%% Recognised sequences are a backslash followed by one of
%% `\', `n', `t', `r', `b', `f', `v', `'', `"', `?', `a', or by `x' and one
%% or more hexadecimal digits. Any other backslash + character pair throws
%% `{unrecognized_escape_sequence, Bin}', where `Bin' is that pair encoded
%% as UTF-8. A backslash that is the very last element of the input has no
%% character after it and is copied through unchanged.
unescape_string(Input) -> unescape_string(Input, []).

%% The accumulator is built in reverse and flipped once at the end.
unescape_string([], Acc) ->
    lists:reverse(Acc);
unescape_string([$\\, $\\ | Rest], Acc) ->
    unescape_string(Rest, [$\\ | Acc]);
unescape_string([$\\, $n | Rest], Acc) ->
    unescape_string(Rest, [$\n | Acc]);
unescape_string([$\\, $t | Rest], Acc) ->
    unescape_string(Rest, [$\t | Acc]);
unescape_string([$\\, $r | Rest], Acc) ->
    unescape_string(Rest, [$\r | Acc]);
unescape_string([$\\, $b | Rest], Acc) ->
    unescape_string(Rest, [$\b | Acc]);
unescape_string([$\\, $f | Rest], Acc) ->
    unescape_string(Rest, [$\f | Acc]);
unescape_string([$\\, $v | Rest], Acc) ->
    unescape_string(Rest, [$\v | Acc]);
unescape_string([$\\, $' | Rest], Acc) ->
    unescape_string(Rest, [$\' | Acc]);
unescape_string([$\\, $" | Rest], Acc) ->
    unescape_string(Rest, [$\" | Acc]);
unescape_string([$\\, $? | Rest], Acc) ->
    unescape_string(Rest, [$\? | Acc]);
%% NOTE(review): Erlang does not appear to define an `\a' character escape,
%% so `$\a' likely evaluates to `$a' and this clause yields the letter `a'
%% rather than BEL (7). Confirm the intended behaviour before changing it;
%% the existing test suite asserts the current result.
unescape_string([$\\, $a | Rest], Acc) ->
    unescape_string(Rest, [$\a | Acc]);
%% Start of HEX escape code: `\x' must be followed by at least one hex digit.
%% The digit is left at the head of HexStringStart so the hex parser sees it.
unescape_string([$\\, $x | [Digit | _] = HexStringStart], Acc) when
    is_integer(Digit) andalso
        ((Digit >= $0 andalso Digit =< $9) orelse
            (Digit >= $A andalso Digit =< $F) orelse
            (Digit >= $a andalso Digit =< $f))
->
    unescape_handle_hex_string(HexStringStart, Acc);
%% We treat all other escape sequences as not valid input to leave room for
%% extending the function to support more escape codes
unescape_string([$\\, X | _Rest], _Acc) ->
    %% X may be any Unicode code point, so encode the pair as UTF-8;
    %% list_to_binary/1 would raise badarg for code points above 255.
    erlang:throw({unrecognized_escape_sequence, unicode:characters_to_binary([$\\, X])});
unescape_string([First | Rest], Acc) ->
    unescape_string(Rest, [First | Acc]).
%% Consume the run of hex digits at the head of the input, push the decoded
%% number onto the accumulator, and resume unescaping whatever follows.
unescape_handle_hex_string(DigitsAndTail, Acc) ->
    {Tail, CodePoint} = parse_hex_string(DigitsAndTail),
    unescape_string(Tail, [CodePoint | Acc]).
%% Split the input at the first element that is not a hex digit and return
%% `{Remaining, Number}', where `Number' is the leading digit run read as a
%% base-16 integer. The digit run is not length-limited: every consecutive
%% hex digit is consumed. The input must start with at least one hex digit;
%% otherwise list_to_integer/2 fails with badarg.
parse_hex_string(SeqStartingWithHexDigit) ->
    {HexDigits, Remaining} = lists:splitwith(fun is_hex_digit/1, SeqStartingWithHexDigit),
    {Remaining, list_to_integer(HexDigits, 16)}.
%% Return true when the argument is the integer code of an ASCII hexadecimal
%% digit (`0'-`9', `A'-`F', `a'-`f'), and false for anything else.
%% The is_integer/1 test keeps floats such as 48.0 from matching the ranges.
is_hex_digit(C) when is_integer(C), C >= $0, C =< $9 -> true;
is_hex_digit(C) when is_integer(C), C >= $A, C =< $F -> true;
is_hex_digit(C) when is_integer(C), C >= $a, C =< $f -> true;
is_hex_digit(_) -> false.
%%------------------------------------------------------------------------------
%% Array Funcs
%%------------------------------------------------------------------------------

View File

@ -736,6 +736,60 @@ t_regex_replace(_) ->
?assertEqual(<<"aebed">>, apply_func(regex_replace, [<<"accbcd">>, <<"c+">>, <<"e">>])),
?assertEqual(<<"a[cc]b[c]d">>, apply_func(regex_replace, [<<"accbcd">>, <<"c+">>, <<"[&]">>])).
%% Exercises emqx_rule_funcs:unescape/1 with the single-character escapes,
%% plain and mixed strings, an unrecognized escape, and `\x' hex escapes.
%% Each table entry is {ExpectedOutput, EscapedInput}.
%% NOTE(review): Erlang does not appear to define an `\a' escape, so the
%% expected value <<"\a">> below is likely just <<"a">> - confirm.
t_unescape(_) ->
    Check = fun({Expected, Escaped}) ->
        ?assertEqual(Expected, emqx_rule_funcs:unescape(Escaped))
    end,
    lists:foreach(Check, [
        {<<"\n">>, <<"\\n">>},
        {<<"\t">>, <<"\\t">>},
        {<<"\r">>, <<"\\r">>},
        {<<"\b">>, <<"\\b">>},
        {<<"\f">>, <<"\\f">>},
        {<<"\v">>, <<"\\v">>},
        {<<"'">>, <<"\\'">>},
        {<<"\"">>, <<"\\\"">>},
        {<<"?">>, <<"\\?">>},
        {<<"\a">>, <<"\\a">>},
        % Escaped backslash
        {<<"\\">>, <<"\\\\">>},
        % No escape sequences at all
        {<<"Hello, World!">>, <<"Hello, World!">>},
        % Escape sequences embedded in ordinary text
        {<<"Hello,\t World\n!">>, <<"Hello,\\t World\\n!">>}
    ]),
    % An unrecognized escape sequence must throw
    ?assertException(
        throw, {unrecognized_escape_sequence, <<$\\, $L>>}, emqx_rule_funcs:unescape(<<"\\L">>)
    ),
    % Hexadecimal escape sequences
    lists:foreach(Check, [
        % Newline alone, as suffix, as prefix, and in the middle
        {<<"\n">>, <<"\\x0A">>},
        {<<"hej\n">>, <<"hej\\x0A">>},
        {<<"\nhej">>, <<"\\x0Ahej">>},
        {<<"hej\nhej">>, <<"hej\\x0Ahej">>},
        % "ABC"
        {<<"ABC">>, <<"\\x41\\x42\\x43">>},
        % Code point 255, UTF-8 encoded
        {<<"\xFF"/utf8>>, <<"\\xFF">>},
        % \x57 is "W"
        {<<"Hello, World!">>, <<"Hello, \\x57orld!">>}
    ]).
%% Exercises the `\x' hex escape of emqx_rule_funcs:unescape/1: short and
%% zero-padded digit runs, code points outside ASCII, a non-hex character
%% after `\x', and a value too large to be a Unicode code point.
t_unescape_hex(_) ->
    ?assertEqual(<<"A"/utf8>>, emqx_rule_funcs:unescape(<<"\\x41">>)),
    ?assertEqual(<<"Hello"/utf8>>, emqx_rule_funcs:unescape(<<"\\x48\\x65\\x6c\\x6c\\x6f">>)),
    ?assertEqual(<<"A"/utf8>>, emqx_rule_funcs:unescape(<<"\\x0041">>)),
    %% Expected values are written as numeric code points so they cannot be
    %% lost or altered by source-encoding problems.
    %% U+20AC (euro sign)
    ?assertEqual(<<16#20AC/utf8>>, emqx_rule_funcs:unescape(<<"\\x20AC">>)),
    %% U+2764 (heavy black heart)
    ?assertEqual(<<16#2764/utf8>>, emqx_rule_funcs:unescape(<<"\\x2764">>)),
    %% `G' is not a hex digit, so `\x' itself is reported as unrecognized
    ?assertException(
        throw, {unrecognized_escape_sequence, <<"\\x">>}, emqx_rule_funcs:unescape(<<"\\xG1">>)
    ),
    %% 16#11000000 is above the Unicode range and cannot be encoded
    ?assertException(
        throw, {invalid_unicode_character, _}, emqx_rule_funcs:unescape(<<"\\x11000000">>)
    ),
    ?assertEqual(
        <<"Hello, 世界"/utf8>>, emqx_rule_funcs:unescape(<<"Hello, \\x00004E16\\x0000754C">>)
    ).
%% Convert a JSON string (character list) to a binary, run it through the
%% `json_decode' rule function, and wrap the result in a one-element list.
jq_1_elm_res(JSONString) ->
    [apply_func(json_decode, [list_to_binary(JSONString)])].

View File

@ -0,0 +1 @@
An `unescape` function has been added to the rule engine SQL language to expand escape sequences in strings. It was added because string literals in the SQL language do not support any escape codes (e.g., `\n` and `\t`). This enhancement allows for more flexible string manipulation within SQL expressions.