diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl
index 3cebdd67a..c5a083ef4 100644
--- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl
+++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl
@@ -161,7 +161,8 @@
     join_to_string/2,
     join_to_sql_values_string/1,
     jq/2,
-    jq/3
+    jq/3,
+    unescape/1
 ]).
 
 %% Map Funcs
@@ -937,6 +938,92 @@ jq(FilterProgram, JSONBin) ->
         ])
     ).
 
+%% Expands C-style backslash escape sequences in a UTF-8 encoded binary.
+%%
+%% Supported sequences are \\, \n, \t, \r, \b, \f, \v, \', \", \?, \a (alert,
+%% code point 7) and \x followed by one or more hexadecimal digits naming a
+%% Unicode code point. All consecutive hex digits are consumed, so \x41 and
+%% \x0041 both yield "A".
+%%
+%% Throws {unrecognized_escape_sequence, Seq} for any other backslash sequence
+%% and {invalid_unicode_character, Details} when the input is not valid UTF-8
+%% or an escape names a value that is not a valid Unicode code point.
+unescape(Bin) when is_binary(Bin) ->
+    case unicode:characters_to_list(Bin, utf8) of
+        UnicodeList when is_list(UnicodeList) ->
+            unescape_to_utf8(unescape_string(UnicodeList));
+        InvalidInput ->
+            %% characters_to_list/2 reports bad input with an error/incomplete
+            %% tuple instead of raising, so turn it into a descriptive throw
+            throw({invalid_unicode_character, InvalidInput})
+    end.
+
+%% Encodes a list of code points as UTF-8; throws when an element is not a
+%% valid Unicode code point (e.g. the result of \x11000000)
+unescape_to_utf8(CodePoints) ->
+    case unicode:characters_to_binary(CodePoints, unicode, utf8) of
+        Out when is_binary(Out) ->
+            Out;
+        Error ->
+            throw({invalid_unicode_character, Error})
+    end.
+
+unescape_string(Input) -> unescape_string(Input, []).
+
+unescape_string([], Acc) ->
+    lists:reverse(Acc);
+unescape_string([$\\, $\\ | Rest], Acc) ->
+    unescape_string(Rest, [$\\ | Acc]);
+unescape_string([$\\, $n | Rest], Acc) ->
+    unescape_string(Rest, [$\n | Acc]);
+unescape_string([$\\, $t | Rest], Acc) ->
+    unescape_string(Rest, [$\t | Acc]);
+unescape_string([$\\, $r | Rest], Acc) ->
+    unescape_string(Rest, [$\r | Acc]);
+unescape_string([$\\, $b | Rest], Acc) ->
+    unescape_string(Rest, [$\b | Acc]);
+unescape_string([$\\, $f | Rest], Acc) ->
+    unescape_string(Rest, [$\f | Acc]);
+unescape_string([$\\, $v | Rest], Acc) ->
+    unescape_string(Rest, [$\v | Acc]);
+unescape_string([$\\, $' | Rest], Acc) ->
+    unescape_string(Rest, [$\' | Acc]);
+unescape_string([$\\, $" | Rest], Acc) ->
+    unescape_string(Rest, [$\" | Acc]);
+unescape_string([$\\, $? | Rest], Acc) ->
+    unescape_string(Rest, [$\? | Acc]);
+unescape_string([$\\, $a | Rest], Acc) ->
+    %% Erlang has no \a character escape ($\a is simply $a), so the alert
+    %% (BEL) code point is written numerically
+    unescape_string(Rest, [7 | Acc]);
+%% Hex escape: \x followed by one or more hex digits naming a code point
+unescape_string([$\\, $x | Rest], Acc) ->
+    case lists:splitwith(fun is_hex_digit/1, Rest) of
+        {[], _} ->
+            %% No hex digit after \x: rejected like any other unknown escape
+            throw_unrecognized_escape($x);
+        {HexDigits, Remaining} ->
+            unescape_string(Remaining, [list_to_integer(HexDigits, 16) | Acc])
+    end;
+%% We treat all other escape sequences as not valid input to leave room for
+%% extending the function to support more escape codes
+unescape_string([$\\, X | _Rest], _Acc) ->
+    throw_unrecognized_escape(X);
+%% Everything else, including a lone backslash that ends the input, is copied
+%% through unchanged
+unescape_string([First | Rest], Acc) ->
+    unescape_string(Rest, [First | Acc]).
+
+%% The sequence is encoded as UTF-8 because the character following the
+%% backslash can be any code point, which list_to_binary/1 cannot represent
+throw_unrecognized_escape(Char) ->
+    erlang:throw({unrecognized_escape_sequence, unicode:characters_to_binary([$\\, Char])}).
+
+is_hex_digit(C) when C >= $0, C =< $9 -> true;
+is_hex_digit(C) when C >= $A, C =< $F -> true;
+is_hex_digit(C) when C >= $a, C =< $f -> true;
+is_hex_digit(_) -> false.
+
 %%------------------------------------------------------------------------------
 %% Array Funcs
 %%------------------------------------------------------------------------------
diff --git a/apps/emqx_rule_engine/test/emqx_rule_funcs_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_funcs_SUITE.erl
index 073ac5bf6..b31889ab4 100644
--- a/apps/emqx_rule_engine/test/emqx_rule_funcs_SUITE.erl
+++ b/apps/emqx_rule_engine/test/emqx_rule_funcs_SUITE.erl
@@ -736,6 +736,71 @@ t_regex_replace(_) ->
     ?assertEqual(<<"aebed">>, apply_func(regex_replace, [<<"accbcd">>, <<"c+">>, <<"e">>])),
     ?assertEqual(<<"a[cc]b[c]d">>, apply_func(regex_replace, [<<"accbcd">>, <<"c+">>, <<"[&]">>])).
 
+t_unescape(_) ->
+    ?assertEqual(<<"\n">>, emqx_rule_funcs:unescape(<<"\\n">>)),
+    ?assertEqual(<<"\t">>, emqx_rule_funcs:unescape(<<"\\t">>)),
+    ?assertEqual(<<"\r">>, emqx_rule_funcs:unescape(<<"\\r">>)),
+    ?assertEqual(<<"\b">>, emqx_rule_funcs:unescape(<<"\\b">>)),
+    ?assertEqual(<<"\f">>, emqx_rule_funcs:unescape(<<"\\f">>)),
+    ?assertEqual(<<"\v">>, emqx_rule_funcs:unescape(<<"\\v">>)),
+    ?assertEqual(<<"'">>, emqx_rule_funcs:unescape(<<"\\'">>)),
+    ?assertEqual(<<"\"">>, emqx_rule_funcs:unescape(<<"\\\"">>)),
+    ?assertEqual(<<"?">>, emqx_rule_funcs:unescape(<<"\\?">>)),
+    % Alert (BEL); Erlang has no \a escape, so the code point is spelled out
+    ?assertEqual(<<7>>, emqx_rule_funcs:unescape(<<"\\a">>)),
+    % Test escaping backslash itself
+    ?assertEqual(<<"\\">>, emqx_rule_funcs:unescape(<<"\\\\">>)),
+    % Test a string without any escape sequences
+    ?assertEqual(<<"Hello, World!">>, emqx_rule_funcs:unescape(<<"Hello, World!">>)),
+    % Test a string with escape sequences
+    ?assertEqual(<<"Hello,\t World\n!">>, emqx_rule_funcs:unescape(<<"Hello,\\t World\\n!">>)),
+    % Test unrecognized escape sequence (should throw an error)
+    ?assertException(
+        throw, {unrecognized_escape_sequence, <<$\\, $L>>}, emqx_rule_funcs:unescape(<<"\\L">>)
+    ),
+    % Unrecognized escape whose character is outside Latin-1
+    ?assertException(
+        throw,
+        {unrecognized_escape_sequence, <<"\\€"/utf8>>},
+        emqx_rule_funcs:unescape(<<"\\€"/utf8>>)
+    ),
+    % Input that is not valid UTF-8
+    ?assertException(
+        throw, {invalid_unicode_character, _}, emqx_rule_funcs:unescape(<<16#FF>>)
+    ),
+    % Test hexadecimal escape sequences
+
+    % Newline on its own
+    ?assertEqual(<<"\n">>, emqx_rule_funcs:unescape(<<"\\x0A">>)),
+    % Newline after plain text
+    ?assertEqual(<<"hej\n">>, emqx_rule_funcs:unescape(<<"hej\\x0A">>)),
+    % Newline before plain text
+    ?assertEqual(<<"\nhej">>, emqx_rule_funcs:unescape(<<"\\x0Ahej">>)),
+    % Newline between plain text
+    ?assertEqual(<<"hej\nhej">>, emqx_rule_funcs:unescape(<<"hej\\x0Ahej">>)),
+    % "ABC"
+    ?assertEqual(<<"ABC">>, emqx_rule_funcs:unescape(<<"\\x41\\x42\\x43">>)),
+    % "\xFF" = 255 in decimal
+    ?assertEqual(<<"\xFF"/utf8>>, emqx_rule_funcs:unescape(<<"\\xFF">>)),
+    % "W" = \x57
+    ?assertEqual(<<"Hello, World!">>, emqx_rule_funcs:unescape(<<"Hello, \\x57orld!">>)).
+
+t_unescape_hex(_) ->
+    ?assertEqual(<<"A"/utf8>>, emqx_rule_funcs:unescape(<<"\\x41">>)),
+    ?assertEqual(<<"Hello"/utf8>>, emqx_rule_funcs:unescape(<<"\\x48\\x65\\x6c\\x6c\\x6f">>)),
+    ?assertEqual(<<"A"/utf8>>, emqx_rule_funcs:unescape(<<"\\x0041">>)),
+    ?assertEqual(<<"€"/utf8>>, emqx_rule_funcs:unescape(<<"\\x20AC">>)),
+    ?assertEqual(<<"❤"/utf8>>, emqx_rule_funcs:unescape(<<"\\x2764">>)),
+    ?assertException(
+        throw, {unrecognized_escape_sequence, <<"\\x">>}, emqx_rule_funcs:unescape(<<"\\xG1">>)
+    ),
+    ?assertException(
+        throw, {invalid_unicode_character, _}, emqx_rule_funcs:unescape(<<"\\x11000000">>)
+    ),
+    ?assertEqual(
+        <<"Hello, 世界"/utf8>>, emqx_rule_funcs:unescape(<<"Hello, \\x00004E16\\x0000754C">>)
+    ).
+
 jq_1_elm_res(JSONString) ->
     Bin = list_to_binary(JSONString),
     [apply_func(json_decode, [Bin])].
diff --git a/changes/ce/feat-12671.en.md b/changes/ce/feat-12671.en.md
new file mode 100644
index 000000000..d49f11269
--- /dev/null
+++ b/changes/ce/feat-12671.en.md
@@ -0,0 +1 @@
+An `unescape` function has been added to the rule engine SQL language to handle expansion of escape sequences in strings. This addition has been done because string literals in the SQL language don't support any escape codes (e.g., `\n` and `\t`). This enhancement allows for more flexible string manipulation within SQL expressions.