From 627b8c45d2b454f74bea017d132bcedf08d666c5 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 5 Jun 2024 18:09:24 +0800 Subject: [PATCH] fix(utils_sql): improve insert sql regular expression --- apps/emqx_utils/src/emqx_utils_sql.erl | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/apps/emqx_utils/src/emqx_utils_sql.erl b/apps/emqx_utils/src/emqx_utils_sql.erl index ff468d031..958507e3f 100644 --- a/apps/emqx_utils/src/emqx_utils_sql.erl +++ b/apps/emqx_utils/src/emqx_utils_sql.erl @@ -54,17 +54,26 @@ get_statement_type(Query) -> -spec parse_insert(iodata()) -> {ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}. parse_insert(SQL) -> - case re:split(SQL, "((?i)values)", [{return, binary}]) of - [Part1, _, Part3] -> - case string:trim(Part1, leading) of - <<"insert", _/binary>> = InsertSQL -> - {ok, {InsertSQL, Part3}}; - <<"INSERT", _/binary>> = InsertSQL -> - {ok, {InsertSQL, Part3}}; - _ -> - {error, not_insert_sql} - end; - _ -> + Pattern = << + %% case-insensitive + "(?i)^\\s*", + %% Group-1: insert into, table name and columns (when existed). + %% All space characters suffixed to will be kept + %% `INSERT INTO [(, ..)]` + "(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)", + %% Keyword: `VALUES` + "\\s*values\\s*", + %% Group-2: literals value(s) or placeholder(s) with round brackets. + %% And the sub-pattern in brackets does not do any capturing + %% `([ | ], ..])` + "(\\((?:[^()]++|(?2))*\\))", + "\\s*$" + >>, + + case re:run(SQL, Pattern, [{capture, all_but_first, binary}]) of + {match, [InsertInto, ValuesTemplate]} -> + {ok, {InsertInto, ValuesTemplate}}; + nomatch -> {error, not_insert_sql} end.