perf(utils_sql): pre-build regexp

This commit is contained in:
JimMoen 2024-06-06 11:01:38 +08:00
parent 6bf3492eb4
commit dff31b293c
No known key found for this signature in database
1 changed files with 38 additions and 17 deletions

View File

@ -31,8 +31,44 @@
-type statement_type() :: select | insert | delete | update. -type statement_type() :: select | insert | delete | update.
-type value() :: null | binary() | number() | boolean() | [value()]. -type value() :: null | binary() | number() | boolean() | [value()].
%% Key (paired with ?MODULE) under which the compiled INSERT regexp is cached
%% in persistent_term.
-define(INSERT_RE_MP_KEY, insert_re_mp).
%% Source of the regexp that splits an `INSERT INTO ... VALUES (...)` statement
%% into its two captured parts (Group-1 and Group-2 below).
-define(INSERT_RE_BIN, <<
%% Case-insensitive; leading whitespace is allowed.
"(?i)^\\s*",
%% Group-1: `insert into`, the table name and the column list (when present).
%% Whitespace following <TABLE_NAME> is kept in the capture.
%% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]`
"(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)",
%% Keyword: `VALUES`
"\\s*values\\s*",
%% Group-2: literal value(s) or placeholder(s) wrapped in round brackets.
%% The inner alternation is non-capturing; `(?2)` recurses into this group,
%% so nested, balanced brackets are accepted.
%% `([<VALUE> | <PLACEHOLDER>], ..)`
"(\\((?:[^()]++|(?2))*\\))",
"\\s*$"
>>).
-dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}). -dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}).
-on_load(put_insert_mp/0).
%% Compile the INSERT-statement regexp and cache the `{ok, MP}` result in
%% persistent_term under `{?MODULE, ?INSERT_RE_MP_KEY}`.
%% Runs as the module's `-on_load` hook and is also called lazily by
%% `get_insert_mp/0` when the cached term is missing.
%% Always returns `ok`; crashes with `badmatch` if the pattern fails to compile.
-spec put_insert_mp() -> ok.
put_insert_mp() ->
    %% Assert success before caching: storing an `{error, _}` tuple would let
    %% the load succeed and only fail later, far from the cause, when the
    %% cached value is read back.
    {ok, _MP} = Compiled = re:compile(?INSERT_RE_BIN),
    persistent_term:put({?MODULE, ?INSERT_RE_MP_KEY}, Compiled),
    ok.
%% Type copied from stdlib/src/re.erl for compatibility with OTP 26,
%% since `re:mp()` is exported only after OTP 27.
-type mp() :: {re_pattern, _, _, _, _}.
%% Fetch the pre-compiled INSERT regexp from persistent_term.
%% If nothing is cached under the key yet, compile and store it via
%% `put_insert_mp/0`, then look it up again.
-spec get_insert_mp() -> {ok, mp()}.
get_insert_mp() ->
    Key = {?MODULE, ?INSERT_RE_MP_KEY},
    case persistent_term:get(Key, undefined) of
        {ok, _Compiled} = Cached ->
            Cached;
        undefined ->
            %% Cache miss: populate the term, then retry the lookup.
            ok = put_insert_mp(),
            get_insert_mp()
    end.
-spec get_statement_type(iodata()) -> statement_type() | {error, unknown}. -spec get_statement_type(iodata()) -> statement_type() | {error, unknown}.
get_statement_type(Query) -> get_statement_type(Query) ->
KnownTypes = #{ KnownTypes = #{
@ -54,23 +90,8 @@ get_statement_type(Query) ->
-spec parse_insert(iodata()) -> -spec parse_insert(iodata()) ->
{ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}. {ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}.
parse_insert(SQL) -> parse_insert(SQL) ->
Pattern = << {ok, MP} = get_insert_mp(),
%% case-insensitive case re:run(SQL, MP, [{capture, all_but_first, binary}]) of
"(?i)^\\s*",
%% Group-1: insert into, table name and columns (when existed).
%% All space characters suffixed to <TABLE_NAME> will be kept
%% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]`
"(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)",
%% Keyword: `VALUES`
"\\s*values\\s*",
%% Group-2: literals value(s) or placeholder(s) with round brackets.
%% And the sub-pattern in brackets does not do any capturing
%% `([<VALUE> | <PLACEHOLDER>], ..])`
"(\\((?:[^()]++|(?2))*\\))",
"\\s*$"
>>,
case re:run(SQL, Pattern, [{capture, all_but_first, binary}]) of
{match, [InsertInto, ValuesTemplate]} -> {match, [InsertInto, ValuesTemplate]} ->
{ok, {InsertInto, ValuesTemplate}}; {ok, {InsertInto, ValuesTemplate}};
nomatch -> nomatch ->