From 397c104a850c68dabad56f16ad78de498a016e83 Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 26 Jul 2024 16:56:47 +0800 Subject: [PATCH 01/30] feat(exclusive): added CLI interface for exclusive topics --- apps/emqx_management/src/emqx_mgmt_cli.erl | 24 +++++++++++++++++-- .../test/emqx_mgmt_cli_SUITE.erl | 5 ++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 2742167f5..035af188f 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -23,6 +23,7 @@ -include_lib("emqx/include/logger.hrl"). -define(DATA_BACKUP_OPTS, #{print_fun => fun emqx_ctl:print/2}). +-define(EXCLUSIVE_TAB, emqx_exclusive_subscription). -export([load/0]). @@ -45,7 +46,8 @@ olp/1, data/1, ds/1, - cluster_info/0 + cluster_info/0, + exclusive/1 ]). -spec load() -> ok. @@ -1024,7 +1026,9 @@ print({?SUBOPTION, {{Topic, Pid}, Options}}) when is_pid(Pid) -> NL = maps:get(nl, Options, 0), RH = maps:get(rh, Options, 0), RAP = maps:get(rap, Options, 0), - emqx_ctl:print("~ts -> topic:~ts qos:~p nl:~p rh:~p rap:~p~n", [SubId, Topic, QoS, NL, RH, RAP]). + emqx_ctl:print("~ts -> topic:~ts qos:~p nl:~p rh:~p rap:~p~n", [SubId, Topic, QoS, NL, RH, RAP]); +print({exclusive, {exclusive_subscription, Topic, ClientId}}) -> + emqx_ctl:print("topic:~ts -> ClientId:~ts~n", [Topic, ClientId]). format(_, undefined) -> undefined; @@ -1085,3 +1089,19 @@ safe_call_mria(Fun, Args, OnFail) -> }), OnFail end. 
+%%-------------------------------------------------------------------- +%% @doc Exclusive topics +exclusive(["list"]) -> + case ets:info(?EXCLUSIVE_TAB, size) of + 0 -> emqx_ctl:print("No topics.~n"); + _ -> dump(?EXCLUSIVE_TAB, exclusive) + end; +exclusive(["delete", Topic0]) -> + Topic = erlang:iolist_to_binary(Topic0), + emqx_exclusive_subscription:unsubscribe(Topic, #{is_exclusive => true}), + emqx_ctl:print("ok~n"); +exclusive(_) -> + emqx_ctl:usage([ + {"exclusive list", "List all exclusive topics"}, + {"exclusive delete ", "Delete an exclusive topic"} + ]). diff --git a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index cb3451fc1..f3757d7ec 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -354,4 +354,9 @@ t_autocluster_leave(Config) -> ) ). +t_exclusive(_Config) -> + emqx_ctl:run_command(["exclusive", "list"]), + emqx_ctl:run_command(["exclusive", "delete", "t/1"]), + ok. + format(Str, Opts) -> io:format("str:~s: Opts:~p", [Str, Opts]). From dc342a35ac7becf1f020f913d6ce7e8c812ec180 Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 26 Jul 2024 17:18:52 +0800 Subject: [PATCH 02/30] chore: update changes --- changes/ce/feat-13524.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/feat-13524.en.md diff --git a/changes/ce/feat-13524.en.md b/changes/ce/feat-13524.en.md new file mode 100644 index 000000000..efe21104f --- /dev/null +++ b/changes/ce/feat-13524.en.md @@ -0,0 +1 @@ +Added CLI interface `emqx ctl exclusive` for the feature exclusive topics. 
From d7cac74bedd2ae8fa1cbb039d48699e0be998ac2 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 29 Jul 2024 17:24:01 +0800 Subject: [PATCH 03/30] feat: add authz skipped trace --- apps/emqx_auth/src/emqx_authz/emqx_authz.erl | 7 ++++++- changes/ce/feat-13534.en.md | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 changes/ce/feat-13534.en.md diff --git a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl index 8bc60a600..7ca9748a2 100644 --- a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl +++ b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl @@ -477,9 +477,14 @@ authorize_deny( sources() ) -> authz_result(). -authorize(Client, PubSub, Topic, _DefaultResult, Sources) -> +authorize(#{username := Username} = Client, PubSub, Topic, _DefaultResult, Sources) -> case maps:get(is_superuser, Client, false) of true -> + ?TRACE("AUTHZ", "authorization_skipped_as_superuser", #{ + username => Username, + topic => Topic, + action => emqx_access_control:format_action(PubSub) + }), emqx_metrics:inc(?METRIC_SUPERUSER), {stop, #{result => allow, from => superuser}}; false -> diff --git a/changes/ce/feat-13534.en.md b/changes/ce/feat-13534.en.md new file mode 100644 index 000000000..5c5af0bf5 --- /dev/null +++ b/changes/ce/feat-13534.en.md @@ -0,0 +1 @@ +Add trace logging when superuser skipped authz check. 
From 5ddd7d7a6a21a546d6e95aff660e4b6e5e5c1acd Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 29 Jul 2024 17:24:16 +0800 Subject: [PATCH 04/30] test: superuser skipped all authz check --- apps/emqx_auth/src/emqx_authz/emqx_authz.erl | 1 + .../test/emqx_authz/emqx_authz_SUITE.erl | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl index 7ca9748a2..e76d52535 100644 --- a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl +++ b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl @@ -480,6 +480,7 @@ authorize_deny( authorize(#{username := Username} = Client, PubSub, Topic, _DefaultResult, Sources) -> case maps:get(is_superuser, Client, false) of true -> + ?tp(authz_skipped, #{reason => client_is_superuser, action => PubSub}), ?TRACE("AUTHZ", "authorization_skipped_as_superuser", #{ username => Username, topic => Topic, diff --git a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl index 575eb4109..4745d7ec6 100644 --- a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl @@ -674,5 +674,77 @@ t_publish_last_will_testament_banned_client_connecting(_Config) -> ok. 
+t_sikpped_as_superuser(_Config) -> + ClientInfo = #{ + clientid => <<"clientid">>, + username => <<"username">>, + peerhost => {127, 0, 0, 1}, + zone => default, + listener => {tcp, default}, + is_superuser => true + }, + ?check_trace( + begin + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_0), <<"p/t/0">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_1), <<"p/t/1">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_2), <<"p/t/2">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_0), <<"s/t/0">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_1), <<"s/t/1">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_2), <<"s/t/2">>) + ) + end, + fun(Trace) -> + ?assertMatch( + [ + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_0, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_1, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_2, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_0, action_type := subscribe} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_1, action_type := subscribe} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_2, action_type := subscribe} + } + ], + ?of_kind(authz_skipped, Trace) + ), + ok + end + ), + + ok = snabbkaffe:stop(). + stop_apps(Apps) -> lists:foreach(fun application:stop/1, Apps). 
From 9f97bff7d0f83b58d7b6ac71c1458d48e0a480e4 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 30 Jul 2024 18:21:53 -0300 Subject: [PATCH 05/30] feat: expose `resource_opts.query_mode` for pulsar action Fixes https://emqx.atlassian.net/browse/EMQX-12782 --- .../test/emqx_bridge_v2_testlib.erl | 5 +- .../src/emqx_bridge_pulsar.app.src | 2 +- .../src/emqx_bridge_pulsar_action_info.erl | 28 ++- .../src/emqx_bridge_pulsar_connector.erl | 26 ++- .../src/emqx_bridge_pulsar_pubsub_schema.erl | 4 +- .../emqx_bridge_pulsar_connector_SUITE.erl | 9 +- .../test/emqx_bridge_pulsar_v2_SUITE.erl | 189 +++++++++++------- .../src/emqx_resource_buffer_worker.erl | 12 +- changes/ee/feat-13546.en.md | 1 + 9 files changed, 182 insertions(+), 94 deletions(-) create mode 100644 changes/ee/feat-13546.en.md diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl index d98f4f926..46d1883bd 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl @@ -889,7 +889,8 @@ t_sync_query_down(Config, Opts) -> ), ?force_ordering( - #{?snk_kind := call_query}, + #{?snk_kind := SNKKind} when + SNKKind =:= call_query orelse SNKKind =:= simple_query_enter, #{?snk_kind := cut_connection, ?snk_span := start} ), %% Note: order of arguments here is reversed compared to `?force_ordering'. 
@@ -913,6 +914,7 @@ t_sync_query_down(Config, Opts) -> emqx_common_test_helpers:enable_failure(down, ProxyName, ProxyHost, ProxyPort) ) end), + ?tp("publishing_message", #{}), try {_, {ok, _}} = snabbkaffe:wait_async_action( @@ -921,6 +923,7 @@ t_sync_query_down(Config, Opts) -> infinity ) after + ?tp("healing_failure", #{}), emqx_common_test_helpers:heal_failure(down, ProxyName, ProxyHost, ProxyPort) end, {ok, _} = snabbkaffe:block_until(SuccessTPFilter, infinity), diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src index a8eeba483..dcb86a3ca 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_pulsar, [ {description, "EMQX Pulsar Bridge"}, - {vsn, "0.2.2"}, + {vsn, "0.2.3"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl index 6d15687f6..fb9a38cc6 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl @@ -11,7 +11,8 @@ action_type_name/0, connector_type_name/0, schema_module/0, - is_action/1 + is_action/1, + connector_action_config_to_bridge_v1_config/2 ]). is_action(_) -> true. @@ -23,3 +24,28 @@ action_type_name() -> pulsar. connector_type_name() -> pulsar. schema_module() -> emqx_bridge_pulsar_pubsub_schema. + +connector_action_config_to_bridge_v1_config(ConnectorConfig, ActionConfig) -> + BridgeV1Config1 = emqx_action_info:connector_action_config_to_bridge_v1_config( + ConnectorConfig, ActionConfig + ), + BridgeV1Config = maps:with(v1_fields(pulsar_producer), BridgeV1Config1), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun(RO) -> maps:with(v1_fields(producer_resource_opts), RO) end, + BridgeV1Config + ). 
+ +%%------------------------------------------------------------------------------------------ +%% Internal helper functions +%%------------------------------------------------------------------------------------------ + +v1_fields(Struct) -> + [ + to_bin(K) + || {K, _} <- emqx_bridge_pulsar:fields(Struct) + ]. + +to_bin(B) when is_binary(B) -> B; +to_bin(L) when is_list(L) -> list_to_binary(L); +to_bin(A) when is_atom(A) -> atom_to_binary(A, utf8). diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl index 9d269493d..c98dd19ed 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl @@ -58,6 +58,8 @@ callback_mode() -> async_if_possible. +query_mode(#{resource_opts := #{query_mode := sync}}) -> + simple_sync_internal_buffer; query_mode(_Config) -> simple_async_internal_buffer. @@ -202,12 +204,17 @@ on_query(_InstanceId, {ChannelId, Message}, State) -> sync_timeout => SyncTimeout, is_async => false }), - try - pulsar:send_sync(Producers, [PulsarMessage], SyncTimeout) - catch - error:timeout -> - {error, timeout} - end + ?tp_span( + "pulsar_producer_query_enter", + #{instance_id => _InstanceId, message => Message, mode => sync}, + try + ?tp("pulsar_producer_send", #{msg => PulsarMessage, mode => sync}), + pulsar:send_sync(Producers, [PulsarMessage], SyncTimeout) + catch + error:timeout -> + {error, timeout} + end + ) end. 
-spec on_query_async( @@ -218,11 +225,11 @@ on_query_async(_InstanceId, {ChannelId, Message}, AsyncReplyFn, State) -> #{channels := Channels} = State, case maps:find(ChannelId, Channels) of error -> - {error, channel_not_found}; + {error, {unrecoverable_error, channel_not_found}}; {ok, #{message := MessageTmpl, producers := Producers}} -> ?tp_span( - pulsar_producer_on_query_async, - #{instance_id => _InstanceId, message => Message}, + "pulsar_producer_query_enter", + #{instance_id => _InstanceId, message => Message, mode => async}, on_query_async2(ChannelId, Producers, Message, MessageTmpl, AsyncReplyFn) ) end. @@ -233,6 +240,7 @@ on_query_async2(ChannelId, Producers, Message, MessageTmpl, AsyncReplyFn) -> message => PulsarMessage, is_async => true }), + ?tp("pulsar_producer_send", #{msg => PulsarMessage, mode => async}), pulsar:send(Producers, [PulsarMessage], #{callback_fn => AsyncReplyFn}). on_format_query_result({ok, Info}) -> diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl index dff62843e..515fcdb5a 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl @@ -66,10 +66,8 @@ fields(action_resource_opts) -> batch_size, batch_time, worker_pool_size, - request_ttl, inflight_window, - max_buffer_bytes, - query_mode + max_buffer_bytes ], lists:filter( fun({K, _V}) -> not lists:member(K, UnsupportedOpts) end, diff --git a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl index cd54e2194..0a908f5be 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl @@ -843,7 +843,8 @@ do_t_send_with_failure(Config, FailureType) -> ?wait_async_action( emqx:publish(Message0), #{ - ?snk_kind := 
pulsar_producer_on_query_async, + ?snk_kind := "pulsar_producer_query_enter", + mode := async, ?snk_span := {complete, _} }, 5_000 @@ -970,7 +971,11 @@ t_producer_process_crash(Config) -> {_, {ok, _}} = ?wait_async_action( emqx:publish(Message0), - #{?snk_kind := pulsar_producer_on_query_async, ?snk_span := {complete, _}}, + #{ + ?snk_kind := "pulsar_producer_query_enter", + mode := async, + ?snk_span := {complete, _} + }, 5_000 ), Data0 = receive_consumed(20_000), diff --git a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl index 11caa15c6..94534fafd 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl @@ -23,31 +23,25 @@ %%------------------------------------------------------------------------------ all() -> - [ - {group, plain}, - {group, tls} - ]. + All0 = emqx_common_test_helpers:all(?MODULE), + All = All0 -- matrix_cases(), + Groups = lists:map(fun({G, _, _}) -> {group, G} end, groups()), + Groups ++ All. groups() -> - AllTCs = emqx_common_test_helpers:all(?MODULE), - [ - {plain, AllTCs}, - {tls, AllTCs} - ]. + emqx_common_test_helpers:matrix_to_groups(?MODULE, matrix_cases()). + +matrix_cases() -> + emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - %% Ensure enterprise bridge module is loaded - _ = emqx_bridge_enterprise:module_info(), - {ok, Cwd} = file:get_cwd(), - PrivDir = ?config(priv_dir, Config), - WorkDir = emqx_utils_fs:find_relpath(filename:join(PrivDir, "ebp"), Cwd), Apps = emqx_cth_suite:start( lists:flatten([ ?APPS, emqx_management, emqx_mgmt_api_test_util:emqx_dashboard() ]), - #{work_dir => WorkDir} + #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{suite_apps, Apps} | Config]. 
@@ -61,6 +55,7 @@ init_per_group(plain = Type, Config) -> case emqx_common_test_helpers:is_tcp_server_available(PulsarHost, PulsarPort) of true -> Config1 = common_init_per_group(), + ConnectorName = ?MODULE, NewConfig = [ {proxy_name, ProxyName}, @@ -70,7 +65,7 @@ init_per_group(plain = Type, Config) -> {use_tls, false} | Config1 ++ Config ], - create_connector(?MODULE, NewConfig), + create_connector(ConnectorName, NewConfig), NewConfig; false -> maybe_skip_without_ci() @@ -82,6 +77,7 @@ init_per_group(tls = Type, Config) -> case emqx_common_test_helpers:is_tcp_server_available(PulsarHost, PulsarPort) of true -> Config1 = common_init_per_group(), + ConnectorName = ?MODULE, NewConfig = [ {proxy_name, ProxyName}, @@ -91,17 +87,21 @@ init_per_group(tls = Type, Config) -> {use_tls, true} | Config1 ++ Config ], - create_connector(?MODULE, NewConfig), + create_connector(ConnectorName, NewConfig), NewConfig; false -> maybe_skip_without_ci() - end. + end; +init_per_group(_Group, Config) -> + Config. end_per_group(Group, Config) when Group =:= plain; Group =:= tls -> common_end_per_group(Config), + ok; +end_per_group(_Group, _Config) -> ok. common_init_per_group() -> @@ -189,66 +189,49 @@ pulsar_connector(Config) -> ":", integer_to_binary(PulsarPort) ]), - Connector = #{ - <<"connectors">> => #{ - <<"pulsar">> => #{ - Name => #{ - <<"enable">> => true, - <<"ssl">> => #{ - <<"enable">> => UseTLS, - <<"verify">> => <<"verify_none">>, - <<"server_name_indication">> => <<"auto">> - }, - <<"authentication">> => <<"none">>, - <<"servers">> => ServerURL - } - } - } + InnerConfigMap = #{ + <<"enable">> => true, + <<"ssl">> => #{ + <<"enable">> => UseTLS, + <<"verify">> => <<"verify_none">>, + <<"server_name_indication">> => <<"auto">> + }, + <<"authentication">> => <<"none">>, + <<"servers">> => ServerURL }, - parse_and_check(<<"connectors">>, emqx_connector_schema, Connector, Name). + emqx_bridge_v2_testlib:parse_and_check_connector(?TYPE, Name, InnerConfigMap). 
pulsar_action(Config) -> + QueryMode = proplists:get_value(query_mode, Config, <<"sync">>), Name = atom_to_binary(?MODULE), - Action = #{ - <<"actions">> => #{ - <<"pulsar">> => #{ - Name => #{ - <<"connector">> => Name, - <<"enable">> => true, - <<"parameters">> => #{ - <<"retention_period">> => <<"infinity">>, - <<"max_batch_bytes">> => <<"1MB">>, - <<"batch_size">> => 100, - <<"strategy">> => <<"random">>, - <<"buffer">> => #{ - <<"mode">> => <<"memory">>, - <<"per_partition_limit">> => <<"10MB">>, - <<"segment_bytes">> => <<"5MB">>, - <<"memory_overload_protection">> => true - }, - <<"message">> => #{ - <<"key">> => <<"${.clientid}">>, - <<"value">> => <<"${.}">> - }, - <<"pulsar_topic">> => ?config(pulsar_topic, Config) - }, - <<"resource_opts">> => #{ - <<"health_check_interval">> => <<"1s">>, - <<"metrics_flush_interval">> => <<"300ms">> - } - } - } + InnerConfigMap = #{ + <<"connector">> => Name, + <<"enable">> => true, + <<"parameters">> => #{ + <<"retention_period">> => <<"infinity">>, + <<"max_batch_bytes">> => <<"1MB">>, + <<"batch_size">> => 100, + <<"strategy">> => <<"random">>, + <<"buffer">> => #{ + <<"mode">> => <<"memory">>, + <<"per_partition_limit">> => <<"10MB">>, + <<"segment_bytes">> => <<"5MB">>, + <<"memory_overload_protection">> => true + }, + <<"message">> => #{ + <<"key">> => <<"${.clientid}">>, + <<"value">> => <<"${.}">> + }, + <<"pulsar_topic">> => ?config(pulsar_topic, Config) + }, + <<"resource_opts">> => #{ + <<"query_mode">> => QueryMode, + <<"request_ttl">> => <<"1s">>, + <<"health_check_interval">> => <<"1s">>, + <<"metrics_flush_interval">> => <<"300ms">> } }, - parse_and_check(<<"actions">>, emqx_bridge_v2_schema, Action, Name). 
- -parse_and_check(Key, Mod, Conf, Name) -> - ConfStr = hocon_pp:do(Conf, #{}), - ct:pal(ConfStr), - {ok, RawConf} = hocon:binary(ConfStr, #{format => map}), - hocon_tconf:check_plain(Mod, RawConf, #{required => false, atom_key => false}), - #{Key := #{<<"pulsar">> := #{Name := RetConf}}} = RawConf, - RetConf. + emqx_bridge_v2_testlib:parse_and_check(action, ?TYPE, Name, InnerConfigMap). instance_id(Type, Name) -> ConnectorId = emqx_bridge_resource:resource_id(Type, ?TYPE, Name), @@ -404,20 +387,44 @@ assert_status_api(Line, Type, Name, Status) -> ). -define(assertStatusAPI(TYPE, NAME, STATUS), assert_status_api(?LINE, TYPE, NAME, STATUS)). +proplists_with(Keys, PList) -> + lists:filter(fun({K, _}) -> lists:member(K, Keys) end, PList). + +group_path(Config) -> + case emqx_common_test_helpers:group_path(Config) of + [] -> + undefined; + Path -> + Path + end. + %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ -t_action_probe(Config) -> +t_action_probe(matrix) -> + [[plain], [tls]]; +t_action_probe(Config) when is_list(Config) -> Name = atom_to_binary(?FUNCTION_NAME), Action = pulsar_action(Config), {ok, Res0} = emqx_bridge_v2_testlib:probe_bridge_api(action, ?TYPE, Name, Action), ?assertMatch({{_, 204, _}, _, _}, Res0), ok. 
-t_action(Config) -> +t_action(matrix) -> + [ + [plain, async], + [plain, sync], + [tls, async] + ]; +t_action(Config) when is_list(Config) -> + QueryMode = + case group_path(Config) of + [_, QM | _] -> atom_to_binary(QM); + _ -> <<"async">> + end, Name = atom_to_binary(?FUNCTION_NAME), - create_action(Name, Config), + create_action(Name, [{query_mode, QueryMode} | Config]), Actions = emqx_bridge_v2:list(actions), Any = fun(#{name := BName}) -> BName =:= Name end, ?assert(lists:any(Any, Actions), Actions), @@ -465,7 +472,9 @@ t_action(Config) -> %% Tests that deleting/disabling an action that share the same Pulsar topic with other %% actions do not disturb the latter. -t_multiple_actions_sharing_topic(Config) -> +t_multiple_actions_sharing_topic(matrix) -> + [[plain], [tls]]; +t_multiple_actions_sharing_topic(Config) when is_list(Config) -> Type = ?TYPE, ConnectorName = <<"c">>, ConnectorConfig = pulsar_connector(Config), @@ -546,3 +555,31 @@ t_multiple_actions_sharing_topic(Config) -> [] ), ok. + +t_sync_query_down(matrix) -> + [[plain]]; +t_sync_query_down(Config0) when is_list(Config0) -> + ct:timetrap({seconds, 15}), + Payload = #{<<"x">> => <<"some data">>}, + PayloadBin = emqx_utils_json:encode(Payload), + ClientId = <<"some_client">>, + Opts = #{ + make_message_fn => fun(Topic) -> emqx_message:make(ClientId, Topic, PayloadBin) end, + enter_tp_filter => + ?match_event(#{?snk_kind := "pulsar_producer_send"}), + error_tp_filter => + ?match_event(#{?snk_kind := "resource_simple_sync_internal_buffer_query_timeout"}), + success_tp_filter => + ?match_event(#{?snk_kind := pulsar_echo_consumer_message}) + }, + Config = [ + {connector_type, ?TYPE}, + {connector_name, ?FUNCTION_NAME}, + {connector_config, pulsar_connector(Config0)}, + {action_type, ?TYPE}, + {action_name, ?FUNCTION_NAME}, + {action_config, pulsar_action(Config0)} + | proplists_with([proxy_name, proxy_host, proxy_port], Config0) + ], + emqx_bridge_v2_testlib:t_sync_query_down(Config, Opts), + ok. 
diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 516795f39..8eb7b373d 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -198,6 +198,9 @@ simple_sync_internal_buffer_query(Id, Request, QueryOpts0) -> QueryOpts = #{timeout := Timeout} = maps:merge(simple_query_opts(), QueryOpts1), case simple_async_query(Id, Request, QueryOpts) of {error, _} = Error -> + ?tp("resource_simple_sync_internal_buffer_query_error", #{ + id => Id, request => Request + }), Error; {async_return, {error, _} = Error} -> Error; @@ -210,7 +213,11 @@ simple_sync_internal_buffer_query(Id, Request, QueryOpts0) -> receive {ReplyAlias, Response} -> Response - after 0 -> {error, timeout} + after 0 -> + ?tp("resource_simple_sync_internal_buffer_query_timeout", #{ + id => Id, request => Request + }), + {error, timeout} end end end @@ -1302,6 +1309,7 @@ do_call_query(QM, Id, Index, Ref, Query, #{query_mode := ReqQM} = QueryOpts, Res ?tp(simple_query_override, #{query_mode => ReqQM}), #{mod := Mod, state := ResSt, callback_mode := CBM, added_channels := Channels} = Resource, CallMode = call_mode(QM, CBM), + ?tp(simple_query_enter, #{}), apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, Channels, QueryOpts); do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{query_mode := ResQM} = Resource) when ResQM =:= simple_sync_internal_buffer; ResQM =:= simple_async_internal_buffer @@ -1309,6 +1317,7 @@ do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{query_mode := ResQM} = Res %% The connector supports buffer, send even in disconnected state #{mod := Mod, state := ResSt, callback_mode := CBM, added_channels := Channels} = Resource, CallMode = call_mode(QM, CBM), + ?tp(simple_query_enter, #{}), apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, Channels, QueryOpts); do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{status 
:= connected} = Resource) -> %% when calling from the buffer worker or other simple queries, @@ -2297,6 +2306,7 @@ reply_call(Alias, Response) -> %% Used by `simple_sync_internal_buffer_query' to reply and chain existing `reply_to' %% callbacks. reply_call_internal_buffer(ReplyAlias, MaybeReplyTo, Response) -> + ?tp("reply_call_internal_buffer", #{}), ?MODULE:reply_call(ReplyAlias, Response), do_reply_caller(MaybeReplyTo, Response). diff --git a/changes/ee/feat-13546.en.md b/changes/ee/feat-13546.en.md new file mode 100644 index 000000000..c403409ac --- /dev/null +++ b/changes/ee/feat-13546.en.md @@ -0,0 +1 @@ +Added the option to configure the query mode for Pulsar Producer action. From 11aaa7b07d8b7e69bfb6f5c361d6d950caec20b2 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 5 Jul 2024 17:55:48 +0200 Subject: [PATCH 06/30] fix: make MQTT connector error log messages easier to understand Fixes: https://emqx.atlassian.net/browse/EMQX-12555 https://emqx.atlassian.net/browse/EMQX-12657 --- .../src/emqx_bridge_mqtt_connector.erl | 43 +++++++++++-- .../test/emqx_bridge_mqtt_SUITE.erl | 62 +++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 118542356..358856c35 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -514,15 +514,48 @@ connect(Pid, Name) -> {ok, Pid}; {error, Reason} = Error -> IsDryRun = emqx_resource:is_dry_run(Name), - ?SLOG(?LOG_LEVEL(IsDryRun), #{ - msg => "ingress_client_connect_failed", - reason => Reason, - resource_id => Name - }), + log_connect_error_reason(?LOG_LEVEL(IsDryRun), Reason, Name), _ = catch emqtt:stop(Pid), Error end. 
+log_connect_error_reason(Level, {tcp_closed, _} = Reason, Name) -> + ?tp(emqx_bridge_mqtt_connector_tcp_closed, #{}), + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name, + explain => + << + "Your MQTT connection attempt was unsuccessful. " + "It might be at its maximum capacity for handling new connections. " + "To diagnose the issue further, you can check the server logs for " + "any specific messages related to the unavailability or connection limits." + >> + }); +log_connect_error_reason(Level, econnrefused = Reason, Name) -> + ?tp(emqx_bridge_mqtt_connector_econnrefused_error, #{}), + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name, + explain => + << + "This error indicates that your connection attempt to the MQTT server was rejected. " + "In simpler terms, the server you tried to connect to refused your request. " + "There can be multiple reasons for this. " + "For example, the MQTT server you're trying to connect to might be down or not " + "running at all or you might have provided the wrong address " + "or port number for the server." + >> + }); +log_connect_error_reason(Level, Reason, Name) -> + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name + }). + handle_disconnect(_Reason) -> ok. diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index a220eb9f7..574851f70 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -27,6 +27,8 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-import(emqx_common_test_helpers, [on_exit/1]). + %% output functions -export([inspect/3]). @@ -399,6 +401,56 @@ t_mqtt_conn_bridge_ingress_shared_subscription(_) -> {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. 
+t_connect_with_more_clients_than_the_broker_accepts(_) -> + PoolSize = 100, + OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), + on_exit(fun() -> + emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) + end), + NewConf = OrgConf#{<<"max_connections">> => 3}, + {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), + BridgeName = atom_to_binary(?FUNCTION_NAME), + BridgeID = create_bridge( + ?SERVER_CONF#{ + <<"name">> => BridgeName, + <<"ingress">> => #{ + <<"pool_size">> => PoolSize, + <<"remote">> => #{ + <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, + <<"qos">> => 1 + }, + <<"local">> => #{ + <<"topic">> => <>, + <<"qos">> => <<"${qos}">>, + <<"payload">> => <<"${clientid}">>, + <<"retain">> => <<"${retain}">> + } + } + } + ), + snabbkaffe:block_until( + fun + (#{msg := emqx_bridge_mqtt_connector_tcp_closed}) -> + true; + (_) -> + false + end, + 5000 + ), + Trace = snabbkaffe:collect_trace(), + ?assert( + lists:any( + fun(K) -> + maps:get(msg, K, not_found) =:= + emqx_bridge_mqtt_connector_tcp_closed + end, + Trace + ) + ), + + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + ok. + t_mqtt_egress_bridge_warns_clean_start(_) -> BridgeName = atom_to_binary(?FUNCTION_NAME), Action = fun() -> @@ -1050,6 +1102,16 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) -> Payload <- [integer_to_binary(I)] ], + Trace = snabbkaffe:collect_trace(50), + ?assert( + lists:any( + fun(K) -> + maps:get(msg, K, not_found) =:= + emqx_bridge_mqtt_connector_econnrefused_error + end, + Trace + ) + ), ok. 
start_publisher(Topic, Interval, CtrlPid) -> From ba2d4f3df3f723be4c42992f6321a72183be30f1 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 5 Jul 2024 18:00:33 +0200 Subject: [PATCH 07/30] docs: add change log entry --- changes/ce/fix-13425.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/fix-13425.en.md diff --git a/changes/ce/fix-13425.en.md b/changes/ce/fix-13425.en.md new file mode 100644 index 000000000..e02e99c0a --- /dev/null +++ b/changes/ce/fix-13425.en.md @@ -0,0 +1 @@ +The MQTT connector error log messages have been improved to provide clearer and more detailed information. From baf2b96cbc12adbf9a66fba7a9b05868e1a8f08f Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 14:27:25 -0300 Subject: [PATCH 08/30] test: refactor test structure --- .../test/emqx_bridge_mqtt_SUITE.erl | 62 ++++++++----------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index 574851f70..438b1f601 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -410,45 +410,37 @@ t_connect_with_more_clients_than_the_broker_accepts(_) -> NewConf = OrgConf#{<<"max_connections">> => 3}, {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), BridgeName = atom_to_binary(?FUNCTION_NAME), - BridgeID = create_bridge( - ?SERVER_CONF#{ - <<"name">> => BridgeName, - <<"ingress">> => #{ - <<"pool_size">> => PoolSize, - <<"remote">> => #{ - <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, - <<"qos">> => 1 - }, - <<"local">> => #{ - <<"topic">> => <>, - <<"qos">> => <<"${qos}">>, - <<"payload">> => <<"${clientid}">>, - <<"retain">> => <<"${retain}">> + ?check_trace( + #{timetrap => 10_000}, + begin + BridgeID = create_bridge( + ?SERVER_CONF#{ + <<"name">> => BridgeName, + <<"ingress">> => #{ + <<"pool_size">> => PoolSize, 
+ <<"remote">> => #{ + <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, + <<"qos">> => 1 + }, + <<"local">> => #{ + <<"topic">> => <>, + <<"qos">> => <<"${qos}">>, + <<"payload">> => <<"${clientid}">>, + <<"retain">> => <<"${retain}">> + } + } } - } - } - ), - snabbkaffe:block_until( - fun - (#{msg := emqx_bridge_mqtt_connector_tcp_closed}) -> - true; - (_) -> - false + ), + ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + ok end, - 5000 - ), - Trace = snabbkaffe:collect_trace(), - ?assert( - lists:any( - fun(K) -> - maps:get(msg, K, not_found) =:= - emqx_bridge_mqtt_connector_tcp_closed - end, - Trace - ) + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), + ok + end ), - {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. t_mqtt_egress_bridge_warns_clean_start(_) -> From 44e7f2e9b2555a61716daac5996ff98916814630 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 14:49:43 -0300 Subject: [PATCH 09/30] refactor: use macros for status to avoid typos --- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl | 8 ++++---- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 358856c35..eb8b4e1ad 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -200,7 +200,7 @@ on_get_channel_status( } = _State ) when is_map_key(ChannelId, Channels) -> %% The channel should be ok as long as the MQTT client is ok - connected. + ?status_connected. on_get_channels(ResId) -> emqx_bridge_v2:get_channels_for_connector(ResId). 
@@ -359,7 +359,7 @@ on_get_status(_ResourceId, State) -> combine_status(Statuses) catch exit:timeout -> - connecting + ?status_connecting end. get_status({_Pool, Worker}) -> @@ -367,7 +367,7 @@ get_status({_Pool, Worker}) -> {ok, Client} -> emqx_bridge_mqtt_ingress:status(Client); {error, _} -> - disconnected + ?status_disconnected end. combine_status(Statuses) -> @@ -379,7 +379,7 @@ combine_status(Statuses) -> [Status | _] -> Status; [] -> - disconnected + ?status_disconnected end. mk_ingress_config( diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl index 1749d4194..35aea67a6 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl @@ -19,6 +19,7 @@ -include_lib("emqx/include/logger.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). %% management APIs -export([ @@ -234,13 +235,13 @@ status(Pid) -> try case proplists:get_value(socket, info(Pid)) of Socket when Socket /= undefined -> - connected; + ?status_connected; undefined -> - connecting + ?status_connecting end catch exit:{noproc, _} -> - disconnected + ?status_disconnected end. 
%% From 52b2d73b2802176c035d0ca1f0a777cb980fd694 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:13:25 -0300 Subject: [PATCH 10/30] test: move new test to newer module and use current apis --- .../test/emqx_bridge_mqtt_SUITE.erl | 42 ------------------- .../emqx_bridge_mqtt_v2_subscriber_SUITE.erl | 25 +++++++++++ 2 files changed, 25 insertions(+), 42 deletions(-) diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index 438b1f601..e1af22c3d 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -401,48 +401,6 @@ t_mqtt_conn_bridge_ingress_shared_subscription(_) -> {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. -t_connect_with_more_clients_than_the_broker_accepts(_) -> - PoolSize = 100, - OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), - on_exit(fun() -> - emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) - end), - NewConf = OrgConf#{<<"max_connections">> => 3}, - {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), - BridgeName = atom_to_binary(?FUNCTION_NAME), - ?check_trace( - #{timetrap => 10_000}, - begin - BridgeID = create_bridge( - ?SERVER_CONF#{ - <<"name">> => BridgeName, - <<"ingress">> => #{ - <<"pool_size">> => PoolSize, - <<"remote">> => #{ - <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, - <<"qos">> => 1 - }, - <<"local">> => #{ - <<"topic">> => <>, - <<"qos">> => <<"${qos}">>, - <<"payload">> => <<"${clientid}">>, - <<"retain">> => <<"${retain}">> - } - } - } - ), - ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), - {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), - ok - end, - fun(Trace) -> - ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), - ok - end - ), - - ok. 
- t_mqtt_egress_bridge_warns_clean_start(_) -> BridgeName = atom_to_binary(?FUNCTION_NAME), Action = fun() -> diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl index b9097b9c3..9030d2ac7 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl @@ -246,3 +246,28 @@ t_receive_via_rule(Config) -> end ), ok. + +t_connect_with_more_clients_than_the_broker_accepts(Config0) -> + OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), + on_exit(fun() -> + emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) + end), + NewConf = OrgConf#{<<"max_connections">> => 3}, + {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), + ConnectorConfig0 = ?config(connector_config, Config0), + ConnectorConfig = ConnectorConfig0#{<<"pool_size">> := 100}, + Config = emqx_utils:merge_opts(Config0, [{connector_config, ConnectorConfig}]), + ?check_trace( + #{timetrap => 10_000}, + begin + {ok, _} = emqx_bridge_v2_testlib:create_bridge_api(Config), + ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + ok + end, + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), + ok + end + ), + + ok. 
From 3162fe7a27931d6d5094786d48aea846f0e3d69a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:23:14 -0300 Subject: [PATCH 11/30] feat: prettify some error explanations --- .../src/emqx_bridge_mqtt_connector.erl | 62 +++++++++++++------ .../emqx_bridge_mqtt_v2_subscriber_SUITE.erl | 31 ++++++++-- 2 files changed, 69 insertions(+), 24 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index eb8b4e1ad..02cf6595f 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -98,7 +98,7 @@ on_start(ResourceId, #{server := Server} = Conf) -> server => Server }}; {error, Reason} -> - {error, Reason} + {error, emqx_maybe:define(explain_error(Reason), Reason)} end. on_add_channel( @@ -356,7 +356,12 @@ on_get_status(_ResourceId, State) -> Workers = [{Pool, Worker} || {Pool, PN} <- Pools, {_Name, Worker} <- ecpool:workers(PN)], try emqx_utils:pmap(fun get_status/1, Workers, ?HEALTH_CHECK_TIMEOUT) of Statuses -> - combine_status(Statuses) + case combine_status(Statuses) of + {Status, Msg} -> + {Status, State, Msg}; + Status -> + Status + end catch exit:timeout -> ?status_connecting @@ -375,7 +380,21 @@ combine_status(Statuses) -> %% Natural order of statuses: [connected, connecting, disconnected] %% * `disconnected` wins over any other status %% * `connecting` wins over `connected` - case lists:reverse(lists:usort(Statuses)) of + ToStatus = fun + ({S, _Reason}) -> S; + (S) when is_atom(S) -> S + end, + CompareFn = fun(S1A, S2A) -> + S1 = ToStatus(S1A), + S2 = ToStatus(S2A), + S1 > S2 + end, + case lists:usort(CompareFn, Statuses) of + [{Status, Reason} | _] -> + case explain_error(Reason) of + undefined -> Status; + Msg -> {Status, Msg} + end; [Status | _] -> Status; [] -> @@ -525,13 +544,7 @@ log_connect_error_reason(Level, {tcp_closed, _} = Reason, Name) -> msg => 
"ingress_client_connect_failed", reason => Reason, name => Name, - explain => - << - "Your MQTT connection attempt was unsuccessful. " - "It might be at its maximum capacity for handling new connections. " - "To diagnose the issue further, you can check the server logs for " - "any specific messages related to the unavailability or connection limits." - >> + explain => explain_error(Reason) }); log_connect_error_reason(Level, econnrefused = Reason, Name) -> ?tp(emqx_bridge_mqtt_connector_econnrefused_error, #{}), @@ -539,15 +552,7 @@ log_connect_error_reason(Level, econnrefused = Reason, Name) -> msg => "ingress_client_connect_failed", reason => Reason, name => Name, - explain => - << - "This error indicates that your connection attempt to the MQTT server was rejected. " - "In simpler terms, the server you tried to connect to refused your request. " - "There can be multiple reasons for this. " - "For example, the MQTT server you're trying to connect to might be down or not " - "running at all or you might have provided the wrong address " - "or port number for the server." - >> + explain => explain_error(Reason) }); log_connect_error_reason(Level, Reason, Name) -> ?SLOG(Level, #{ @@ -556,6 +561,25 @@ log_connect_error_reason(Level, Reason, Name) -> name => Name }). +explain_error(econnrefused) -> + << + "This error indicates that your connection attempt to the MQTT server was rejected. " + "In simpler terms, the server you tried to connect to refused your request. " + "There can be multiple reasons for this. " + "For example, the MQTT server you're trying to connect to might be down or not " + "running at all or you might have provided the wrong address " + "or port number for the server." + >>; +explain_error({tcp_closed, _}) -> + << + "Your MQTT connection attempt was unsuccessful. " + "It might be at its maximum capacity for handling new connections. 
" + "To diagnose the issue further, you can check the server logs for " + "any specific messages related to the unavailability or connection limits." + >>; +explain_error(_Reason) -> + undefined. + handle_disconnect(_Reason) -> ok. diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl index 9030d2ac7..e0598fa1e 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl @@ -131,6 +131,9 @@ hookpoint(Config) -> BridgeId = bridge_id(Config), emqx_bridge_resource:bridge_hookpoint(BridgeId). +simplify_result(Res) -> + emqx_bridge_v2_testlib:simplify_result(Res). + %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ @@ -247,21 +250,39 @@ t_receive_via_rule(Config) -> ), ok. -t_connect_with_more_clients_than_the_broker_accepts(Config0) -> +t_connect_with_more_clients_than_the_broker_accepts(Config) -> + Name = ?config(connector_name, Config), OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), on_exit(fun() -> emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) end), NewConf = OrgConf#{<<"max_connections">> => 3}, {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), - ConnectorConfig0 = ?config(connector_config, Config0), - ConnectorConfig = ConnectorConfig0#{<<"pool_size">> := 100}, - Config = emqx_utils:merge_opts(Config0, [{connector_config, ConnectorConfig}]), ?check_trace( #{timetrap => 10_000}, begin - {ok, _} = emqx_bridge_v2_testlib:create_bridge_api(Config), + ?assertMatch( + {201, #{ + <<"status">> := <<"disconnected">>, + <<"status_reason">> := + <<"Your MQTT connection attempt was unsuccessful", _/binary>> + }}, + simplify_result( + emqx_bridge_v2_testlib:create_connector_api( + Config, + #{<<"pool_size">> => 100} + ) + ) + ), 
?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + ?assertMatch( + {200, #{ + <<"status">> := <<"disconnected">>, + <<"status_reason">> := + <<"Your MQTT connection attempt was unsuccessful", _/binary>> + }}, + simplify_result(emqx_bridge_v2_testlib:get_connector_api(mqtt, Name)) + ), ok end, fun(Trace) -> From bba9d085d6a0e03e90fca5ca7e87e964df269a87 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:34:58 -0300 Subject: [PATCH 12/30] test: refactor test structure --- .../test/emqx_bridge_api_SUITE.erl | 2 +- .../src/emqx_bridge_mqtt_connector.erl | 1 + .../test/emqx_bridge_mqtt_SUITE.erl | 62 +++++++++---------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 6b160f3b3..fdbbc5376 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -1154,7 +1154,7 @@ t_bridges_probe(Config) -> ?assertMatch( {ok, 400, #{ <<"code">> := <<"TEST_FAILED">>, - <<"message">> := <<"Connection refused">> + <<"message">> := <<"Connection refused", _/binary>> }}, request_json( post, diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 02cf6595f..6b9a40123 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -563,6 +563,7 @@ log_connect_error_reason(Level, Reason, Name) -> explain_error(econnrefused) -> << + "Connection refused. " "This error indicates that your connection attempt to the MQTT server was rejected. " "In simpler terms, the server you tried to connect to refused your request. " "There can be multiple reasons for this. 
" diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index e1af22c3d..42cf9d2b8 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -27,8 +27,6 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). --import(emqx_common_test_helpers, [on_exit/1]). - %% output functions -export([inspect/3]). @@ -1027,40 +1025,38 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) -> ct:sleep(1000), %% stop the listener 1883 to make the bridge disconnected - ok = emqx_listeners:stop_listener('tcp:default'), - ct:sleep(1500), - ?assertMatch( - #{<<"status">> := Status} when - Status == <<"connecting">> orelse Status == <<"disconnected">>, - request_bridge(BridgeIDEgress) - ), + ?check_trace( + begin + ok = emqx_listeners:stop_listener('tcp:default'), + ct:sleep(1500), + ?assertMatch( + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) + ), - %% start the listener 1883 to make the bridge reconnected - ok = emqx_listeners:start_listener('tcp:default'), - timer:sleep(1500), - ?assertMatch( - #{<<"status">> := <<"connected">>}, - request_bridge(BridgeIDEgress) - ), + %% start the listener 1883 to make the bridge reconnected + ok = emqx_listeners:start_listener('tcp:default'), + timer:sleep(1500), + ?assertMatch( + #{<<"status">> := <<"connected">>}, + request_bridge(BridgeIDEgress) + ), - N = stop_publisher(Publisher), + N = stop_publisher(Publisher), - %% all those messages should eventually be delivered - [ - assert_mqtt_msg_received(RemoteTopic, Payload) - || I <- lists:seq(1, N), - Payload <- [integer_to_binary(I)] - ], - - Trace = snabbkaffe:collect_trace(50), - ?assert( - lists:any( - fun(K) -> - maps:get(msg, K, not_found) =:= - emqx_bridge_mqtt_connector_econnrefused_error - end, - Trace - ) + %% all those messages 
should eventually be delivered + [ + assert_mqtt_msg_received(RemoteTopic, Payload) + || I <- lists:seq(1, N), + Payload <- [integer_to_binary(I)] + ], + ok + end, + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_econnrefused_error, Trace)), + ok + end ), ok. From bd87e3ce2b7a59578f6fdb36f53b0d0bb9f13a4d Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Mon, 5 Aug 2024 15:58:43 +0800 Subject: [PATCH 13/30] chore: update esockd to 5.12.0 --- apps/emqx/rebar.config | 2 +- mix.exs | 2 +- rebar.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 7a6ec9810..1d818fff0 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.3"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.12.0"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.5"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.43.1"}}}, diff --git a/mix.exs b/mix.exs index dc2953683..366186e6b 100644 --- a/mix.exs +++ b/mix.exs @@ -182,7 +182,7 @@ defmodule EMQXUmbrella.MixProject do end def common_dep(:ekka), do: {:ekka, github: "emqx/ekka", tag: "0.19.5", override: true} - def common_dep(:esockd), do: {:esockd, github: "emqx/esockd", tag: "5.11.3", override: true} + def common_dep(:esockd), do: {:esockd, github: "emqx/esockd", tag: "5.12.0", override: true} def common_dep(:gproc), do: {:gproc, github: "emqx/gproc", tag: "0.9.0.1", override: true} def common_dep(:hocon), do: {:hocon, github: "emqx/hocon", tag: "0.43.1", override: true} def common_dep(:lc), do: {:lc, github: "emqx/lc", tag: "0.3.2", override: 
true} diff --git a/rebar.config b/rebar.config index b260561cb..cc7024b79 100644 --- a/rebar.config +++ b/rebar.config @@ -82,7 +82,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.6"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.3"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.12.0"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-6"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.5"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, From 4cf715113912a859db5e2db233a91d43e8e0c3f5 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Mon, 5 Aug 2024 11:01:57 +0200 Subject: [PATCH 14/30] chore: prepare 5.8.0-alpha.1 --- .github/workflows/build_packages_cron.yaml | 1 + .github/workflows/codeql.yaml | 1 + .github/workflows/green_master.yaml | 1 + apps/emqx/include/emqx_release.hrl | 4 ++-- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- deploy/charts/emqx/Chart.yaml | 4 ++-- scripts/rel/cut.sh | 6 ++++++ scripts/rel/sync-remotes.sh | 8 ++++++-- 8 files changed, 21 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_packages_cron.yaml b/.github/workflows/build_packages_cron.yaml index b7d05ad94..27f4698b6 100644 --- a/.github/workflows/build_packages_cron.yaml +++ b/.github/workflows/build_packages_cron.yaml @@ -23,6 +23,7 @@ jobs: profile: - ['emqx', 'master'] - ['emqx', 'release-57'] + - ['emqx', 'release-58'] os: - ubuntu22.04 - amzn2023 diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index f086b97f6..f6077262f 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -24,6 +24,7 @@ jobs: branch: - master - release-57 + - release-58 language: - cpp - python diff --git a/.github/workflows/green_master.yaml b/.github/workflows/green_master.yaml index 
8479ea5ed..a5317027b 100644 --- a/.github/workflows/green_master.yaml +++ b/.github/workflows/green_master.yaml @@ -24,6 +24,7 @@ jobs: ref: - master - release-57 + - release-58 steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index c8e84ea06..7979d00fd 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.7.1"). +-define(EMQX_RELEASE_CE, "5.8.0-alpha.1"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.7.1"). +-define(EMQX_RELEASE_EE, "5.8.0-alpha.1"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index cd795d4f4..3c25b9ebc 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.7.1 +version: 5.8.0-alpha.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.7.1 +appVersion: 5.8.0-alpha.1 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index d31648f2f..49a3f7c84 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.7.1 +version: 5.8.0-alpha.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: 5.7.1 +appVersion: 5.8.0-alpha.1 diff --git a/scripts/rel/cut.sh b/scripts/rel/cut.sh index 1affd48bf..8c8899b91 100755 --- a/scripts/rel/cut.sh +++ b/scripts/rel/cut.sh @@ -135,6 +135,12 @@ rel_branch() { e5.7.*) echo 'release-57' ;; + v5.8.*) + echo 'release-58' + ;; + e5.8.*) + echo 'release-58' + ;; *) logerr "Unsupported version tag $TAG" exit 1 diff --git a/scripts/rel/sync-remotes.sh b/scripts/rel/sync-remotes.sh index 430021a79..c986535ce 100755 --- a/scripts/rel/sync-remotes.sh +++ b/scripts/rel/sync-remotes.sh @@ -5,7 +5,7 @@ set -euo pipefail # ensure dir cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." -BASE_BRANCHES=( 'release-57' 'release-56' 'release-55' 'master' ) +BASE_BRANCHES=( 'release-58' 'release-57' 'release-56' 'release-55' 'master' ) usage() { cat < Date: Tue, 6 Aug 2024 13:44:16 +0800 Subject: [PATCH 15/30] chore(dashboard): bump dashboard version to v1.10.0-beta.1 & e1.8.0-beta.1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2f4519cfd..69667c952 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ include env.sh # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.9.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.7.1 +export EMQX_DASHBOARD_VERSION ?= v1.10.0-beta.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.8.0-beta.1 export EMQX_RELUP ?= true export EMQX_REL_FORM ?= tgz From b80513e941be82d1a9a3bbfbcf5d30b79a0b1bdd Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 6 Aug 2024 09:33:20 +0800 Subject: [PATCH 16/30] ci: update emqx docs link in dashboard --- scripts/ui-tests/dashboard_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index c349739b6..356432b8e 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -131,10 +131,7 @@ def test_docs_link(driver, dashboard_url): # it's 
v5.x in the url emqx_version = 'v' + emqx_version - if prefix == 'e': - docs_base_url = "https://docs.emqx.com/en/enterprise" - else: - docs_base_url = "https://docs.emqx.com/en/emqx" + docs_base_url = "https://docs.emqx.com/en/emqx" docs_url = f"{docs_base_url}/{emqx_version}" xpath = f"//div[@id='app']//div[@class='nav-header']//a[@href[starts-with(.,'{docs_url}')]]" From 0aa4cdbaf355e6bce3fed48a975b1fe251c1af5d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:51:14 +0200 Subject: [PATCH 17/30] feat(ds): add generic preconditions implementation --- .../src/emqx_ds_precondition.erl | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 apps/emqx_durable_storage/src/emqx_ds_precondition.erl diff --git a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl new file mode 100644 index 000000000..5f86d6c25 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl @@ -0,0 +1,184 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2021-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ds_precondition). +-include_lib("emqx_utils/include/emqx_message.hrl"). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). + +-export([verify/3]). +-export([matches/2]). 
+ +-export_type([matcher/0, mismatch/0]). + +-type matcher() :: #message_matcher{}. +-type mismatch() :: emqx_types:message() | not_found. + +-callback lookup_message(_Ctx, matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). + +%% + +-spec verify(module(), _Ctx, [emqx_ds:precondition()]) -> + ok | {precondition_failed, mismatch()} | emqx_ds:error(_). +verify(Mod, Ctx, [_Precondition = {Cond, Msg} | Rest]) -> + case verify_precondition(Mod, Ctx, Cond, Msg) of + ok -> + verify(Mod, Ctx, Rest); + Failed -> + Failed + end; +verify(_Mod, _Ctx, []) -> + ok. + +verify_precondition(Mod, Ctx, if_exists, Matcher) -> + case Mod:lookup_message(Ctx, Matcher) of + Msg = #message{} -> + verify_match(Msg, Matcher); + not_found -> + {precondition_failed, not_found}; + Error = {error, _, _} -> + Error + end; +verify_precondition(Mod, Ctx, unless_exists, Matcher) -> + case Mod:lookup_message(Ctx, Matcher) of + Msg = #message{} -> + verify_nomatch(Msg, Matcher); + not_found -> + ok; + Error = {error, _, _} -> + Error + end. + +verify_match(Msg, Matcher) -> + case matches(Msg, Matcher) of + true -> ok; + false -> {precondition_failed, Msg} + end. + +verify_nomatch(Msg, Matcher) -> + case matches(Msg, Matcher) of + false -> ok; + true -> {precondition_failed, Msg} + end. + +-spec matches(emqx_types:message(), matcher()) -> boolean(). +matches( + Message, + #message_matcher{from = From, topic = Topic, payload = Pat, headers = Headers} +) -> + case Message of + #message{from = From, topic = Topic} when Pat =:= '_' -> + matches_headers(Message, Headers); + #message{from = From, topic = Topic, payload = Pat} -> + matches_headers(Message, Headers); + _ -> + false + end. + +matches_headers(_Message, MatchHeaders) when map_size(MatchHeaders) =:= 0 -> + true; +matches_headers(#message{headers = Headers}, MatchHeaders) -> + maps:intersect(MatchHeaders, Headers) =:= MatchHeaders. + +%% Basic tests + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). 
+ +conjunction_test() -> + %% Contradictory preconditions, always false. + Preconditions = [ + {if_exists, matcher(<<"c1">>, <<"t/1">>, 0, '_')}, + {unless_exists, matcher(<<"c1">>, <<"t/1">>, 0, '_')} + ], + ?assertEqual( + {precondition_failed, not_found}, + verify(?MODULE, [], Preconditions) + ), + %% Check that the order does not matter. + ?assertEqual( + {precondition_failed, not_found}, + verify(?MODULE, [], lists:reverse(Preconditions)) + ), + ?assertEqual( + {precondition_failed, message(<<"c1">>, <<"t/1">>, 0, <<>>)}, + verify( + ?MODULE, + [message(<<"c1">>, <<"t/1">>, 0, <<>>)], + Preconditions + ) + ). + +matches_test() -> + ?assert( + matches( + message(<<"mtest1">>, <<"t/same">>, 12345, <>), + matcher(<<"mtest1">>, <<"t/same">>, 12345, '_') + ) + ). + +matches_headers_test() -> + ?assert( + matches( + message(<<"mtest2">>, <<"t/same">>, 23456, <>, #{h1 => 42, h2 => <<>>}), + matcher(<<"mtest2">>, <<"t/same">>, 23456, '_', #{h2 => <<>>}) + ) + ). + +mismatches_headers_test() -> + ?assertNot( + matches( + message(<<"mtest3">>, <<"t/same">>, 23456, <>, #{h1 => 42, h2 => <<>>}), + matcher(<<"mtest3">>, <<"t/same">>, 23456, '_', #{h2 => <<>>, h3 => <<"required">>}) + ) + ). + +matcher(ClientID, Topic, TS, Payload) -> + matcher(ClientID, Topic, TS, Payload, #{}). + +matcher(ClientID, Topic, TS, Payload, Headers) -> + #message_matcher{ + from = ClientID, + topic = Topic, + timestamp = TS, + payload = Payload, + headers = Headers + }. + +message(ClientID, Topic, TS, Payload) -> + message(ClientID, Topic, TS, Payload, #{}). + +message(ClientID, Topic, TS, Payload, Headers) -> + #message{ + id = <<>>, + qos = 0, + from = ClientID, + topic = Topic, + timestamp = TS, + payload = Payload, + headers = Headers + }. + +lookup_message(Messages, Matcher) -> + case lists:search(fun(M) -> matches(M, Matcher) end, Messages) of + {value, Message} -> + Message; + false -> + not_found + end. + +-endif. 
From 11951f8f6cb8734378dc2dead96f0a1615f4c0ce Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:53:38 +0200 Subject: [PATCH 18/30] feat(ds): adopt buffer interface to `emqx_ds:operation()` --- .../src/emqx_ds_builtin_local.erl | 26 +++-- .../src/emqx_ds_replication_layer.erl | 31 ++++-- .../src/emqx_ds_buffer.erl | 103 ++++++++---------- .../test/emqx_ds_test_helpers.erl | 2 +- 4 files changed, 89 insertions(+), 73 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index a7cc795b6..29fc98083 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -43,7 +43,7 @@ %% `emqx_ds_buffer': init_buffer/3, flush_buffer/4, - shard_of_message/4 + shard_of_operation/4 ]). %% Internal exports: @@ -55,6 +55,7 @@ -export_type([db_opts/0, shard/0, iterator/0, delete_iterator/0]). -include_lib("emqx_utils/include/emqx_message.hrl"). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). %%================================================================================ %% Type declarations @@ -255,14 +256,23 @@ assign_message_timestamps(Latest, [], Acc) -> assign_timestamp(TimestampUs, Message) -> {TimestampUs, Message}. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> shard(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> +-spec shard_of_operation(emqx_ds:db(), emqx_ds:operation(), clientid | topic, _Options) -> shard(). +shard_of_operation(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key); +shard_of_operation(DB, {_, #message_matcher{from = From, topic = Topic}}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key). 
+ +shard_of_key(DB, Key) -> N = emqx_ds_builtin_local_meta:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, + Hash = erlang:phash2(Key, N), integer_to_binary(Hash). -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 669abdbf1..f54981884 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -29,7 +29,7 @@ current_timestamp/2, - shard_of_message/4, + shard_of_operation/4, flush_buffer/4, init_buffer/3 ]). @@ -392,15 +392,30 @@ flush_buffer(DB, Shard, Messages, State) -> end, {State, Result}. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> +-spec shard_of_operation( + emqx_ds:db(), + emqx_ds:operation() | emqx_ds:precondition(), + clientid | topic, + _Options +) -> emqx_ds_replication_layer:shard_id(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> +shard_of_operation(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key); +shard_of_operation(DB, {_OpName, Matcher}, SerializeBy, _Options) -> + #message_matcher{from = From, topic = Topic} = Matcher, + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key). + +shard_of_key(DB, Key) -> N = emqx_ds_replication_shard_allocator:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, + Hash = erlang:phash2(Key, N), integer_to_binary(Hash). 
%%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index dec9eea80..cf83c8f2e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -21,7 +21,7 @@ -behaviour(gen_server). %% API: --export([start_link/4, store_batch/3, shard_of_message/3]). +-export([start_link/4, store_batch/3, shard_of_operation/3]). -export([ls/0]). %% behavior callbacks: @@ -46,19 +46,18 @@ -define(cbm(DB), {?MODULE, DB}). -record(enqueue_req, { - messages :: [emqx_types:message()], + operations :: [emqx_ds:operation()], sync :: boolean(), - atomic :: boolean(), - n_messages :: non_neg_integer(), + n_operations :: non_neg_integer(), payload_bytes :: non_neg_integer() }). -callback init_buffer(emqx_ds:db(), _Shard, _Options) -> {ok, _State}. --callback flush_buffer(emqx_ds:db(), _Shard, [emqx_types:message()], State) -> +-callback flush_buffer(emqx_ds:db(), _Shard, [emqx_ds:operation()], State) -> {State, ok | {error, recoverable | unrecoverable, _}}. --callback shard_of_message(emqx_ds:db(), emqx_types:message(), topic | clientid, _Options) -> +-callback shard_of_operation(emqx_ds:db(), emqx_ds:operation(), topic | clientid, _Options) -> _Shard. %%================================================================================ @@ -77,39 +76,33 @@ start_link(CallbackModule, CallbackOptions, DB, Shard) -> ?via(DB, Shard), ?MODULE, [CallbackModule, CallbackOptions, DB, Shard], [] ). --spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> +-spec store_batch(emqx_ds:db(), [emqx_ds:operation()], emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). 
-store_batch(DB, Messages, Opts) -> +store_batch(DB, Operations, Opts) -> Sync = maps:get(sync, Opts, true), - Atomic = maps:get(atomic, Opts, false), %% Usually we expect all messages in the batch to go into the %% single shard, so this function is optimized for the happy case. - case shards_of_batch(DB, Messages) of - [{Shard, {NMsgs, NBytes}}] -> + case shards_of_batch(DB, Operations) of + [{Shard, {NOps, NBytes}}] -> %% Happy case: enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ - messages = Messages, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes } ); - [_, _ | _] when Atomic -> - %% It's impossible to commit a batch to multiple shards - %% atomically - {error, unrecoverable, atomic_commit_to_multiple_shards}; _Shards -> %% Use a slower implementation for the unlikely case: - repackage_messages(DB, Messages, Sync) + repackage_messages(DB, Operations, Sync) end. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> _Shard. -shard_of_message(DB, Message, ShardBy) -> +-spec shard_of_operation(emqx_ds:db(), emqx_ds:operation(), clientid | topic) -> _Shard. +shard_of_operation(DB, Operation, ShardBy) -> {CBM, Options} = persistent_term:get(?cbm(DB)), - CBM:shard_of_message(DB, Message, ShardBy, Options). + CBM:shard_of_operation(DB, Operation, ShardBy, Options). %%================================================================================ %% behavior callbacks @@ -129,7 +122,7 @@ shard_of_message(DB, Message, ShardBy) -> n = 0 :: non_neg_integer(), n_bytes = 0 :: non_neg_integer(), tref :: undefined | reference(), - queue :: queue:queue(emqx_types:message()), + queue :: queue:queue(emqx_ds:operation()), pending_replies = [] :: [gen_server:from()] }). 
@@ -168,31 +161,29 @@ format_status(Status) -> handle_call( #enqueue_req{ - messages = Msgs, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes }, From, S0 = #s{pending_replies = Replies0} ) -> S = S0#s{pending_replies = [From | Replies0]}, - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; + {noreply, enqueue(Sync, Operations, NOps, NBytes, S)}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. handle_cast( #enqueue_req{ - messages = Msgs, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes }, S ) -> - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; + {noreply, enqueue(Sync, Operations, NOps, NBytes, S)}; handle_cast(_Cast, S) -> {noreply, S}. @@ -215,11 +206,10 @@ terminate(_Reason, #s{db = DB}) -> enqueue( Sync, - Atomic, - Msgs, + Ops, BatchSize, BatchBytes, - S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0} + S0 = #s{n = NOps0, n_bytes = NBytes0, queue = Q0} ) -> %% At this point we don't split the batches, even when they aren't %% atomic. It wouldn't win us anything in terms of memory, and @@ -227,18 +217,18 @@ enqueue( %% granularity should be fine enough. NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), NBytesMax = application:get_env(emqx_durable_storage, egress_batch_bytes, infinity), - NMsgs = NMsgs0 + BatchSize, + NMsgs = NOps0 + BatchSize, NBytes = NBytes0 + BatchBytes, - case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NMsgs0 > 0) of + case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NOps0 > 0) of true -> %% Adding this batch would cause buffer to overflow. 
Flush %% it now, and retry: S1 = flush(S0), - enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); + enqueue(Sync, Ops, BatchSize, BatchBytes, S1); false -> %% The buffer is empty, we enqueue the atomic batch in its %% entirety: - Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), + Q1 = lists:foldl(fun queue:in/2, Q0, Ops), S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, case NMsgs >= NMax orelse NBytes >= NBytesMax of true -> @@ -336,18 +326,18 @@ do_flush( } end. --spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> +-spec shards_of_batch(emqx_ds:db(), [emqx_ds:operation()]) -> [{_ShardId, {NMessages, NBytes}}] when NMessages :: non_neg_integer(), NBytes :: non_neg_integer(). -shards_of_batch(DB, Messages) -> +shards_of_batch(DB, Batch) -> maps:to_list( lists:foldl( - fun(Message, Acc) -> + fun(Operation, Acc) -> %% TODO: sharding strategy must be part of the DS DB schema: - Shard = shard_of_message(DB, Message, clientid), - Size = payload_size(Message), + Shard = shard_of_operation(DB, Operation, clientid), + Size = payload_size(Operation), maps:update_with( Shard, fun({N, S}) -> @@ -358,36 +348,35 @@ shards_of_batch(DB, Messages) -> ) end, #{}, - Messages + Batch ) ). 
-repackage_messages(DB, Messages, Sync) -> +repackage_messages(DB, Batch, Sync) -> Batches = lists:foldl( - fun(Message, Acc) -> - Shard = shard_of_message(DB, Message, clientid), - Size = payload_size(Message), + fun(Operation, Acc) -> + Shard = shard_of_operation(DB, Operation, clientid), + Size = payload_size(Operation), maps:update_with( Shard, fun({N, S, Msgs}) -> - {N + 1, S + Size, [Message | Msgs]} + {N + 1, S + Size, [Operation | Msgs]} end, - {1, Size, [Message]}, + {1, Size, [Operation]}, Acc ) end, #{}, - Messages + Batch ), maps:fold( - fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> + fun(Shard, {NOps, ByteSize, RevOperations}, ErrAcc) -> Err = enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ - messages = lists:reverse(RevMessages), + operations = lists:reverse(RevOperations), sync = Sync, - atomic = false, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = ByteSize } ), @@ -427,4 +416,6 @@ cancel_timer(S = #s{tref = TRef}) -> %% @doc Return approximate size of the MQTT message (it doesn't take %% all things into account, for example headers and extras) payload_size(#message{payload = P, topic = T}) -> - size(P) + size(T). + size(P) + size(T); +payload_size({_OpName, _}) -> + 0. diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 08c08e0c5..cbf38bb62 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -377,7 +377,7 @@ nodes_of_clientid(DB, ClientId, Nodes = [N0 | _]) -> shard_of_clientid(DB, Node, ClientId) -> ?ON( Node, - emqx_ds_buffer:shard_of_message(DB, #message{from = ClientId}, clientid) + emqx_ds_buffer:shard_of_operation(DB, #message{from = ClientId}, clientid) ). 
%% Consume eagerly: From 5356d678cc33b74f64f15a233e0c345436f693bc Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:55:38 +0200 Subject: [PATCH 19/30] feat(dsraft): support atomic batches + preconditions --- .../src/emqx_ds_replication_layer.erl | 145 ++++++++++++++---- .../src/emqx_ds_replication_layer.hrl | 3 +- apps/emqx_durable_storage/src/emqx_ds.erl | 5 +- .../src/emqx_ds_storage_layer.erl | 65 +++++--- .../src/emqx_ds_storage_reference.erl | 54 +++++-- 5 files changed, 207 insertions(+), 65 deletions(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index f54981884..3430a3bda 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -83,6 +83,7 @@ ra_state/0 ]). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). -include("emqx_ds_replication_layer.hrl"). @@ -135,11 +136,12 @@ ?enc := emqx_ds_storage_layer:delete_iterator() }. -%% TODO: this type is obsolete and is kept only for compatibility with -%% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6) +%% Write batch. +%% Instances of this type currently form the mojority of the Raft log. -type batch() :: #{ ?tag := ?BATCH, - ?batch_messages := [emqx_types:message()] + ?batch_operations := [emqx_ds:operation()], + ?batch_preconditions => [emqx_ds:precondition()] }. -type generation_rank() :: {shard_id(), term()}. @@ -240,16 +242,45 @@ drop_db(DB) -> _ = emqx_ds_proto_v4:drop_db(list_nodes(), DB), emqx_ds_replication_layer_meta:drop_db(DB). --spec store_batch(emqx_ds:db(), [emqx_types:message(), ...], emqx_ds:message_store_opts()) -> +-spec store_batch(emqx_ds:db(), emqx_ds:batch(), emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). 
-store_batch(DB, Messages, Opts) -> +store_batch(DB, Batch = #dsbatch{preconditions = [_ | _]}, Opts) -> + %% NOTE: Atomic batch is implied, will not check with DB config. + store_batch_atomic(DB, Batch, Opts); +store_batch(DB, Batch, Opts) -> + case emqx_ds_replication_layer_meta:db_config(DB) of + #{atomic_batches := true} -> + store_batch_atomic(DB, Batch, Opts); + #{} -> + store_batch_buffered(DB, Batch, Opts) + end. + +store_batch_buffered(DB, #dsbatch{operations = Operations}, Opts) -> + store_batch_buffered(DB, Operations, Opts); +store_batch_buffered(DB, Batch, Opts) -> try - emqx_ds_buffer:store_batch(DB, Messages, Opts) + emqx_ds_buffer:store_batch(DB, Batch, Opts) catch error:{Reason, _Call} when Reason == timeout; Reason == noproc -> {error, recoverable, Reason} end. +store_batch_atomic(DB, Batch, _Opts) -> + Shards = shards_of_batch(DB, Batch), + case Shards of + [Shard] -> + case ra_store_batch(DB, Shard, Batch) of + {timeout, ServerId} -> + {error, recoverable, {timeout, ServerId}}; + Result -> + Result + end; + [] -> + ok; + [_ | _] -> + {error, unrecoverable, nonatomic_batch_spans_multiple_storages} + end. + -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> [{emqx_ds:stream_rank(), stream()}]. get_streams(DB, TopicFilter, StartTime) -> @@ -418,6 +449,23 @@ shard_of_key(DB, Key) -> Hash = erlang:phash2(Key, N), integer_to_binary(Hash). +shards_of_batch(DB, #dsbatch{operations = Operations, preconditions = Preconditions}) -> + shards_of_batch(DB, Preconditions, shards_of_batch(DB, Operations, [])); +shards_of_batch(DB, Operations) -> + shards_of_batch(DB, Operations, []). + +shards_of_batch(DB, [Operation | Rest], Acc) -> + case shard_of_operation(DB, Operation, clientid, #{}) of + Shard when Shard =:= hd(Acc) -> + shards_of_batch(DB, Rest, Acc); + Shard when Acc =:= [] -> + shards_of_batch(DB, Rest, [Shard]); + ShardAnother -> + [ShardAnother | Acc] + end; +shards_of_batch(_DB, [], Acc) -> + Acc. 
+ %%================================================================================ %% Internal exports (RPC targets) %%================================================================================ @@ -639,13 +687,22 @@ list_nodes() -> end ). --spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), [emqx_types:message()]) -> - ok | {timeout, _} | {error, recoverable | unrecoverable, _Err}. -ra_store_batch(DB, Shard, Messages) -> - Command = #{ - ?tag => ?BATCH, - ?batch_messages => Messages - }, +-spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:batch()) -> + ok | {timeout, _} | emqx_ds:error(_). +ra_store_batch(DB, Shard, Batch) -> + case Batch of + #dsbatch{operations = Operations, preconditions = Preconditions} -> + Command = #{ + ?tag => ?BATCH, + ?batch_operations => Operations, + ?batch_preconditions => Preconditions + }; + Operations -> + Command = #{ + ?tag => ?BATCH, + ?batch_operations => Operations + } + end, Servers = emqx_ds_replication_layer_shard:servers(DB, Shard, leader_preferred), case emqx_ds_replication_layer_shard:process_command(Servers, Command, ?RA_TIMEOUT) of {ok, Result, _Leader} -> @@ -782,6 +839,7 @@ ra_drop_shard(DB, Shard) -> -define(pd_ra_idx_need_release, '$emqx_ds_raft_idx_need_release'). -define(pd_ra_bytes_need_release, '$emqx_ds_raft_bytes_need_release'). +-define(pd_ra_force_monotonic, '$emqx_ds_raft_force_monotonic'). -spec init(_Args :: map()) -> ra_state(). init(#{db := DB, shard := Shard}) -> @@ -791,18 +849,30 @@ init(#{db := DB, shard := Shard}) -> {ra_state(), _Reply, _Effects}. 
apply( RaftMeta, - #{ + Command = #{ ?tag := ?BATCH, - ?batch_messages := MessagesIn + ?batch_operations := OperationsIn }, #{db_shard := DBShard = {DB, Shard}, latest := Latest0} = State0 ) -> - ?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => MessagesIn, latest => Latest0}), - {Stats, Latest, Messages} = assign_timestamps(Latest0, MessagesIn), - Result = emqx_ds_storage_layer:store_batch(DBShard, Messages, #{durable => false}), - State = State0#{latest := Latest}, - set_ts(DBShard, Latest), - Effects = try_release_log(Stats, RaftMeta, State), + ?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => OperationsIn, latest => Latest0}), + Preconditions = maps:get(?batch_preconditions, Command, []), + {Stats, Latest, Operations} = assign_timestamps(DB, Latest0, OperationsIn), + %% FIXME + case emqx_ds_precondition:verify(emqx_ds_storage_layer, DBShard, Preconditions) of + ok -> + Result = emqx_ds_storage_layer:store_batch(DBShard, Operations, #{durable => false}), + State = State0#{latest := Latest}, + set_ts(DBShard, Latest), + Effects = try_release_log(Stats, RaftMeta, State); + PreconditionFailed = {precondition_failed, _} -> + Result = {error, unrecoverable, PreconditionFailed}, + State = State0, + Effects = []; + Result -> + State = State0, + Effects = [] + end, Effects =/= [] andalso ?tp(ds_ra_effects, #{effects => Effects, meta => RaftMeta}), {State, Result, Effects}; apply( @@ -877,6 +947,21 @@ apply( Effects = handle_custom_event(DBShard, Latest, CustomEvent), {State#{latest => Latest}, ok, Effects}. +assign_timestamps(DB, Latest, Messages) -> + ForceMonotonic = force_monotonic_timestamps(DB), + assign_timestamps(ForceMonotonic, Latest, Messages, [], 0, 0). + +force_monotonic_timestamps(DB) -> + case erlang:get(?pd_ra_force_monotonic) of + undefined -> + DBConfig = emqx_ds_replication_layer_meta:db_config(DB), + Flag = maps:get(force_monotonic_timestamps, DBConfig), + erlang:put(?pd_ra_force_monotonic, Flag); + Flag -> + ok + end, + Flag. 
+ try_release_log({_N, BatchSize}, RaftMeta = #{index := CurrentIdx}, State) -> %% NOTE %% Because cursor release means storage flush (see @@ -939,10 +1024,7 @@ tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) -> ?tp(emqx_ds_replication_layer_tick, #{db => DB, shard => Shard, timestamp => Timestamp}), handle_custom_event(DBShard, Timestamp, tick). -assign_timestamps(Latest, Messages) -> - assign_timestamps(Latest, Messages, [], 0, 0). - -assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) -> +assign_timestamps(true, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> case emqx_message:timestamp(Message0, microsecond) of TimestampUs when TimestampUs > Latest0 -> Latest = TimestampUs, @@ -951,8 +1033,17 @@ assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) -> Latest = Latest0 + 1, Message = assign_timestamp(Latest, Message0) end, - assign_timestamps(Latest, Rest, [Message | Acc], N + 1, Sz + approx_message_size(Message0)); -assign_timestamps(Latest, [], Acc, N, Size) -> + MSize = approx_message_size(Message0), + assign_timestamps(true, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); +assign_timestamps(false, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> + Timestamp = emqx_message:timestamp(Message0), + Latest = max(Latest0, Timestamp), + Message = assign_timestamp(Timestamp, Message0), + MSize = approx_message_size(Message0), + assign_timestamps(false, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); +assign_timestamps(ForceMonotonic, Latest, [Operation | Rest], Acc, N, Sz) -> + assign_timestamps(ForceMonotonic, Latest, Rest, [Operation | Acc], N + 1, Sz); +assign_timestamps(_ForceMonotonic, Latest, [], Acc, N, Size) -> {{N, Size}, Latest, lists:reverse(Acc)}. 
assign_timestamp(TimestampUs, Message) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl index f33090c46..c87e9bcba 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl @@ -19,7 +19,8 @@ -define(enc, 3). %% ?BATCH --define(batch_messages, 2). +-define(batch_operations, 2). +-define(batch_preconditions, 4). -define(timestamp, 3). %% add_generation / update_config diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 69de92325..0e4336a26 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -56,6 +56,7 @@ topic/0, batch/0, operation/0, + deletion/0, precondition/0, stream/0, delete_stream/0, @@ -110,7 +111,9 @@ message() %% Delete a message. %% Does nothing if the message does not exist. - | {delete, message_matcher('_')}. + | deletion(). + +-type deletion() :: {delete, message_matcher('_')}. %% Precondition. %% Fails whole batch if the storage already has the matching message (`if_exists'), diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index d6250254d..ac2edc254 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -37,6 +37,9 @@ next/4, delete_next/5, + %% Preconditions + lookup_message/2, + %% Generations update_config/3, add_generation/2, @@ -74,6 +77,7 @@ batch_store_opts/0 ]). +-include("emqx_ds.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}). @@ -115,6 +119,11 @@ -type gen_id() :: 0..16#ffff. +-type batch() :: [ + {emqx_ds:time(), emqx_types:message()} + | emqx_ds:deletion() +]. + %% Options affecting how batches should be stored. 
%% See also: `emqx_ds:message_store_opts()'. -type batch_store_opts() :: @@ -294,6 +303,10 @@ | {ok, end_of_stream} | emqx_ds:error(_). +%% Lookup a single message, for preconditions to work. +-callback lookup_message(shard_id(), generation_data(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). + -callback handle_event(shard_id(), generation_data(), emqx_ds:time(), CustomEvent | tick) -> [CustomEvent]. @@ -317,14 +330,10 @@ drop_shard(Shard) -> %% @doc This is a convenicence wrapper that combines `prepare' and %% `commit' operations. --spec store_batch( - shard_id(), - [{emqx_ds:time(), emqx_types:message()}], - batch_store_opts() -) -> +-spec store_batch(shard_id(), batch(), batch_store_opts()) -> emqx_ds:store_batch_result(). -store_batch(Shard, Messages, Options) -> - case prepare_batch(Shard, Messages, #{}) of +store_batch(Shard, Batch, Options) -> + case prepare_batch(Shard, Batch, #{}) of {ok, CookedBatch} -> commit_batch(Shard, CookedBatch, Options); ignore -> @@ -342,23 +351,21 @@ store_batch(Shard, Messages, Options) -> %% %% The underlying storage layout MAY use timestamp as a unique message %% ID. --spec prepare_batch( - shard_id(), - [{emqx_ds:time(), emqx_types:message()}], - batch_prepare_opts() -) -> {ok, cooked_batch()} | ignore | emqx_ds:error(_). -prepare_batch(Shard, Messages = [{Time, _} | _], Options) -> +-spec prepare_batch(shard_id(), batch(), batch_prepare_opts()) -> + {ok, cooked_batch()} | ignore | emqx_ds:error(_). +prepare_batch(Shard, Batch, Options) -> %% NOTE %% We assume that batches do not span generations. Callers should enforce this. 
?tp(emqx_ds_storage_layer_prepare_batch, #{ - shard => Shard, messages => Messages, options => Options + shard => Shard, batch => Batch, options => Options }), %% FIXME: always store messages in the current generation - case generation_at(Shard, Time) of + Time = batch_starts_at(Batch), + case is_integer(Time) andalso generation_at(Shard, Time) of {GenId, #{module := Mod, data := GenData}} -> T0 = erlang:monotonic_time(microsecond), Result = - case Mod:prepare_batch(Shard, GenData, Messages, Options) of + case Mod:prepare_batch(Shard, GenData, Batch, Options) of {ok, CookedBatch} -> {ok, #{?tag => ?COOKED_BATCH, ?generation => GenId, ?enc => CookedBatch}}; Error = {error, _, _} -> @@ -368,11 +375,21 @@ prepare_batch(Shard, Messages = [{Time, _} | _], Options) -> %% TODO store->prepare emqx_ds_builtin_metrics:observe_store_batch_time(Shard, T1 - T0), Result; + false -> + %% No write operations in this batch. + ignore; not_found -> + %% Generation is likely already GCed. ignore - end; -prepare_batch(_Shard, [], _Options) -> - ignore. + end. + +-spec batch_starts_at(batch()) -> emqx_ds:time() | undefined. +batch_starts_at([{Time, _Message} | _]) when is_integer(Time) -> + Time; +batch_starts_at([{delete, #message_matcher{timestamp = Time}} | _]) -> + Time; +batch_starts_at([]) -> + undefined. %% @doc Commit cooked batch to the storage. %% @@ -559,6 +576,16 @@ update_config(ShardId, Since, Options) -> add_generation(ShardId, Since) -> gen_server:call(?REF(ShardId), #call_add_generation{since = Since}, infinity). +-spec lookup_message(shard_id(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). +lookup_message(ShardId, Matcher = #message_matcher{timestamp = Time}) -> + case generation_at(ShardId, Time) of + {_GenId, #{module := Mod, data := GenData}} -> + Mod:lookup_message(ShardId, GenData, Matcher); + not_found -> + not_found + end. 
+ -spec list_generations_with_lifetimes(shard_id()) -> #{ gen_id() => #{ diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl index cfd6f30ac..869602fcd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl @@ -21,6 +21,8 @@ %% used for testing. -module(emqx_ds_storage_reference). +-include("emqx_ds.hrl"). + -behaviour(emqx_ds_storage_layer). %% API: @@ -39,7 +41,8 @@ make_delete_iterator/5, update_iterator/4, next/6, - delete_next/7 + delete_next/7, + lookup_message/3 ]). %% internal exports: @@ -49,6 +52,8 @@ -include_lib("emqx_utils/include/emqx_message.hrl"). +-define(DB_KEY(TIMESTAMP), <>). + %%================================================================================ %% Type declarations %%================================================================================ @@ -102,23 +107,22 @@ drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{cf = CFHandle}) -> ok = rocksdb:drop_column_family(DBHandle, CFHandle), ok. -prepare_batch(_ShardId, _Data, Messages, _Options) -> - {ok, Messages}. +prepare_batch(_ShardId, _Data, Batch, _Options) -> + {ok, Batch}. -commit_batch(_ShardId, #s{db = DB, cf = CF}, Messages, Options) -> - {ok, Batch} = rocksdb:batch(), - lists:foreach( - fun({TS, Msg}) -> - Key = <>, - Val = term_to_binary(Msg), - rocksdb:batch_put(Batch, CF, Key, Val) - end, - Messages - ), - Res = rocksdb:write_batch(DB, Batch, write_batch_opts(Options)), - rocksdb:release_batch(Batch), +commit_batch(_ShardId, S = #s{db = DB}, Batch, Options) -> + {ok, BatchHandle} = rocksdb:batch(), + lists:foreach(fun(Op) -> process_batch_operation(S, Op, BatchHandle) end, Batch), + Res = rocksdb:write_batch(DB, BatchHandle, write_batch_opts(Options)), + rocksdb:release_batch(BatchHandle), Res. 
+process_batch_operation(S, {TS, Msg = #message{}}, BatchHandle) -> + Val = encode_message(Msg), + rocksdb:batch_put(BatchHandle, S#s.cf, ?DB_KEY(TS), Val); +process_batch_operation(S, {delete, #message_matcher{timestamp = TS}}, BatchHandle) -> + rocksdb:batch_delete(BatchHandle, S#s.cf, ?DB_KEY(TS)). + get_streams(_Shard, _Data, _TopicFilter, _StartTime) -> [#stream{}]. @@ -205,6 +209,16 @@ delete_next(_Shard, #s{db = DB, cf = CF}, It0, Selector, BatchSize, _Now, IsCurr {ok, It, NumDeleted, NumIterated} end. +lookup_message(_ShardId, #s{db = DB, cf = CF}, #message_matcher{timestamp = TS}) -> + case rocksdb:get(DB, CF, ?DB_KEY(TS), _ReadOpts = []) of + {ok, Val} -> + decode_message(Val); + not_found -> + not_found; + {error, Reason} -> + {error, unrecoverable, Reason} + end. + %%================================================================================ %% Internal functions %%================================================================================ @@ -214,7 +228,7 @@ do_next(_, _, _, _, 0, Key, Acc) -> do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) -> case rocksdb:iterator_move(IT, Action) of {ok, Key = <>, Blob} -> - Msg = #message{topic = Topic} = binary_to_term(Blob), + Msg = #message{topic = Topic} = decode_message(Blob), TopicWords = emqx_topic:words(Topic), case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of true -> @@ -234,7 +248,7 @@ do_delete_next( ) -> case rocksdb:iterator_move(IT, Action) of {ok, Key, Blob} -> - Msg = #message{topic = Topic, timestamp = TS} = binary_to_term(Blob), + Msg = #message{topic = Topic, timestamp = TS} = decode_message(Blob), TopicWords = emqx_topic:words(Topic), case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of true -> @@ -285,6 +299,12 @@ do_delete_next( {Key0, {AccDel, AccIter}} end. +encode_message(Msg) -> + term_to_binary(Msg). + +decode_message(Val) -> + binary_to_term(Val). 
+ %% @doc Generate a column family ID for the MQTT messages -spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()]. data_cf(GenId) -> From 68990f1538dc0b450b6e63fb8a412498fe21e935 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:56:29 +0200 Subject: [PATCH 20/30] feat(ds): support operations + preconditions in skipstream-lts --- .../src/emqx_ds_storage_skipstream_lts.erl | 104 ++++++++++++++---- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl index cb87b8a6f..b466983b7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl @@ -33,7 +33,8 @@ make_delete_iterator/5, update_iterator/4, next/6, - delete_next/7 + delete_next/7, + lookup_message/3 ]). %% internal exports: @@ -43,6 +44,7 @@ -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). +-include("emqx_ds.hrl"). -include("emqx_ds_metrics.hrl"). -ifdef(TEST). @@ -56,11 +58,12 @@ %%================================================================================ %% TLOG entry -%% keys: --define(cooked_payloads, 6). +%% Keys: +-define(cooked_msg_ops, 6). -define(cooked_lts_ops, 7). %% Payload: --define(cooked_payload(TIMESTAMP, STATIC, VARYING, VALUE), +-define(cooked_delete, 100). +-define(cooked_msg_op(TIMESTAMP, STATIC, VARYING, VALUE), {TIMESTAMP, STATIC, VARYING, VALUE} ). @@ -176,25 +179,39 @@ drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{data_cf = DataCF, trie_cf = TrieCF, ok = rocksdb:drop_column_family(DBHandle, TrieCF), ok. 
-prepare_batch(_ShardId, S = #s{trie = Trie}, Messages, _Options) -> +prepare_batch( + _ShardId, + S = #s{trie = Trie}, + Operations, + _Options +) -> _ = erase(?lts_persist_ops), - Payloads = [ - begin - Tokens = words(Topic), - {Static, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), - ?cooked_payload(Timestamp, Static, Varying, serialize(S, Varying, Msg)) - end - || {Timestamp, Msg = #message{topic = Topic}} <- Messages - ], + OperationsCooked = emqx_utils:flattermap( + fun + ({Timestamp, Msg = #message{topic = Topic}}) -> + Tokens = words(Topic), + {Static, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), + ?cooked_msg_op(Timestamp, Static, Varying, serialize(S, Varying, Msg)); + ({delete, #message_matcher{topic = Topic, timestamp = Timestamp}}) -> + case emqx_ds_lts:lookup_topic_key(Trie, words(Topic)) of + {ok, {Static, Varying}} -> + ?cooked_msg_op(Timestamp, Static, Varying, ?cooked_delete); + undefined -> + %% Topic is unknown, nothing to delete. + [] + end + end, + Operations + ), {ok, #{ - ?cooked_payloads => Payloads, + ?cooked_msg_ops => OperationsCooked, ?cooked_lts_ops => pop_lts_persist_ops() }}. 
commit_batch( _ShardId, #s{db = DB, trie_cf = TrieCF, data_cf = DataCF, trie = Trie, hash_bytes = HashBytes}, - #{?cooked_lts_ops := LtsOps, ?cooked_payloads := Payloads}, + #{?cooked_lts_ops := LtsOps, ?cooked_msg_ops := Operations}, Options ) -> {ok, Batch} = rocksdb:batch(), @@ -210,12 +227,17 @@ commit_batch( _ = emqx_ds_lts:trie_update(Trie, LtsOps), %% Commit payloads: lists:foreach( - fun(?cooked_payload(Timestamp, Static, Varying, ValBlob)) -> - MasterKey = mk_key(Static, 0, <<>>, Timestamp), - ok = rocksdb:batch_put(Batch, DataCF, MasterKey, ValBlob), - mk_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp) + fun + (?cooked_msg_op(Timestamp, Static, Varying, ValBlob = <<_/bytes>>)) -> + MasterKey = mk_key(Static, 0, <<>>, Timestamp), + ok = rocksdb:batch_put(Batch, DataCF, MasterKey, ValBlob), + mk_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp); + (?cooked_msg_op(Timestamp, Static, Varying, ?cooked_delete)) -> + MasterKey = mk_key(Static, 0, <<>>, Timestamp), + ok = rocksdb:batch_delete(Batch, DataCF, MasterKey), + delete_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp) end, - Payloads + Operations ), Result = rocksdb:write_batch(DB, Batch, [ {disable_wal, not maps:get(durable, Options, true)} @@ -285,6 +307,28 @@ delete_next(Shard, S, It0, Selector, BatchSize, Now, IsCurrent) -> Ret end. +lookup_message( + Shard, + S = #s{db = DB, data_cf = CF, trie = Trie}, + #message_matcher{topic = Topic, timestamp = Timestamp} +) -> + case emqx_ds_lts:lookup_topic_key(Trie, words(Topic)) of + {ok, {StaticIdx, _Varying}} -> + DSKey = mk_key(StaticIdx, 0, <<>>, Timestamp), + case rocksdb:get(DB, CF, DSKey, _ReadOpts = []) of + {ok, Val} -> + {ok, TopicStructure} = emqx_ds_lts:reverse_lookup(Trie, StaticIdx), + Msg = deserialize(S, Val), + enrich(Shard, S, TopicStructure, DSKey, Msg); + not_found -> + not_found; + {error, Reason} -> + {error, unrecoverable, Reason} + end; + undefined -> + not_found + end. 
+ %%================================================================================ %% Internal exports %%================================================================================ @@ -330,12 +374,18 @@ serialize(#s{serialization_schema = SSchema, with_guid = WithGuid}, Varying, Msg }, emqx_ds_msg_serializer:serialize(SSchema, Msg). +enrich(#ctx{shard = Shard, s = S, topic_structure = TopicStructure}, DSKey, Msg0) -> + enrich(Shard, S, TopicStructure, DSKey, Msg0). + enrich( - #ctx{shard = Shard, topic_structure = Structure, s = #s{with_guid = WithGuid}}, + Shard, + #s{with_guid = WithGuid}, + TopicStructure, DSKey, Msg0 ) -> - Topic = emqx_topic:join(emqx_ds_lts:decompress_topic(Structure, words(Msg0#message.topic))), + Tokens = words(Msg0#message.topic), + Topic = emqx_topic:join(emqx_ds_lts:decompress_topic(TopicStructure, Tokens)), Msg0#message{ topic = Topic, id = @@ -584,6 +634,16 @@ mk_index(Batch, CF, HashBytes, Static, Timestamp, N, [TopicLevel | Varying]) -> mk_index(_Batch, _CF, _HashBytes, _Static, _Timestamp, _N, []) -> ok. +delete_index(Batch, CF, HashBytes, Static, Varying, Timestamp) -> + delete_index(Batch, CF, HashBytes, Static, Timestamp, 1, Varying). + +delete_index(Batch, CF, HashBytes, Static, Timestamp, N, [TopicLevel | Varying]) -> + Key = mk_key(Static, N, hash(HashBytes, TopicLevel), Timestamp), + ok = rocksdb:batch_delete(Batch, CF, Key), + delete_index(Batch, CF, HashBytes, Static, Timestamp, N + 1, Varying); +delete_index(_Batch, _CF, _HashBytes, _Static, _Timestamp, _N, []) -> + ok. 
+ %%%%%%%% Keys %%%%%%%%%% get_key_range(StaticIdx, WildcardIdx, Hash) -> From 1559aac48606c2ed33623b5210b290c3d32b049a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:57:21 +0200 Subject: [PATCH 21/30] test(dsbackend): add shared tests for atomic batches + preconditions --- .../test/emqx_ds_backends_SUITE.erl | 164 ++++++++++++++++-- 1 file changed, 150 insertions(+), 14 deletions(-) diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index 1fc1594fc..e70d2b682 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -19,6 +19,7 @@ -compile(nowarn_export_all). -include("../../emqx/include/emqx.hrl"). +-include("../../emqx_durable_storage/include/emqx_ds.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("stdlib/include/assert.hrl"). -include("../../emqx/include/asserts.hrl"). @@ -145,7 +146,7 @@ t_06_smoke_add_generation(Config) -> ?assertMatch(ok, emqx_ds:add_generation(DB)), [ {Gen1, #{created_at := Created1, since := Since1, until := Until1}}, - {Gen2, #{created_at := Created2, since := Since2, until := undefined}} + {_Gen2, #{created_at := Created2, since := Since2, until := undefined}} ] = maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)), %% Check units of the return values (+/- 10s from test begin time): ?give_or_take(BeginTime, 10_000, Created1), @@ -234,8 +235,8 @@ t_09_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + DBOpts = (opts(Config))#{atomic_batches => true}, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -243,13 +244,8 @@ t_09_atomic_store_batch(Config) -> ], ?assertEqual( ok, - emqx_ds:store_batch(DB, Msgs, #{ - atomic => true, - sync => true - 
}) - ), - {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_buffer_flush}), - ?assertMatch(#{batch := [_, _, _]}, Flush) + emqx_ds:store_batch(DB, Msgs, #{sync => true}) + ) end, [] ), @@ -289,6 +285,124 @@ t_10_non_atomic_store_batch(Config) -> ), ok. +t_11_batch_preconditions(Config) -> + DB = ?FUNCTION_NAME, + ?check_trace( + begin + DBOpts = (opts(Config))#{ + atomic_batches => true, + force_monotonic_timestamps => false + }, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), + + %% Conditional delete + TS = 42, + Batch1 = #dsbatch{ + preconditions = [{if_exists, matcher(<<"c1">>, <<"t/a">>, '_', TS)}], + operations = [{delete, matcher(<<"c1">>, <<"t/a">>, '_', TS)}] + }, + %% Conditional insert + M1 = message(<<"c1">>, <<"t/a">>, <<"M1">>, TS), + Batch2 = #dsbatch{ + preconditions = [{unless_exists, matcher(<<"c1">>, <<"t/a">>, '_', TS)}], + operations = [M1] + }, + + %% No such message yet, precondition fails: + ?assertEqual( + {error, unrecoverable, {precondition_failed, not_found}}, + emqx_ds:store_batch(DB, Batch1) + ), + %% No such message yet, `unless` precondition holds: + ?assertEqual( + ok, + emqx_ds:store_batch(DB, Batch2) + ), + %% Now there's such message, `unless` precondition now fails: + ?assertEqual( + {error, unrecoverable, {precondition_failed, M1}}, + emqx_ds:store_batch(DB, Batch2) + ), + %% On the other hand, `if` precondition now holds: + ?assertEqual( + ok, + emqx_ds:store_batch(DB, Batch1) + ) + end, + fun(_Trace) -> + %% Wait at least until current epoch ends. + ct:sleep(1000), + %% There's no messages in the DB. + ?assertEqual( + [], + emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)) + ) + end + ). 
+ +t_12_batch_precondition_conflicts(Config) -> + DB = ?FUNCTION_NAME, + NBatches = 50, + NMessages = 10, + ?check_trace( + begin + DBOpts = (opts(Config))#{ + atomic_batches => true, + force_monotonic_timestamps => false + }, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), + + ConflictBatches = [ + #dsbatch{ + %% If the slot is free... + preconditions = [{if_exists, matcher(<<"c1">>, <<"t/slot">>, _Free = <<>>, 0)}], + %% Take it and write NMessages extra messages, so that batches take longer to + %% process and have higher chances to conflict with each other. + operations = + [ + message(<<"c1">>, <<"t/slot">>, integer_to_binary(I), _TS = 0) + | [ + message(<<"c1">>, {"t/owner/~p/~p", [I, J]}, <<>>, I * 100 + J) + || J <- lists:seq(1, NMessages) + ] + ] + } + || I <- lists:seq(1, NBatches) + ], + + %% Run those batches concurrently. + ok = emqx_ds:store_batch(DB, [message(<<"c1">>, <<"t/slot">>, <<>>, 0)]), + Results = emqx_utils:pmap( + fun(B) -> emqx_ds:store_batch(DB, B) end, + ConflictBatches, + infinity + ), + + %% Only one should have succeeded. + ?assertEqual([ok], [Ok || Ok = ok <- Results]), + + %% While other failed with an identical `precondition_failed`. + Failures = lists:usort([PreconditionFailed || {error, _, PreconditionFailed} <- Results]), + ?assertMatch( + [{precondition_failed, #message{topic = <<"t/slot">>, payload = <<_/bytes>>}}], + Failures + ), + + %% Wait at least until current epoch ends. + ct:sleep(1000), + %% Storage should contain single batch's messages. + [{precondition_failed, #message{payload = IOwner}}] = Failures, + WinnerBatch = lists:nth(binary_to_integer(IOwner), ConflictBatches), + BatchMessages = lists:sort(WinnerBatch#dsbatch.operations), + DBMessages = emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)), + ?assertEqual( + BatchMessages, + DBMessages + ) + end, + [] + ). 
+ t_smoke_delete_next(Config) -> DB = ?FUNCTION_NAME, ?check_trace( @@ -534,12 +648,25 @@ message(ClientId, Topic, Payload, PublishedAt) -> message(Topic, Payload, PublishedAt) -> #message{ - topic = Topic, - payload = Payload, + topic = try_format(Topic), + payload = try_format(Payload), timestamp = PublishedAt, id = emqx_guid:gen() }. +matcher(ClientID, Topic, Payload, Timestamp) -> + #message_matcher{ + from = ClientID, + topic = try_format(Topic), + timestamp = Timestamp, + payload = Payload + }. + +try_format({Fmt, Args}) -> + emqx_utils:format(Fmt, Args); +try_format(String) -> + String. + delete(DB, It, Selector, BatchSize) -> delete(DB, It, Selector, BatchSize, 0). @@ -562,9 +689,18 @@ all() -> groups() -> TCs = emqx_common_test_helpers:all(?MODULE), + %% TODO: Remove once builtin-local supports preconditions + atomic batches. + BuiltinLocalTCs = + TCs -- + [ + t_09_atomic_store_batch, + t_11_batch_preconditions, + t_12_batch_precondition_conflicts + ], + BuiltinRaftTCs = TCs, [ - {builtin_local, TCs}, - {builtin_raft, TCs} + {builtin_local, BuiltinLocalTCs}, + {builtin_raft, BuiltinRaftTCs} ]. 
init_per_group(builtin_local, Config) -> From 109ffe7a70fdee8d780a999c9db14c115ac3af25 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:41:28 +0200 Subject: [PATCH 22/30] fix(dsbackend): unify timestamp resolution in operations / preconditions --- .../src/emqx_ds_builtin_local.erl | 18 ++++++++++++------ .../src/emqx_ds_replication_layer.erl | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index 29fc98083..e55f4807e 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -231,9 +231,9 @@ flush_buffer(DB, Shard, Messages, S0 = #bs{options = Options}) -> make_batch(_ForceMonotonic = true, Latest, Messages) -> assign_monotonic_timestamps(Latest, Messages, []); make_batch(false, Latest, Messages) -> - assign_message_timestamps(Latest, Messages, []). + assign_operation_timestamps(Latest, Messages, []). -assign_monotonic_timestamps(Latest0, [Message | Rest], Acc0) -> +assign_monotonic_timestamps(Latest0, [Message = #message{} | Rest], Acc0) -> case emqx_message:timestamp(Message, microsecond) of TimestampUs when TimestampUs > Latest0 -> Latest = TimestampUs; @@ -242,15 +242,21 @@ assign_monotonic_timestamps(Latest0, [Message | Rest], Acc0) -> end, Acc = [assign_timestamp(Latest, Message) | Acc0], assign_monotonic_timestamps(Latest, Rest, Acc); +assign_monotonic_timestamps(Latest, [Operation | Rest], Acc0) -> + Acc = [Operation | Acc0], + assign_monotonic_timestamps(Latest, Rest, Acc); assign_monotonic_timestamps(Latest, [], Acc) -> {Latest, lists:reverse(Acc)}. 
-assign_message_timestamps(Latest0, [Message | Rest], Acc0) -> - TimestampUs = emqx_message:timestamp(Message, microsecond), +assign_operation_timestamps(Latest0, [Message = #message{} | Rest], Acc0) -> + TimestampUs = emqx_message:timestamp(Message), Latest = max(TimestampUs, Latest0), Acc = [assign_timestamp(TimestampUs, Message) | Acc0], - assign_message_timestamps(Latest, Rest, Acc); -assign_message_timestamps(Latest, [], Acc) -> + assign_operation_timestamps(Latest, Rest, Acc); +assign_operation_timestamps(Latest, [Operation | Rest], Acc0) -> + Acc = [Operation | Acc0], + assign_operation_timestamps(Latest, Rest, Acc); +assign_operation_timestamps(Latest, [], Acc) -> {Latest, lists:reverse(Acc)}. assign_timestamp(TimestampUs, Message) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 3430a3bda..6acbf94ca 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -1036,9 +1036,9 @@ assign_timestamps(true, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> MSize = approx_message_size(Message0), assign_timestamps(true, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); assign_timestamps(false, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> - Timestamp = emqx_message:timestamp(Message0), - Latest = max(Latest0, Timestamp), - Message = assign_timestamp(Timestamp, Message0), + TimestampUs = emqx_message:timestamp(Message0), + Latest = max(Latest0, TimestampUs), + Message = assign_timestamp(TimestampUs, Message0), MSize = approx_message_size(Message0), assign_timestamps(false, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); assign_timestamps(ForceMonotonic, Latest, [Operation | Rest], Acc, N, Sz) -> From 9f96e0957ef6468184120d110b26f3f05b80e78e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:43:39 +0200 Subject: [PATCH 23/30] test(ds): verify deletions 
work predictably --- .../test/emqx_ds_storage_layout_SUITE.erl | 112 ++++++++---------- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl index e0531dad0..e0fea7875 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl @@ -18,11 +18,14 @@ -compile(export_all). -compile(nowarn_export_all). +-include("emqx_ds.hrl"). -include("../../emqx/include/emqx.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("stdlib/include/assert.hrl"). +-define(assertSameSet(A, B), ?assertEqual(lists:sort(A), lists:sort(B))). + -define(FUTURE, (1 bsl 64 - 1)). -define(SHARD, shard(?FUNCTION_NAME)). @@ -66,6 +69,30 @@ t_store(_Config) -> }, ?assertMatch(ok, emqx_ds:store_batch(?FUNCTION_NAME, [Msg])). +%% Smoke test of applying batch operations +t_operations(db_config, _Config) -> + #{force_monotonic_timestamps => false}. + +t_operations(_Config) -> + Batch1 = [ + make_message(100, <<"t/1">>, <<"M1">>), + make_message(200, <<"t/2">>, <<"M2">>), + make_message(300, <<"t/3">>, <<"M3">>) + ], + Batch2 = [ + make_deletion(200, <<"t/2">>, <<"M2">>), + make_deletion(300, <<"t/3">>, '_'), + make_deletion(400, <<"t/4">>, '_') + ], + ?assertEqual(ok, emqx_ds:store_batch(?FUNCTION_NAME, Batch1)), + ?assertEqual(ok, emqx_ds:store_batch(?FUNCTION_NAME, Batch2)), + ?assertMatch( + [ + #message{timestamp = 100, topic = <<"t/1">>, payload = <<"M1">>} + ], + dump_messages(?SHARD, <<"t/#">>, 0) + ). + %% Smoke test for iteration through a concrete topic t_iterate(_Config) -> %% Prepare data: @@ -124,8 +151,6 @@ t_delete(_Config) -> ?assertNot(is_map_key(TopicToDelete, MessagesByTopic), #{msgs => MessagesByTopic}), ?assertEqual(20, length(Messages)). 
--define(assertSameSet(A, B), ?assertEqual(lists:sort(A), lists:sort(B))). - %% Smoke test that verifies that concrete topics are mapped to %% individual streams, unless there's too many of them. t_get_streams(Config) -> @@ -417,79 +442,26 @@ dump_stream(Shard, Stream, TopicFilter, StartTime) -> %% || Topic <- Topics, PublishedAt <- Timestamps %% ]. -%% t_iterate_multigen(_Config) -> -%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), -%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), -%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), -%% Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], -%% Timestamps = lists:seq(1, 100), -%% _ = [ -%% store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) -%% || Topic <- Topics, PublishedAt <- Timestamps -%% ], -%% ?assertEqual( -%% lists:sort([ -%% {Topic, PublishedAt} -%% || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps -%% ]), -%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)]) -%% ), -%% ?assertEqual( -%% lists:sort([ -%% {Topic, PublishedAt} -%% || Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100) -%% ]), -%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 60)]) -%% ). 
- -%% t_iterate_multigen_preserve_restore(_Config) -> -%% ReplayID = atom_to_binary(?FUNCTION_NAME), -%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), -%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), -%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), -%% Topics = ["foo/bar", "foo/bar/baz", "a/bar"], -%% Timestamps = lists:seq(1, 100), -%% TopicFilter = "foo/#", -%% TopicsMatching = ["foo/bar", "foo/bar/baz"], -%% _ = [ -%% store(?SHARD, TS, Topic, term_to_binary({Topic, TS})) -%% || Topic <- Topics, TS <- Timestamps -%% ], -%% It0 = iterator(?SHARD, TopicFilter, 0), -%% {It1, Res10} = iterate(It0, 10), -%% % preserve mid-generation -%% ok = emqx_ds_storage_layer:preserve_iterator(It1, ReplayID), -%% {ok, It2} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), -%% {It3, Res100} = iterate(It2, 88), -%% % preserve on the generation boundary -%% ok = emqx_ds_storage_layer:preserve_iterator(It3, ReplayID), -%% {ok, It4} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), -%% {It5, Res200} = iterate(It4, 1000), -%% ?assertEqual({end_of_stream, []}, iterate(It5, 1)), -%% ?assertEqual( -%% lists:sort([{Topic, TS} || Topic <- TopicsMatching, TS <- Timestamps]), -%% lists:sort([binary_to_term(Payload) || Payload <- Res10 ++ Res100 ++ Res200]) -%% ), -%% ?assertEqual( -%% ok, -%% emqx_ds_storage_layer:discard_iterator(?SHARD, ReplayID) -%% ), -%% ?assertEqual( -%% {error, not_found}, -%% emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID) -%% ). - make_message(PublishedAt, Topic, Payload) when is_list(Topic) -> make_message(PublishedAt, list_to_binary(Topic), Payload); make_message(PublishedAt, Topic, Payload) when is_binary(Topic) -> ID = emqx_guid:gen(), #message{ id = ID, + from = <>, topic = Topic, timestamp = PublishedAt, payload = Payload }. 
+make_deletion(Timestamp, Topic, Payload) -> + {delete, #message_matcher{ + from = <>, + topic = Topic, + timestamp = Timestamp, + payload = Payload + }}. + make_topic(Tokens = [_ | _]) -> emqx_topic:join([bin(T) || T <- Tokens]). @@ -535,13 +507,23 @@ end_per_suite(Config) -> ok. init_per_testcase(TC, Config) -> - ok = emqx_ds:open_db(TC, ?DB_CONFIG(Config)), + ok = emqx_ds:open_db(TC, db_config(TC, Config)), Config. end_per_testcase(TC, _Config) -> emqx_ds:drop_db(TC), ok. +db_config(TC, Config) -> + ConfigBase = ?DB_CONFIG(Config), + SpecificConfig = + try + ?MODULE:TC(?FUNCTION_NAME, Config) + catch + error:undef -> #{} + end, + maps:merge(ConfigBase, SpecificConfig). + shard(TC) -> {TC, <<"0">>}. From 5502af18b7ea385010f28badba80603e967d577a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:44:57 +0200 Subject: [PATCH 24/30] feat(ds): support deletions + precondition-related API in bitfield-lts --- .../src/emqx_ds_storage_bitfield_lts.erl | 67 +++++++++++++------ .../src/emqx_ds_storage_layer.erl | 1 + 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index fb831318e..28a4d54c2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -37,6 +37,7 @@ update_iterator/4, next/6, delete_next/7, + lookup_message/3, handle_event/4 ]). @@ -46,6 +47,7 @@ -export_type([options/0]). +-include("emqx_ds.hrl"). -include("emqx_ds_metrics.hrl"). -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). @@ -68,10 +70,13 @@ -define(start_time, 3). -define(storage_key, 4). -define(last_seen_key, 5). --define(cooked_payloads, 6). +-define(cooked_msg_ops, 6). -define(cooked_lts_ops, 7). -define(cooked_ts, 8). +%% atoms: +-define(delete, 100). 
+ -type options() :: #{ bits_per_wildcard_level => pos_integer(), @@ -110,7 +115,7 @@ -type cooked_batch() :: #{ - ?cooked_payloads := [{binary(), binary()}], + ?cooked_msg_ops := [{binary(), binary() | ?delete}], ?cooked_lts_ops := [{binary(), binary()}], ?cooked_ts := integer() }. @@ -271,24 +276,28 @@ drop(_Shard, DBHandle, GenId, CFRefs, #s{trie = Trie, gvars = GVars}) -> -spec prepare_batch( emqx_ds_storage_layer:shard_id(), s(), - [{emqx_ds:time(), emqx_types:message()}, ...], + emqx_ds_storage_layer:batch(), emqx_ds_storage_layer:batch_store_opts() ) -> {ok, cooked_batch()}. -prepare_batch(_ShardId, S, Messages, _Options) -> +prepare_batch(_ShardId, S, Batch, _Options) -> _ = erase(?lts_persist_ops), - {Payloads, MaxTs} = + {Operations, MaxTs} = lists:mapfoldl( - fun({Timestamp, Msg}, Acc) -> - {Key, _} = make_key(S, Timestamp, Msg), - Payload = {Key, message_to_value_v1(Msg)}, - {Payload, max(Acc, Timestamp)} + fun + ({Timestamp, Msg = #message{topic = Topic}}, Acc) -> + {Key, _} = make_key(S, Timestamp, Topic), + Op = {Key, message_to_value_v1(Msg)}, + {Op, max(Acc, Timestamp)}; + ({delete, #message_matcher{topic = Topic, timestamp = Timestamp}}, Acc) -> + {Key, _} = make_key(S, Timestamp, Topic), + {_Op = {Key, ?delete}, Acc} end, 0, - Messages + Batch ), {ok, #{ - ?cooked_payloads => Payloads, + ?cooked_msg_ops => Operations, ?cooked_lts_ops => pop_lts_persist_ops(), ?cooked_ts => MaxTs }}. 
@@ -302,7 +311,7 @@ prepare_batch(_ShardId, S, Messages, _Options) -> commit_batch( _ShardId, _Data, - #{?cooked_payloads := [], ?cooked_lts_ops := LTS}, + #{?cooked_msg_ops := [], ?cooked_lts_ops := LTS}, _Options ) -> %% Assert: @@ -311,7 +320,7 @@ commit_batch( commit_batch( _ShardId, #s{db = DB, data = DataCF, trie = Trie, trie_cf = TrieCF, gvars = Gvars}, - #{?cooked_lts_ops := LtsOps, ?cooked_payloads := Payloads, ?cooked_ts := MaxTs}, + #{?cooked_lts_ops := LtsOps, ?cooked_msg_ops := Operations, ?cooked_ts := MaxTs}, Options ) -> {ok, Batch} = rocksdb:batch(), @@ -326,10 +335,13 @@ commit_batch( _ = emqx_ds_lts:trie_update(Trie, LtsOps), %% Commit payloads: lists:foreach( - fun({Key, Val}) -> - ok = rocksdb:batch_put(Batch, DataCF, Key, term_to_binary(Val)) + fun + ({Key, Val}) when is_tuple(Val) -> + ok = rocksdb:batch_put(Batch, DataCF, Key, term_to_binary(Val)); + ({Key, ?delete}) -> + ok = rocksdb:batch_delete(Batch, DataCF, Key) end, - Payloads + Operations ), Result = rocksdb:write_batch(DB, Batch, write_batch_opts(Options)), rocksdb:release_batch(Batch), @@ -556,6 +568,23 @@ delete_next_until( rocksdb:iterator_close(ITHandle) end. +-spec lookup_message(emqx_ds_storage_layer:shard_id(), s(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). +lookup_message( + _ShardId, + S = #s{db = DB, data = CF}, + #message_matcher{topic = Topic, timestamp = Timestamp} +) -> + {Key, _} = make_key(S, Timestamp, Topic), + case rocksdb:get(DB, CF, Key, _ReadOpts = []) of + {ok, Blob} -> + deserialize(Blob); + not_found -> + not_found; + Error -> + {error, unrecoverable, {rocksdb, Error}} + end. 
+ handle_event(_ShardId, State = #s{gvars = Gvars}, Time, tick) -> %% If the last message was published more than one epoch ago, and %% the shard remains idle, we need to advance safety cutoff @@ -811,9 +840,9 @@ format_key(KeyMapper, Key) -> Vec = [integer_to_list(I, 16) || I <- emqx_ds_bitmask_keymapper:key_to_vector(KeyMapper, Key)], lists:flatten(io_lib:format("~.16B (~s)", [Key, string:join(Vec, ",")])). --spec make_key(s(), emqx_ds:time(), emqx_types:message()) -> {binary(), [binary()]}. -make_key(#s{keymappers = KeyMappers, trie = Trie}, Timestamp, #message{topic = TopicBin}) -> - Tokens = emqx_topic:words(TopicBin), +-spec make_key(s(), emqx_ds:time(), emqx_types:topic()) -> {binary(), [binary()]}. +make_key(#s{keymappers = KeyMappers, trie = Trie}, Timestamp, Topic) -> + Tokens = emqx_topic:words(Topic), {TopicIndex, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), VaryingHashes = [hash_topic_level(I) || I <- Varying], KeyMapper = array:get(length(Varying), KeyMappers), diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index ac2edc254..3afdad01a 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -64,6 +64,7 @@ -export_type([ gen_id/0, generation/0, + batch/0, cf_refs/0, stream/0, delete_stream/0, From fc0434afc8d879f9f1508180d6a48a6a95c2faa2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:46:12 +0200 Subject: [PATCH 25/30] chore(dslocal): refine few typespecs --- apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index e55f4807e..f002c26de 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ 
b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -304,7 +304,7 @@ get_streams(DB, TopicFilter, StartTime) -> -spec make_iterator( emqx_ds:db(), emqx_ds:ds_specific_stream(), emqx_ds:topic_filter(), emqx_ds:time() ) -> - emqx_ds:make_iterator_result(emqx_ds:ds_specific_iterator()). + emqx_ds:make_iterator_result(iterator()). make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> ShardId = {DB, Shard}, case @@ -318,7 +318,7 @@ make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> Error end. --spec update_iterator(emqx_ds:db(), emqx_ds:ds_specific_iterator(), emqx_ds:message_key()) -> +-spec update_iterator(emqx_ds:db(), iterator(), emqx_ds:message_key()) -> emqx_ds:make_iterator_result(iterator()). update_iterator(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, Key) -> case emqx_ds_storage_layer:update_iterator({DB, Shard}, StorageIter0, Key) of @@ -396,7 +396,7 @@ do_next(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, N) -> end. -spec do_delete_next(emqx_ds:db(), delete_iterator(), emqx_ds:delete_selector(), pos_integer()) -> - emqx_ds:delete_next_result(emqx_ds:delete_iterator()). + emqx_ds:delete_next_result(delete_iterator()). 
do_delete_next( DB, Iter = #{?tag := ?DELETE_IT, ?shard := Shard, ?enc := StorageIter0}, Selector, N ) -> From d8aa39a31071ad1d049ea9af17ec57ac6c7966cb Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:46:54 +0200 Subject: [PATCH 26/30] fix(dsraft): use local application environment --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 6acbf94ca..007615674 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -675,7 +675,7 @@ list_nodes() -> -define(SHARD_RPC(DB, SHARD, NODE, BODY), case emqx_ds_replication_layer_shard:servers( - DB, SHARD, application:get_env(emqx_durable_storage, reads, leader_preferred) + DB, SHARD, application:get_env(emqx_ds_builtin_raft, reads, leader_preferred) ) of [{_, NODE} | _] -> From b0594271b2eb292dd9cf8cf318bbd8c331548efa Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 19:16:11 +0200 Subject: [PATCH 27/30] chore(dsraft): fix a typespec --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 007615674..8be3417cd 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -101,7 +101,10 @@ n_shards => pos_integer(), n_sites => pos_integer(), replication_factor => pos_integer(), - replication_options => _TODO :: #{} + replication_options => _TODO :: #{}, + %% Inherited from `emqx_ds:generic_db_opts()`. + force_monotonic_timestamps => boolean(), + atomic_batches => boolean() }. 
%% This enapsulates the stream entity from the replication level. From 7b85faf12a46137a922f60555377c4e793ba0579 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 5 Aug 2024 10:44:33 +0200 Subject: [PATCH 28/30] chore(dsraft): fix few spelling errors Co-Authored-By: Thales Macedo Garitezi --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 4 ++-- apps/emqx_durable_storage/src/emqx_ds_precondition.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 8be3417cd..11c809dbd 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -140,7 +140,7 @@ }. %% Write batch. -%% Instances of this type currently form the mojority of the Raft log. +%% Instances of this type currently form the majority of the Raft log. -type batch() :: #{ ?tag := ?BATCH, ?batch_operations := [emqx_ds:operation()], @@ -281,7 +281,7 @@ store_batch_atomic(DB, Batch, _Opts) -> [] -> ok; [_ | _] -> - {error, unrecoverable, nonatomic_batch_spans_multiple_storages} + {error, unrecoverable, atomic_batch_spans_multiple_shards} end. -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl index 5f86d6c25..3002fcd08 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 20212024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. 
From 5dd8fefded702c00e0c544d27fce1e73465bc71a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 5 Aug 2024 16:34:17 +0200 Subject: [PATCH 29/30] test(ds): avoid side effects in check phase --- apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index e70d2b682..95ebfe99c 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -327,9 +327,8 @@ t_11_batch_preconditions(Config) -> ?assertEqual( ok, emqx_ds:store_batch(DB, Batch1) - ) - end, - fun(_Trace) -> + ), + %% Wait at least until current epoch ends. ct:sleep(1000), %% There's no messages in the DB. @@ -337,7 +336,8 @@ t_11_batch_preconditions(Config) -> [], emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)) ) - end + end, + [] ). t_12_batch_precondition_conflicts(Config) -> From 9ca3985bbdc23afbba308cf9b2d39e87c131c611 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 6 Aug 2024 10:24:21 -0300 Subject: [PATCH 30/30] test: attempt to reduce test flakiness --- .../test/emqx_bridge_kafka_impl_consumer_SUITE.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl index 74d3a5f54..d7408f0e5 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl @@ -1918,13 +1918,14 @@ t_node_joins_existing_cluster(Config) -> _Attempts2 = 50, [] =/= erpc:call(N2, emqx_router, lookup_routes, [MQTTTopic]) ), + NumMsgs = 50 * NPartitions, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{ ?snk_kind := kafka_consumer_handle_message, ?snk_span := {complete, _} }), - NPartitions, + NumMsgs, 
20_000 ), lists:foreach( @@ -1933,7 +1934,7 @@ t_node_joins_existing_cluster(Config) -> Val = <<"v", (integer_to_binary(N))/binary>>, publish(Config, KafkaTopic, [#{key => Key, value => Val}]) end, - lists:seq(1, 10 * NPartitions) + lists:seq(1, NumMsgs) ), {ok, _} = snabbkaffe:receive_events(SRef1),