diff --git a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl index ae4aab097..920e2528f 100644 --- a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl +++ b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl @@ -25,11 +25,16 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - TCApps = emqx_cth_suite:start( - app_specs(), - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - [{tc_apps, TCApps} | Config]. + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + TCApps = emqx_cth_suite:start( + app_specs(), + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{tc_apps, TCApps} | Config]; + Yes -> + Yes + end. end_per_suite(Config) -> TCApps = ?config(tc_apps, Config), diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 98a2d36fa..df9f69f87 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -24,6 +24,7 @@ {deps, [ {emqx_utils, {path, "../emqx_utils"}}, {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {emqx_ds_backends, {path, "../emqx_ds_backends"}}, {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 97769fe1f..3d26c63ed 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -18,7 +18,7 @@ sasl, lc, hocon, - emqx_durable_storage, + emqx_ds_backends, bcrypt, pbkdf2, emqx_http_lib, diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 5902bcfb7..1cda81d1d 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -18,7 +18,7 @@ -module(emqx_ds_schema). %% API: --export([schema/0, translate_builtin/1]). +-export([schema/0, translate_builtin_raft/1, translate_builtin_local/1]). %% Behavior callbacks: -export([fields/1, desc/1, namespace/0]). @@ -32,42 +32,51 @@ %% Type declarations %%================================================================================ +-ifndef(EMQX_RELEASE_EDITION). +-define(EMQX_RELEASE_EDITION, ce). +-endif. + +-if(?EMQX_RELEASE_EDITION == ee). +-define(DEFAULT_BACKEND, builtin_raft). +-define(BUILTIN_BACKENDS, [ref(builtin_raft), ref(builtin_local)]). +-else. +-define(DEFAULT_BACKEND, builtin_local). +-define(BUILTIN_BACKENDS, [ref(builtin_local)]). +-endif. + %%================================================================================ %% API %%================================================================================ -translate_builtin( +translate_builtin_raft( Backend = #{ - backend := builtin, + backend := builtin_raft, n_shards := NShards, n_sites := NSites, replication_factor := ReplFactor, layout := Layout } ) -> - Storage = - case Layout of - #{ - type := wildcard_optimized, - bits_per_topic_level := BitsPerTopicLevel, - epoch_bits := EpochBits, - topic_index_bytes := TIBytes - } -> - {emqx_ds_storage_bitfield_lts, #{ - bits_per_topic_level => BitsPerTopicLevel, - topic_index_bytes => TIBytes, - epoch_bits => EpochBits - }}; - #{type := reference} -> - {emqx_ds_storage_reference, #{}} - end, #{ - backend => builtin, + backend => builtin_raft, n_shards => NShards, n_sites => NSites, replication_factor => ReplFactor, replication_options => maps:get(replication_options, Backend, #{}), - storage => Storage + storage => translate_layout(Layout) + }. 
+ +translate_builtin_local( + #{ + backend := builtin_local, + n_shards := NShards, + layout := Layout + } +) -> + #{ + backend => builtin_local, + n_shards => NShards, + storage => translate_layout(Layout) }. %%================================================================================ @@ -83,24 +92,24 @@ schema() -> ds_schema(#{ default => #{ - <<"backend">> => builtin + <<"backend">> => ?DEFAULT_BACKEND }, importance => ?IMPORTANCE_MEDIUM, desc => ?DESC(messages) })} ]. -fields(builtin) -> - %% Schema for the builtin backend: +fields(builtin_local) -> + %% Schema for the builtin_raft backend: [ {backend, sc( - builtin, + builtin_local, #{ 'readOnly' => true, - default => builtin, + default => builtin_local, importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_backend) + desc => ?DESC(backend_type) } )}, {'_config_handler', @@ -108,27 +117,32 @@ fields(builtin) -> {module(), atom()}, #{ 'readOnly' => true, - default => {?MODULE, translate_builtin}, + default => {?MODULE, translate_builtin_local}, importance => ?IMPORTANCE_HIDDEN } - )}, - {data_dir, + )} + | common_builtin_fields() + ]; +fields(builtin_raft) -> + %% Schema for the builtin_raft backend: + [ + {backend, sc( - string(), + builtin_raft, #{ - mapping => "emqx_durable_storage.db_data_dir", - required => false, + 'readOnly' => true, + default => builtin_raft, importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_data_dir) + desc => ?DESC(backend_type) } )}, - {n_shards, + {'_config_handler', sc( - pos_integer(), + {module(), atom()}, #{ - default => 12, - importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_n_shards) + 'readOnly' => true, + default => {?MODULE, translate_builtin_raft}, + importance => ?IMPORTANCE_HIDDEN } )}, %% TODO: Deprecate once cluster management and rebalancing is implemented. @@ -157,29 +171,10 @@ fields(builtin) -> default => #{}, importance => ?IMPORTANCE_HIDDEN } - )}, - {local_write_buffer, - sc( - ref(builtin_local_write_buffer), - #{ - importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer) - } - )}, - {layout, - sc( - hoconsc:union(builtin_layouts()), - #{ - desc => ?DESC(builtin_layout), - importance => ?IMPORTANCE_MEDIUM, - default => - #{ - <<"type">> => wildcard_optimized - } - } )} + | common_builtin_fields() ]; -fields(builtin_local_write_buffer) -> +fields(builtin_write_buffer) -> [ {max_items, sc( @@ -188,7 +183,7 @@ fields(builtin_local_write_buffer) -> default => 1000, mapping => "emqx_durable_storage.egress_batch_size", importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer_max_items) + desc => ?DESC(builtin_write_buffer_max_items) } )}, {flush_interval, @@ -198,7 +193,7 @@ fields(builtin_local_write_buffer) -> default => 100, mapping => "emqx_durable_storage.egress_flush_interval", importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer_flush_interval) + desc => ?DESC(builtin_write_buffer_flush_interval) } )} ]; @@ -252,10 +247,55 @@ fields(layout_builtin_reference) -> )} ]. 
-desc(builtin) -> - ?DESC(builtin); -desc(builtin_local_write_buffer) -> - ?DESC(builtin_local_write_buffer); +common_builtin_fields() -> + [ + {data_dir, + sc( + string(), + #{ + mapping => "emqx_durable_storage.db_data_dir", + required => false, + importance => ?IMPORTANCE_MEDIUM, + desc => ?DESC(builtin_data_dir) + } + )}, + {n_shards, + sc( + pos_integer(), + #{ + default => 16, + importance => ?IMPORTANCE_MEDIUM, + desc => ?DESC(builtin_n_shards) + } + )}, + {local_write_buffer, + sc( + ref(builtin_write_buffer), + #{ + importance => ?IMPORTANCE_HIDDEN, + desc => ?DESC(builtin_write_buffer) + } + )}, + {layout, + sc( + hoconsc:union(builtin_layouts()), + #{ + desc => ?DESC(builtin_layout), + importance => ?IMPORTANCE_MEDIUM, + default => + #{ + <<"type">> => wildcard_optimized + } + } + )} + ]. + +desc(builtin_raft) -> + ?DESC(builtin_raft); +desc(builtin_local) -> + ?DESC(builtin_local); +desc(builtin_write_buffer) -> + ?DESC(builtin_write_buffer); desc(layout_builtin_wildcard_optimized) -> ?DESC(layout_builtin_wildcard_optimized); desc(layout_builtin_reference) -> @@ -267,12 +307,27 @@ desc(_) -> %% Internal functions %%================================================================================ +translate_layout( + #{ + type := wildcard_optimized, + bits_per_topic_level := BitsPerTopicLevel, + epoch_bits := EpochBits, + topic_index_bytes := TIBytes + } +) -> + {emqx_ds_storage_bitfield_lts, #{ + bits_per_topic_level => BitsPerTopicLevel, + topic_index_bytes => TIBytes, + epoch_bits => EpochBits + }}; +translate_layout(#{type := reference}) -> + {emqx_ds_storage_reference, #{}}. + ds_schema(Options) -> sc( - hoconsc:union([ - ref(builtin) - | emqx_schema_hooks:injection_point('durable_storage.backends', []) - ]), + hoconsc:union( + ?BUILTIN_BACKENDS ++ emqx_schema_hooks:injection_point('durable_storage.backends', []) + ), Options ). diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 0b54c2c55..f225ba43d 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -32,9 +32,23 @@ all() -> emqx_common_test_helpers:all(?MODULE). +%% Needed for standalone mode: +-ifndef(EMQX_RELEASE_EDITION). +-define(EMQX_RELEASE_EDITION, ce). +-endif. + +-if(?EMQX_RELEASE_EDITION == ee). + init_per_suite(Config) -> Config. +-else. + +init_per_suite(Config) -> + {skip, no_replication}. + +-endif. + end_per_suite(_Config) -> ok. @@ -465,7 +479,7 @@ t_metrics_not_dropped(_Config) -> t_replication_options(_Config) -> ?assertMatch( #{ - backend := builtin, + backend := builtin_raft, replication_options := #{ wal_max_size_bytes := 16000000, wal_max_batch_size := 1024, @@ -570,7 +584,7 @@ wait_shards_online(Nodes = [Node | _]) -> ?retry(500, 10, [?assertEqual(NShards, shards_online(N)) || N <- Nodes]). shards_online(Node) -> - length(erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [?PERSISTENT_MESSAGE_DB])). + length(erpc:call(Node, emqx_ds_builtin_raft_db_sup, which_shards, [?PERSISTENT_MESSAGE_DB])). 
get_mqtt_port(Node, Type) -> {_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]), diff --git a/apps/emqx/test/emqx_persistent_session_SUITE.erl b/apps/emqx/test/emqx_persistent_session_SUITE.erl index e9d09b980..54a8e7d51 100644 --- a/apps/emqx/test/emqx_persistent_session_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_session_SUITE.erl @@ -81,7 +81,8 @@ init_per_group(persistence_enabled, Config) -> " heartbeat_interval = 100ms\n" " renew_streams_interval = 100ms\n" " session_gc_interval = 2s\n" - "}"}, + "}\n" + "durable_storage.messages.backend = builtin_local"}, {persistence, ds} | Config ]; diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 5c5dd0d50..8481a8f79 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1457,6 +1457,8 @@ cluster_options(k8s, Conf) -> {suffix, conf_get("cluster.k8s.suffix", Conf, "")} ]; cluster_options(manual, _Conf) -> + []; +cluster_options(singleton, _Conf) -> []. to_atom(Atom) when is_atom(Atom) -> diff --git a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl index c82367faf..8f18aa685 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl @@ -82,37 +82,42 @@ end_per_suite(_Config) -> ok. init_per_group(persistent_sessions = Group, Config) -> - AppSpecsFn = fun(Enable) -> - Port = - case Enable of - true -> "18083"; - false -> "0" + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + AppSpecsFn = fun(Enable) -> + Port = + case Enable of + true -> "18083"; + false -> "0" + end, + [ + emqx_conf, + {emqx, "durable_sessions {enable = true}"}, + {emqx_retainer, ?BASE_RETAINER_CONF}, + emqx_management, + emqx_mgmt_api_test_util:emqx_dashboard( + lists:concat([ + "dashboard.listeners.http { bind = " ++ Port ++ " }\n", + "dashboard.sample_interval = 1s\n", + "dashboard.listeners.http.enable = " ++ atom_to_list(Enable) + ]) + ) + ] end, - [ - emqx_conf, - {emqx, "durable_sessions {enable = true}"}, - {emqx_retainer, ?BASE_RETAINER_CONF}, - emqx_management, - emqx_mgmt_api_test_util:emqx_dashboard( - lists:concat([ - "dashboard.listeners.http { bind = " ++ Port ++ " }\n", - "dashboard.sample_interval = 1s\n", - "dashboard.listeners.http.enable = " ++ atom_to_list(Enable) - ]) - ) - ] - end, - NodeSpecs = [ - {dashboard_monitor1, #{apps => AppSpecsFn(true)}}, - {dashboard_monitor2, #{apps => AppSpecsFn(false)}} - ], - Nodes = - [N1 | _] = emqx_cth_cluster:start( - NodeSpecs, - #{work_dir => emqx_cth_suite:work_dir(Group, Config)} - ), - ?ON(N1, {ok, _} = emqx_common_test_http:create_default_app()), - [{cluster, Nodes} | Config]; + NodeSpecs = [ + {dashboard_monitor1, #{apps => AppSpecsFn(true)}}, + {dashboard_monitor2, #{apps => AppSpecsFn(false)}} + ], + Nodes = + [N1 | _] = emqx_cth_cluster:start( + NodeSpecs, + #{work_dir => emqx_cth_suite:work_dir(Group, Config)} + ), + ?ON(N1, {ok, _} = emqx_common_test_http:create_default_app()), + [{cluster, Nodes} | Config]; + Yes -> + Yes + end; init_per_group(common = Group, Config) -> Apps = emqx_cth_suite:start( [ diff --git a/apps/emqx_ds_backends/README.md b/apps/emqx_ds_backends/README.md new file mode 100644 index 000000000..02986e0e1 --- /dev/null +++ b/apps/emqx_ds_backends/README.md @@ -0,0 +1,32 @@ +# EMQX Durable Storage Backends + +This is a placeholder OTP application that depends on all durable storage backends available in 
the release. +Starting it will ensure that all backends are properly loaded and registered. + +Consumers of `emqx_durable_storage` API should depend on this application instead of the parent `emqx_durable_storage`. + +# Features + +N/A + +# Limitation + +N/A + +# Documentation links + +N/A + +# Usage + +Any business application that creates DS databases should add this application as a dependency. + +# Configurations + +None + +# Other +N/A + +# Contributing +Please see our [contributing.md](../../CONTRIBUTING.md). diff --git a/apps/emqx_ds_backends/rebar.config.script b/apps/emqx_ds_backends/rebar.config.script new file mode 100644 index 000000000..6caf605f6 --- /dev/null +++ b/apps/emqx_ds_backends/rebar.config.script @@ -0,0 +1,26 @@ +%% -*- mode:erlang -*- +Profile = case os:getenv("PROFILE") of + "emqx-enterprise" ++ _ -> + ee; + false -> + io:format(user, "WARN: environment variable PROFILE is not set, using 'emqx-enterprise'~n", []), + ee; + _ -> + ce + end, +CEDeps = + [ + {emqx_utils, {path, "../emqx_utils"}}, + {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {emqx_ds_builtin_local, {path, "../emqx_ds_builtin_local"}} + ], +EEDeps = + [ + {emqx_ds_builtin_raft, {path, "../emqx_ds_builtin_raft"}} + ], +case Profile of + ee -> + [{deps, CEDeps ++ EEDeps}]; + ce -> + [{deps, CEDeps}] +end. diff --git a/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script new file mode 100644 index 000000000..b3950edbc --- /dev/null +++ b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script @@ -0,0 +1,26 @@ +%% -*- mode: erlang -*- +Profile = case os:getenv("PROFILE") of + "emqx-enterprise" ++ _ -> + ee; + false -> + io:format(user, "WARN: environment variable PROFILE is not set, using 'emqx-enterprise'~n", []), + ee; + _ -> + ce + end, + +{application, emqx_ds_backends, [ + {description, "A placeholder application that depends on all available DS backends"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, emqx_durable_storage, emqx_ds_builtin_local | + case Profile of + ee -> + [emqx_ds_builtin_raft]; + ce -> + [] + end]}, + {env, []} +]}. diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl similarity index 58% rename from apps/emqx_durable_storage/test/emqx_ds_SUITE.erl rename to apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index eb14456cb..ab1e0feb0 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -13,7 +13,7 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --module(emqx_ds_SUITE). +-module(emqx_ds_backends_SUITE). -compile(export_all). -compile(nowarn_export_all). @@ -26,52 +26,27 @@ -define(N_SHARDS, 1). -opts() -> - #{ - backend => builtin, - storage => {emqx_ds_storage_reference, #{}}, - n_shards => ?N_SHARDS, - n_sites => 1, - replication_factor => 3, - replication_options => #{} - }. +opts(Config) -> + proplists:get_value(ds_conf, Config). 
%% A simple smoke test that verifies that opening/closing the DB %% doesn't crash, and not much else -t_00_smoke_open_drop(_Config) -> +t_00_smoke_open_drop(Config) -> DB = 'DB', - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - %% Check metadata: - %% We have only one site: - [Site] = emqx_ds_replication_layer_meta:sites(), - %% Check all shards: - Shards = emqx_ds_replication_layer_meta:shards(DB), - %% Since there is only one site all shards should be allocated - %% to this site: - MyShards = emqx_ds_replication_layer_meta:my_shards(DB), - ?assertEqual(?N_SHARDS, length(Shards)), - lists:foreach( - fun(Shard) -> - ?assertEqual( - [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard) - ) - end, - Shards - ), - ?assertEqual(lists:sort(Shards), lists:sort(MyShards)), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Reopen the DB and make sure the operation is idempotent: - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Close the DB: ?assertMatch(ok, emqx_ds:drop_db(DB)). %% A simple smoke test that verifies that storing the messages doesn't %% crash -t_01_smoke_store(_Config) -> +t_01_smoke_store(Config) -> ?check_trace( #{timetrap => 10_000}, begin DB = default, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msg = message(<<"foo/bar">>, <<"foo">>, 0), ?assertMatch(ok, emqx_ds:store_batch(DB, [Msg])) end, @@ -80,9 +55,9 @@ t_01_smoke_store(_Config) -> %% A simple smoke test that verifies that getting the list of streams %% doesn't crash and that iterators can be opened. -t_02_smoke_get_streams_start_iter(_Config) -> +t_02_smoke_get_streams_start_iter(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = ['#'], [{Rank, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), @@ -91,9 +66,9 @@ t_02_smoke_get_streams_start_iter(_Config) -> %% A simple smoke test that verifies that it's possible to iterate %% over messages. -t_03_smoke_iterate(_Config) -> +t_03_smoke_iterate(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = ['#'], Msgs = [ @@ -101,7 +76,7 @@ t_03_smoke_iterate(_Config) -> message(<<"foo">>, <<"2">>, 1), message(<<"bar/bar">>, <<"3">>, 2) ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs, #{sync => true})), [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), @@ -112,9 +87,9 @@ t_03_smoke_iterate(_Config) -> %% to the external resources, such as clients' sessions, and they %% should always be able to continue replaying the topics from where %% they are left off. 
-t_04_restart(_Config) -> +t_04_restart(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), TopicFilter = ['#'], StartTime = 0, Msgs = [ @@ -122,22 +97,22 @@ t_04_restart(_Config) -> message(<<"foo">>, <<"2">>, 1), message(<<"bar/bar">>, <<"3">>, 2) ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs, #{sync => true})), [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), %% Restart the application: ?tp(warning, emqx_ds_SUITE_restart_app, #{}), ok = application:stop(emqx_durable_storage), {ok, _} = application:ensure_all_started(emqx_durable_storage), - ok = emqx_ds:open_db(DB, opts()), + ok = emqx_ds:open_db(DB, opts(Config)), %% The old iterator should be still operational: {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), ?assertEqual(Msgs, Batch, {Iter0, Iter}). %% Check that we can create iterators directly from DS keys. -t_05_update_iterator(_Config) -> +t_05_update_iterator(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), TopicFilter = ['#'], StartTime = 0, Msgs = [ @@ -158,104 +133,49 @@ t_05_update_iterator(_Config) -> ?assertEqual(Msgs, [Msg0 | Batch], #{from_key => Iter1, final_iter => Iter}), ok. -t_06_update_config(_Config) -> +t_06_smoke_add_generation(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - TopicFilter = ['#'], + BeginTime = os:system_time(millisecond), - DataSet = update_data_set(), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [{Gen1, #{created_at := Created1, since := Since1, until := undefined}}] = maps:to_list( + emqx_ds:list_generations_with_lifetimes(DB) + ), - ToMsgs = fun(Datas) -> - lists:map( - fun({Topic, Payload}) -> - message(Topic, Payload, emqx_message:timestamp_now()) - end, - Datas - ) - end, + ?assertMatch(ok, emqx_ds:add_generation(DB)), + [ + {Gen1, #{created_at := Created1, since := Since1, until := Until1}}, + {Gen2, #{created_at := Created2, since := Since2, until := undefined}} + ] = maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)), + %% Check units of the return values (+/- 10s from test begin time): + ?give_or_take(BeginTime, 10_000, Created1), + ?give_or_take(BeginTime, 10_000, Created2), + ?give_or_take(BeginTime, 10_000, Since2), + ?give_or_take(BeginTime, 10_000, Until1). - {_, StartTimes, MsgsList} = - lists:foldl( - fun - (Datas, {true, TimeAcc, MsgAcc}) -> - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {false, TimeAcc, [Msgs | MsgAcc]}; - (Datas, {Any, TimeAcc, MsgAcc}) -> - timer:sleep(500), - ?assertMatch(ok, emqx_ds:update_db_config(DB, opts())), - timer:sleep(500), - StartTime = emqx_message:timestamp_now(), - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {Any, [StartTime | TimeAcc], [Msgs | MsgAcc]} - end, - {true, [emqx_message:timestamp_now()], []}, - DataSet - ), - - Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Acc ++ Msgs0, - Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, StartTime), - Msgs - end, - lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). 
- -t_07_add_generation(_Config) -> +t_07_smoke_update_config(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - TopicFilter = ['#'], - - DataSet = update_data_set(), - - ToMsgs = fun(Datas) -> - lists:map( - fun({Topic, Payload}) -> - message(Topic, Payload, emqx_message:timestamp_now()) - end, - Datas - ) - end, - - {_, StartTimes, MsgsList} = - lists:foldl( - fun - (Datas, {true, TimeAcc, MsgAcc}) -> - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {false, TimeAcc, [Msgs | MsgAcc]}; - (Datas, {Any, TimeAcc, MsgAcc}) -> - timer:sleep(500), - ?assertMatch(ok, emqx_ds:add_generation(DB)), - timer:sleep(500), - StartTime = emqx_message:timestamp_now(), - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {Any, [StartTime | TimeAcc], [Msgs | MsgAcc]} - end, - {true, [emqx_message:timestamp_now()], []}, - DataSet - ), - - Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Acc ++ Msgs0, - Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, StartTime), - Msgs - end, - lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + ?assertMatch( + [{_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ), + ?assertMatch(ok, emqx_ds:update_db_config(DB, opts(Config))), + ?assertMatch( + [{_, _}, {_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ). %% Verifies the basic usage of `list_generations_with_lifetimes' and `drop_generation'... %% 1) Cannot drop current generation. %% 2) All existing generations are returned by `list_generation_with_lifetimes'. %% 3) Dropping a generation removes it from the list. %% 4) Dropped generations stay dropped even after restarting the application. -t_08_smoke_list_drop_generation(_Config) -> +t_08_smoke_list_drop_generation(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Exactly one generation at first. Generations0 = emqx_ds:list_generations_with_lifetimes(DB), ?assertMatch( @@ -295,7 +215,7 @@ t_08_smoke_list_drop_generation(_Config) -> %% Should persist surviving generation list ok = application:stop(emqx_durable_storage), {ok, _} = application:ensure_all_started(emqx_durable_storage), - ok = emqx_ds:open_db(DB, opts()), + ok = emqx_ds:open_db(DB, opts(Config)), Generations3 = emqx_ds:list_generations_with_lifetimes(DB), ?assertMatch( @@ -310,12 +230,12 @@ t_08_smoke_list_drop_generation(_Config) -> ), ok. -t_09_atomic_store_batch(_Config) -> +t_09_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -328,19 +248,19 @@ t_09_atomic_store_batch(_Config) -> sync => true }) ), - {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush}), + {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_buffer_flush}), ?assertMatch(#{batch := [_, _, _]}, Flush) end, [] ), ok. 
-t_10_non_atomic_store_batch(_Config) -> +t_10_non_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -358,7 +278,7 @@ t_10_non_atomic_store_batch(_Config) -> end, fun(Trace) -> %% Should contain one flush per message. - Batches = ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)), + Batches = ?projection(batch, ?of_kind(emqx_ds_buffer_flush, Trace)), ?assertMatch([_], Batches), ?assertMatch( [_, _, _], @@ -369,11 +289,11 @@ t_10_non_atomic_store_batch(_Config) -> ), ok. -t_smoke_delete_next(_Config) -> +t_smoke_delete_next(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = [<<"foo">>, '#'], Msgs = @@ -410,7 +330,7 @@ t_smoke_delete_next(_Config) -> ), ok. -t_drop_generation_with_never_used_iterator(_Config) -> +t_drop_generation_with_never_used_iterator(Config) -> %% This test checks how the iterator behaves when: %% 1) it's created at generation 1 and not consumed from. %% 2) generation 2 is created and 1 dropped. @@ -418,7 +338,7 @@ t_drop_generation_with_never_used_iterator(_Config) -> %% In this case, the iterator won't see any messages and the stream will end. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -458,7 +378,7 @@ t_drop_generation_with_never_used_iterator(_Config) -> ok. -t_drop_generation_with_used_once_iterator(_Config) -> +t_drop_generation_with_used_once_iterator(Config) -> %% This test checks how the iterator behaves when: %% 1) it's created at generation 1 and consumes at least 1 message. %% 2) generation 2 is created and 1 dropped. @@ -466,7 +386,7 @@ t_drop_generation_with_used_once_iterator(_Config) -> %% In this case, the iterator should see no more messages and the stream will end. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -499,12 +419,12 @@ t_drop_generation_with_used_once_iterator(_Config) -> emqx_ds_test_helpers:consume_iter(DB, Iter1) ). -t_drop_generation_update_iterator(_Config) -> +t_drop_generation_update_iterator(Config) -> %% This checks the behavior of `emqx_ds:update_iterator' after the generation %% underlying the iterator has been dropped. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -528,12 +448,12 @@ t_drop_generation_update_iterator(_Config) -> emqx_ds:update_iterator(DB, Iter1, Key2) ). -t_make_iterator_stale_stream(_Config) -> +t_make_iterator_stale_stream(Config) -> %% This checks the behavior of `emqx_ds:make_iterator' after the generation underlying %% the stream has been dropped. 
DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -556,7 +476,7 @@ t_make_iterator_stale_stream(_Config) -> ok. -t_get_streams_concurrently_with_drop_generation(_Config) -> +t_get_streams_concurrently_with_drop_generation(Config) -> %% This checks that we can get all streams while a generation is dropped %% mid-iteration. @@ -564,7 +484,7 @@ t_get_streams_concurrently_with_drop_generation(_Config) -> ?check_trace( #{timetrap => 5_000}, begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), ok = emqx_ds:add_generation(DB), @@ -593,171 +513,6 @@ t_get_streams_concurrently_with_drop_generation(_Config) -> [] ). -t_error_mapping_replication_layer(_Config) -> - %% This checks that the replication layer maps recoverable errors correctly. - - ok = emqx_ds_test_helpers:mock_rpc(), - ok = snabbkaffe:start_trace(), - - DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), - [Shard1, Shard2] = emqx_ds_replication_layer_meta:shards(DB), - - TopicFilter = emqx_topic:words(<<"foo/#">>), - Msgs = [ - message(<<"C1">>, <<"foo/bar">>, <<"1">>, 0), - message(<<"C1">>, <<"foo/baz">>, <<"2">>, 1), - message(<<"C2">>, <<"foo/foo">>, <<"3">>, 2), - message(<<"C3">>, <<"foo/xyz">>, <<"4">>, 3), - message(<<"C4">>, <<"foo/bar">>, <<"5">>, 4), - message(<<"C5">>, <<"foo/oof">>, <<"6">>, 5) - ], - - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard1}), - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard2}), - - Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0), - Iterators0 = lists:map( - fun({_Rank, S}) -> - {ok, Iter} = emqx_ds:make_iterator(DB, S, TopicFilter, 0), - Iter - end, - Streams0 - ), - - %% Disrupt the link to the second shard. - ok = emqx_ds_test_helpers:mock_rpc_result( - fun(_Node, emqx_ds_replication_layer, _Function, Args) -> - case Args of - [DB, Shard1 | _] -> passthrough; - [DB, Shard2 | _] -> unavailable - end - end - ), - - %% Result of `emqx_ds:get_streams/3` will just contain partial results, not an error. - Streams1 = emqx_ds:get_streams(DB, TopicFilter, 0), - ?assert( - length(Streams1) > 0 andalso length(Streams1) =< length(Streams0), - Streams1 - ), - - %% At least one of `emqx_ds:make_iterator/4` will end in an error. - Results1 = lists:map( - fun({_Rank, S}) -> - case emqx_ds:make_iterator(DB, S, TopicFilter, 0) of - Ok = {ok, _Iter} -> - Ok; - Error = {error, recoverable, {erpc, _}} -> - Error; - Other -> - ct:fail({unexpected_result, Other}) - end - end, - Streams0 - ), - ?assert( - length([error || {error, _, _} <- Results1]) > 0, - Results1 - ), - - %% At least one of `emqx_ds:next/3` over initial set of iterators will end in an error. - Results2 = lists:map( - fun(Iter) -> - case emqx_ds:next(DB, Iter, _BatchSize = 42) of - Ok = {ok, _Iter, [_ | _]} -> - Ok; - Error = {error, recoverable, {badrpc, _}} -> - Error; - Other -> - ct:fail({unexpected_result, Other}) - end - end, - Iterators0 - ), - ?assert( - length([error || {error, _, _} <- Results2]) > 0, - Results2 - ), - meck:unload(). 
- -%% This testcase verifies the behavior of `store_batch' operation -%% when the underlying code experiences recoverable or unrecoverable -%% problems. -t_store_batch_fail(_Config) -> - ?check_trace( - #{timetrap => 15_000}, - try - meck:new(emqx_ds_storage_layer, [passthrough, no_history]), - DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), - %% Success: - Batch1 = [ - message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1), - message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1) - ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})), - %% Inject unrecoverable error: - meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) -> - {error, unrecoverable, mock} - end), - Batch2 = [ - message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1), - message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1) - ], - ?assertMatch( - {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true}) - ), - meck:unload(emqx_ds_storage_layer), - %% Inject a recoveralbe error: - meck:new(ra, [passthrough, no_history]), - meck:expect(ra, process_command, fun(Servers, Shard, Command) -> - ?tp(ra_command, #{servers => Servers, shard => Shard, command => Command}), - {timeout, mock} - end), - Batch3 = [ - message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2), - message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2), - message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3), - message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3) - ], - %% Note: due to idempotency issues the number of retries - %% is currently set to 0: - ?assertMatch( - {error, recoverable, {timeout, mock}}, - emqx_ds:store_batch(DB, Batch3, #{sync => true}) - ), - meck:unload(ra), - ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), - lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) - after - meck:unload() - end, - [ - {"message ordering", fun(StoredMessages, _Trace) -> - [{_, Stream1}, {_, Stream2}] = StoredMessages, - ?assertMatch( - [ - #message{payload = <<"1">>}, - #message{payload = <<"2">>}, - #message{payload = <<"5">>}, - #message{payload = <<"7">>} - ], - Stream1 - ), - ?assertMatch( - [ - #message{payload = <<"6">>}, - #message{payload = <<"8">>} - ], - Stream2 - ) - end} - ] - ). - update_data_set() -> [ [ @@ -802,27 +557,59 @@ delete(DB, It0, Selector, BatchSize, Acc) -> %% CT callbacks -all() -> emqx_common_test_helpers:all(?MODULE). +all() -> + [{group, builtin_local}, {group, builtin_raft}]. -init_per_suite(Config) -> - emqx_common_test_helpers:clear_screen(), - Apps = emqx_cth_suite:start( - [mria, emqx_durable_storage], - #{work_dir => ?config(priv_dir, Config)} - ), - [{apps, Apps} | Config]. +groups() -> + TCs = emqx_common_test_helpers:all(?MODULE), + [ + {builtin_local, TCs}, + {builtin_raft, TCs} + ]. -end_per_suite(Config) -> - ok = emqx_cth_suite:stop(?config(apps, Config)), - ok. +init_per_group(builtin_local, Config) -> + Conf = #{ + backend => builtin_local, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS + }, + [{ds_conf, Conf} | Config]; +init_per_group(builtin_raft, Config) -> + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + Conf = #{ + backend => builtin_raft, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS, + n_sites => 1, + replication_factor => 3, + replication_options => #{} + }, + [{ds_conf, Conf} | Config]; + Yes -> + Yes + end. -init_per_testcase(_TC, Config) -> - application:ensure_all_started(emqx_durable_storage), +end_per_group(_Group, Config) -> Config. 
-end_per_testcase(_TC, _Config) -> - snabbkaffe:stop(), - ok = application:stop(emqx_durable_storage), - mria:stop(), - _ = mnesia:delete_schema([node()]), +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(TC, Config) -> + Apps = emqx_cth_suite:start( + [emqx_durable_storage, emqx_ds_backends], + #{work_dir => emqx_cth_suite:work_dir(TC, Config)} + ), + ct:pal("Apps: ~p", [Apps]), + [{apps, Apps} | Config]. + +end_per_testcase(TC, Config) -> + ok = emqx_ds:drop_db(TC), + ok = emqx_cth_suite:stop(?config(apps, Config)), + _ = mnesia:delete_schema([node()]), + snabbkaffe:stop(), ok. diff --git a/apps/emqx_ds_builtin_local/README.md b/apps/emqx_ds_builtin_local/README.md new file mode 100644 index 000000000..fec609493 --- /dev/null +++ b/apps/emqx_ds_builtin_local/README.md @@ -0,0 +1,32 @@ +# Local Backend for EMQX Durable Storage + +# Features + +This backend uses local RocksDB database to store data. + +# Limitation + +This backend cannot be used in a clustered EMQX setup. + +# Documentation links + +TBD + +# Usage + +TBD + +# Configurations + +TBD + +# HTTP APIs + +TBD + +# Other + +TBD + +# Contributing +Please see our [contributing.md](../../CONTRIBUTING.md). diff --git a/apps/emqx_ds_builtin_local/rebar.config b/apps/emqx_ds_builtin_local/rebar.config new file mode 100644 index 000000000..d70aa75e0 --- /dev/null +++ b/apps/emqx_ds_builtin_local/rebar.config @@ -0,0 +1,5 @@ +%% -*- mode:erlang -*- + +{deps, [ + {emqx_durable_storage, {path, "../emqx_durable_storage"}} +]}. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src new file mode 100644 index 000000000..e8bcc1b48 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_ds_builtin_local, [ + {description, "A DS backend that stores all data locally and thus doesn't support clustering."}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, gproc, mria, rocksdb, emqx_durable_storage, emqx_utils]}, + {mod, {emqx_ds_builtin_local_app, []}}, + {env, []} +]}. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl new file mode 100644 index 000000000..5fe6eb559 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -0,0 +1,382 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local). + +-behaviour(emqx_ds). +-behaviour(emqx_ds_buffer). + +%% API: +-export([]). 
+ +%% behavior callbacks: +-export([ + %% `emqx_ds': + open_db/2, + close_db/1, + add_generation/1, + update_db_config/2, + list_generations_with_lifetimes/1, + drop_generation/2, + drop_db/1, + store_batch/3, + get_streams/3, + get_delete_streams/3, + make_iterator/4, + make_delete_iterator/4, + update_iterator/3, + next/3, + delete_next/4, + + %% `emqx_ds_buffer': + init_buffer/3, + flush_buffer/4, + shard_of_message/4 +]). + +-export_type([db_opts/0, shard/0, iterator/0, delete_iterator/0]). + +-include_lib("emqx_utils/include/emqx_message.hrl"). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(tag, 1). +-define(shard, 2). +-define(enc, 3). + +-define(IT, 61). +-define(DELETE_IT, 62). + +-type shard() :: binary(). + +-opaque iterator() :: + #{ + ?tag := ?IT, + ?shard := shard(), + ?enc := term() + }. + +-opaque delete_iterator() :: + #{ + ?tag := ?DELETE_IT, + ?shard := shard(), + ?enc := term() + }. + +-type db_opts() :: + #{ + backend := builtin_local, + storage := emqx_ds_storage_layer:prototype(), + n_shards := pos_integer() + }. + +-type generation_rank() :: {shard(), emqx_ds_storage_layer:gen_id()}. + +-define(stream(SHARD, INNER), [2, SHARD | INNER]). +-define(delete_stream(SHARD, INNER), [3, SHARD | INNER]). + +%%================================================================================ +%% API functions +%%================================================================================ + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +-spec open_db(emqx_ds:db(), db_opts()) -> ok | {error, _}. +open_db(DB, CreateOpts) -> + case emqx_ds_builtin_local_sup:start_db(DB, CreateOpts) of + {ok, _} -> + ok; + {error, {already_started, _}} -> + ok; + {error, Err} -> + {error, Err} + end. + +-spec close_db(emqx_ds:db()) -> ok. +close_db(DB) -> + emqx_ds_builtin_local_sup:stop_db(DB). + +-spec add_generation(emqx_ds:db()) -> ok | {error, _}. +add_generation(DB) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + Errors = lists:filtermap( + fun(Shard) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:add_generation( + ShardId, emqx_ds_builtin_local_meta:ensure_monotonic_timestamp(ShardId) + ) + of + ok -> + false; + Error -> + {true, {Shard, Error}} + end + end, + Shards + ), + case Errors of + [] -> ok; + _ -> {error, Errors} + end. + +-spec update_db_config(emqx_ds:db(), db_opts()) -> ok | {error, _}. +update_db_config(DB, CreateOpts) -> + Opts = #{} = emqx_ds_builtin_local_meta:update_db_config(DB, CreateOpts), + lists:foreach( + fun(Shard) -> + ShardId = {DB, Shard}, + emqx_ds_storage_layer:update_config( + ShardId, emqx_ds_builtin_local_meta:ensure_monotonic_timestamp(ShardId), Opts + ) + end, + emqx_ds_builtin_local_meta:shards(DB) + ). + +-spec list_generations_with_lifetimes(emqx_ds:db()) -> + #{emqx_ds:generation_rank() => emqx_ds:generation_info()}. +list_generations_with_lifetimes(DB) -> + lists:foldl( + fun(Shard, Acc) -> + maps:fold( + fun(GenId, Data0, Acc1) -> + Data = maps:update_with( + until, + fun timeus_to_timestamp/1, + maps:update_with(since, fun timeus_to_timestamp/1, Data0) + ), + Acc1#{{Shard, GenId} => Data} + end, + Acc, + emqx_ds_storage_layer:list_generations_with_lifetimes({DB, Shard}) + ) + end, + #{}, + emqx_ds_builtin_local_meta:shards(DB) + ). 
+ +-spec drop_generation(emqx_ds:db(), generation_rank()) -> ok | {error, _}. +drop_generation(DB, {Shard, GenId}) -> + emqx_ds_storage_layer:drop_generation({DB, Shard}, GenId). + +-spec drop_db(emqx_ds:db()) -> ok | {error, _}. +drop_db(DB) -> + close_db(DB), + lists:foreach( + fun(Shard) -> + emqx_ds_storage_layer:drop_shard({DB, Shard}) + end, + emqx_ds_builtin_local_meta:shards(DB) + ), + emqx_ds_builtin_local_meta:drop_db(DB). + +-spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> + emqx_ds:store_batch_result(). +store_batch(DB, Messages, Opts) -> + try + emqx_ds_buffer:store_batch(DB, Messages, Opts) + catch + error:{Reason, _Call} when Reason == timeout; Reason == noproc -> + {error, recoverable, Reason} + end. + +-record(bs, {options :: term()}). +-type buffer_state() :: #bs{}. + +-spec init_buffer(emqx_ds:db(), shard(), _Options) -> {ok, buffer_state()}. +init_buffer(DB, Shard, Options) -> + ShardId = {DB, Shard}, + case current_timestamp(ShardId) of + undefined -> + Latest = erlang:system_time(microsecond), + emqx_ds_builtin_local_meta:set_current_timestamp(ShardId, Latest); + _Latest -> + ok + end, + {ok, #bs{options = Options}}. + +-spec flush_buffer(emqx_ds:db(), shard(), [emqx_types:message()], buffer_state()) -> + {buffer_state(), emqx_ds:store_batch_result()}. +flush_buffer(DB, Shard, Messages, S0 = #bs{options = Options}) -> + {Latest, Batch} = assign_timestamps(current_timestamp({DB, Shard}), Messages), + Result = emqx_ds_storage_layer:store_batch({DB, Shard}, Batch, Options), + emqx_ds_builtin_local_meta:set_current_timestamp({DB, Shard}, Latest), + {S0, Result}. + +assign_timestamps(Latest, Messages) -> + assign_timestamps(Latest, Messages, []). + +assign_timestamps(Latest, [MessageIn | Rest], Acc) -> + case emqx_message:timestamp(MessageIn, microsecond) of + TimestampUs when TimestampUs > Latest -> + Message = assign_timestamp(TimestampUs, MessageIn), + assign_timestamps(TimestampUs, Rest, [Message | Acc]); + _Earlier -> + Message = assign_timestamp(Latest + 1, MessageIn), + assign_timestamps(Latest + 1, Rest, [Message | Acc]) + end; +assign_timestamps(Latest, [], Acc) -> + {Latest, lists:reverse(Acc)}. + +assign_timestamp(TimestampUs, Message) -> + {TimestampUs, Message}. + +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> shard(). +shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + N = emqx_ds_builtin_local_meta:n_shards(DB), + Hash = + case SerializeBy of + clientid -> erlang:phash2(From, N); + topic -> erlang:phash2(Topic, N) + end, + integer_to_binary(Hash). + +-spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> + [{emqx_ds:stream_rank(), emqx_ds:ds_specific_stream()}]. +get_streams(DB, TopicFilter, StartTime) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + lists:flatmap( + fun(Shard) -> + Streams = emqx_ds_storage_layer:get_streams( + {DB, Shard}, TopicFilter, timestamp_to_timeus(StartTime) + ), + lists:map( + fun({RankY, InnerStream}) -> + Rank = {Shard, RankY}, + {Rank, ?stream(Shard, InnerStream)} + end, + Streams + ) + end, + Shards + ). + +-spec make_iterator( + emqx_ds:db(), emqx_ds:ds_specific_stream(), emqx_ds:topic_filter(), emqx_ds:time() +) -> + emqx_ds:make_iterator_result(emqx_ds:ds_specific_iterator()). 
+make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:make_iterator( + ShardId, InnerStream, TopicFilter, timestamp_to_timeus(StartTime) + ) + of + {ok, Iter} -> + {ok, #{?tag => ?IT, ?shard => Shard, ?enc => Iter}}; + Error = {error, _, _} -> + Error + end. + +-spec update_iterator(emqx_ds:db(), emqx_ds:ds_specific_iterator(), emqx_ds:message_key()) -> + emqx_ds:make_iterator_result(iterator()). +update_iterator(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, Key) -> + case emqx_ds_storage_layer:update_iterator({DB, Shard}, StorageIter0, Key) of + {ok, StorageIter} -> + {ok, Iter0#{?enc => StorageIter}}; + Err = {error, _, _} -> + Err + end. + +-spec next(emqx_ds:db(), iterator(), pos_integer()) -> emqx_ds:next_result(iterator()). +next(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, N) -> + ShardId = {DB, Shard}, + T0 = erlang:monotonic_time(microsecond), + Result = emqx_ds_storage_layer:next(ShardId, StorageIter0, N, current_timestamp(ShardId)), + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_next_time(DB, T1 - T0), + case Result of + {ok, StorageIter, Batch} -> + Iter = Iter0#{?enc := StorageIter}, + {ok, Iter, Batch}; + Other -> + Other + end. + +-spec get_delete_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> + [emqx_ds:ds_specific_delete_stream()]. +get_delete_streams(DB, TopicFilter, StartTime) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + lists:flatmap( + fun(Shard) -> + Streams = emqx_ds_storage_layer:get_delete_streams( + {DB, Shard}, TopicFilter, timestamp_to_timeus(StartTime) + ), + lists:map( + fun(InnerStream) -> + ?delete_stream(Shard, InnerStream) + end, + Streams + ) + end, + Shards + ). + +-spec make_delete_iterator( + emqx_ds:db(), emqx_ds:ds_specific_delete_stream(), emqx_ds:topic_filter(), emqx_ds:time() +) -> + emqx_ds:make_delete_iterator_result(delete_iterator()). +make_delete_iterator(DB, ?delete_stream(Shard, InnerStream), TopicFilter, StartTime) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:make_delete_iterator( + ShardId, InnerStream, TopicFilter, timestamp_to_timeus(StartTime) + ) + of + {ok, Iter} -> + {ok, #{?tag => ?DELETE_IT, ?shard => Shard, ?enc => Iter}}; + Error = {error, _, _} -> + Error + end. + +-spec delete_next(emqx_ds:db(), delete_iterator(), emqx_ds:delete_selector(), pos_integer()) -> + emqx_ds:delete_next_result(emqx_ds:delete_iterator()). +delete_next(DB, Iter = #{?tag := ?DELETE_IT, ?shard := Shard, ?enc := StorageIter0}, Selector, N) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:delete_next( + ShardId, StorageIter0, Selector, N, current_timestamp(ShardId) + ) + of + {ok, StorageIter, Ndeleted} -> + {ok, Iter#{?enc => StorageIter}, Ndeleted}; + {ok, end_of_stream} -> + {ok, end_of_stream}; + Error -> + Error + end. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +current_timestamp(ShardId) -> + emqx_ds_builtin_local_meta:current_timestamp(ShardId). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +timestamp_to_timeus(TimestampMs) -> + TimestampMs * 1000. + +timeus_to_timestamp(undefined) -> + undefined; +timeus_to_timestamp(TimestampUs) -> + TimestampUs div 1000. 
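
The module above, together with the smoke tests in `emqx_ds_backends_SUITE` earlier in this diff, covers the whole client-facing flow of the new `builtin_local` backend. A minimal usage sketch follows; it is not part of the change itself, and the database name, option values, and message fields are illustrative only:

```erlang
%% Sketch only. Assumes the emqx_ds_backends application is already
%% running, which loads and registers the builtin_local backend.
Opts = #{
    backend => builtin_local,
    storage => {emqx_ds_storage_reference, #{}},
    n_shards => 4
},
ok = emqx_ds:open_db(my_db, Opts),
%% Store one message; builtin_local assigns monotonic timestamps per shard.
Msg = emqx_message:make(<<"client1">>, <<"t/1">>, <<"hello">>),
ok = emqx_ds:store_batch(my_db, [Msg], #{sync => true}),
%% Replay it through the generic stream/iterator API.
[{_Rank, Stream} | _] = emqx_ds:get_streams(my_db, ['#'], 0),
{ok, It0} = emqx_ds:make_iterator(my_db, Stream, ['#'], 0),
{ok, _It, _Batch} = emqx_ds:next(my_db, It0, 100),
ok = emqx_ds:drop_db(my_db).
```

The only prerequisite for such a consumer is to depend on `emqx_ds_backends` rather than on `emqx_durable_storage` directly, as the `emqx` application now does in its `rebar.config` and `emqx.app.src` above.
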
diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl new file mode 100644 index 000000000..b09ef3fe4 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl @@ -0,0 +1,38 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_app). + +%% API: +-export([]). + +%% behavior callbacks: +-export([start/2]). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +start(_StartType, _StartArgs) -> + emqx_ds:register_backend(builtin_local, emqx_ds_builtin_local), + emqx_ds_builtin_local_sup:start_top(). + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl new file mode 100644 index 000000000..8776416e0 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl @@ -0,0 +1,219 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Supervisor that contains all the processes that belong to a +%% given builtin DS database. +-module(emqx_ds_builtin_local_db_sup). + +-behaviour(supervisor). + +%% API: +-export([ + start_db/2, + start_shard/1, + stop_shard/1, + terminate_storage/1, + restart_storage/1, + ensure_shard/1 +]). +-export([which_dbs/0, which_shards/1]). + +%% Debug: +-export([ + get_shard_workers/1 +]). + +%% behaviour callbacks: +-export([init/1]). + +%% internal exports: +-export([start_link_sup/2]). 
+ +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(via(REC), {via, gproc, {n, l, REC}}). + +-define(db_sup, ?MODULE). +-define(shards_sup, emqx_ds_builtin_local_db_shards_sup). +-define(shard_sup, emqx_ds_builtin_local_db_shard_sup). + +-record(?db_sup, {db}). +-record(?shards_sup, {db}). +-record(?shard_sup, {db, shard}). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> {ok, pid()}. +start_db(DB, Opts) -> + start_link_sup(#?db_sup{db = DB}, Opts). + +-spec start_shard(emqx_ds_storage_layer:shard_id()) -> + supervisor:startchild_ret(). +start_shard({DB, Shard}) -> + supervisor:start_child(?via(#?shards_sup{db = DB}), shard_spec(DB, Shard)). + +-spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, not_found}. +stop_shard({DB, Shard}) -> + Sup = ?via(#?shards_sup{db = DB}), + case supervisor:terminate_child(Sup, Shard) of + ok -> + supervisor:delete_child(Sup, Shard); + {error, Reason} -> + {error, Reason} + end. + +-spec terminate_storage(emqx_ds_storage_layer:shard_id()) -> ok | {error, _Reason}. +terminate_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:terminate_child(Sup, {Shard, storage}). + +-spec restart_storage(emqx_ds_storage_layer:shard_id()) -> {ok, _Child} | {error, _Reason}. +restart_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:restart_child(Sup, {Shard, storage}). + +-spec ensure_shard(emqx_ds_storage_layer:shard_id()) -> + ok | {error, _Reason}. +ensure_shard(Shard) -> + ensure_started(start_shard(Shard)). + +-spec which_shards(emqx_ds:db()) -> + [_Child]. +which_shards(DB) -> + supervisor:which_children(?via(#?shards_sup{db = DB})). + +%% @doc Return the list of builtin DS databases that are currently +%% active on the node. +-spec which_dbs() -> [emqx_ds:db()]. +which_dbs() -> + Key = {n, l, #?db_sup{_ = '_', db = '$1'}}, + gproc:select({local, names}, [{{Key, '_', '_'}, [], ['$1']}]). + +%% @doc Get pids of all local shard servers for the given DB. +-spec get_shard_workers(emqx_ds:db()) -> #{_Shard => pid()}. +get_shard_workers(DB) -> + Shards = supervisor:which_children(?via(#?shards_sup{db = DB})), + L = lists:flatmap( + fun + ({_Shard, Sup, _, _}) when is_pid(Sup) -> + [{Id, Pid} || {Id, Pid, _, _} <- supervisor:which_children(Sup), is_pid(Pid)]; + (_) -> + [] + end, + Shards + ), + maps:from_list(L). 
+ +%%================================================================================ +%% behaviour callbacks +%%================================================================================ + +init({#?db_sup{db = DB}, DefaultOpts}) -> + %% Spec for the top-level supervisor for the database: + logger:notice("Starting DS DB ~p", [DB]), + emqx_ds_builtin_metrics:init_for_db(DB), + Opts = emqx_ds_builtin_local_meta:open_db(DB, DefaultOpts), + Children = [ + sup_spec(#?shards_sup{db = DB}, Opts) + ], + SupFlags = #{ + strategy => one_for_all, + intensity => 0, + period => 1 + }, + {ok, {SupFlags, Children}}; +init({#?shards_sup{db = DB}, _Opts}) -> + %% Spec for the supervisor that manages the supervisors for + %% each local shard of the DB: + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1 + }, + Children = [shard_spec(DB, Shard) || Shard <- emqx_ds_builtin_local_meta:shards(DB)], + {ok, {SupFlags, Children}}; +init({#?shard_sup{db = DB, shard = Shard}, _}) -> + SupFlags = #{ + strategy => rest_for_one, + intensity => 10, + period => 100 + }, + Opts = emqx_ds_builtin_local_meta:db_config(DB), + Children = [ + shard_storage_spec(DB, Shard, Opts), + shard_buffer_spec(DB, Shard, Opts) + ], + {ok, {SupFlags, Children}}. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +start_link_sup(Id, Options) -> + supervisor:start_link(?via(Id), ?MODULE, {Id, Options}). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +sup_spec(Id, Options) -> + #{ + id => element(1, Id), + start => {?MODULE, start_link_sup, [Id, Options]}, + type => supervisor, + shutdown => infinity + }. + +shard_spec(DB, Shard) -> + #{ + id => Shard, + start => {?MODULE, start_link_sup, [#?shard_sup{db = DB, shard = Shard}, []]}, + shutdown => infinity, + restart => permanent, + type => supervisor + }. + +shard_storage_spec(DB, Shard, Opts) -> + #{ + id => {Shard, storage}, + start => {emqx_ds_storage_layer, start_link, [{DB, Shard}, Opts]}, + shutdown => 5_000, + restart => permanent, + type => worker + }. + +shard_buffer_spec(DB, Shard, Options) -> + #{ + id => {Shard, buffer}, + start => {emqx_ds_buffer, start_link, [emqx_ds_builtin_local, Options, DB, Shard]}, + shutdown => 5_000, + restart => permanent, + type => worker + }. + +ensure_started(Res) -> + case Res of + {ok, _Pid} -> + ok; + {error, {already_started, _Pid}} -> + ok; + {error, Reason} -> + {error, Reason} + end. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl new file mode 100644 index 000000000..dbc68cd2c --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl @@ -0,0 +1,204 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_meta). + +-behaviour(gen_server). + +%% API: +-export([ + start_link/0, + open_db/2, + drop_db/1, + n_shards/1, + shards/1, + db_config/1, + update_db_config/2, + + current_timestamp/1, + set_current_timestamp/2, + ensure_monotonic_timestamp/1 +]). + +%% behavior callbacks: +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). + +%% internal exports: +-export([]). + +-export_type([]). + +-include_lib("stdlib/include/ms_transform.hrl"). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(META_TAB, emqx_ds_builtin_local_metadata_tab). +-record(?META_TAB, { + db :: emqx_ds:db(), + db_props :: emqx_ds_builtin_local:db_opts() +}). + +%% We save timestamp of the last written message to a mnesia table. +%% The saved value is restored when the node restarts. This is needed +%% to create a timestamp that is truly monotonic even in presence of +%% node restarts. +-define(TS_TAB, emqx_ds_builtin_local_timestamp_tab). +-record(?TS_TAB, { + id :: emqx_ds_storage_layer:shard_id(), + latest :: integer() +}). + +%%================================================================================ +%% API functions +%%================================================================================ + +-define(SERVER, ?MODULE). + +-spec start_link() -> {ok, pid()}. +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +-spec open_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + emqx_ds_builtin_local:db_opts(). +open_db(DB, CreateOpts = #{backend := builtin_local, storage := _, n_shards := _}) -> + transaction( + fun() -> + case mnesia:wread({?META_TAB, DB}) of + [] -> + mnesia:write(#?META_TAB{db = DB, db_props = CreateOpts}), + CreateOpts; + [#?META_TAB{db_props = Opts}] -> + Opts + end + end + ). + +-spec drop_db(emqx_ds:db()) -> ok. +drop_db(DB) -> + transaction( + fun() -> + MS = ets:fun2ms(fun(#?TS_TAB{id = ID}) when element(1, ID) =:= DB -> + ID + end), + Timestamps = mnesia:select(?TS_TAB, MS, write), + [mnesia:delete({?TS_TAB, I}) || I <- Timestamps], + mnesia:delete({?META_TAB, DB}) + end + ). + +-spec update_db_config(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + emqx_ds_builtin_local:db_opts(). +update_db_config(DB, Opts) -> + transaction( + fun() -> + mnesia:write(#?META_TAB{db = DB, db_props = Opts}), + Opts + end + ). + +-spec n_shards(emqx_ds:db()) -> pos_integer(). +n_shards(DB) -> + #{n_shards := NShards} = db_config(DB), + NShards. + +-spec shards(emqx_ds:db()) -> [emqx_ds_builtin_local:shard()]. +shards(DB) -> + NShards = n_shards(DB), + [integer_to_binary(Shard) || Shard <- lists:seq(0, NShards - 1)]. + +-spec db_config(emqx_ds:db()) -> emqx_ds_builtin_local:db_opts(). +db_config(DB) -> + case mnesia:dirty_read(?META_TAB, DB) of + [#?META_TAB{db_props = Props}] -> + Props; + [] -> + error({no_such_db, DB}) + end. 
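To make the open_db/2 contract above concrete, a small sketch (illustrative values only, e.g. in a remote shell; assumes the metadata server and its mnesia tables are already up). The first call persists the options; any later call returns the persisted options regardless of what is passed in, and update_db_config/2 is the API for changing them:

    Opts = #{
        backend => builtin_local,
        storage => {emqx_ds_storage_reference, #{}},
        n_shards => 4
    },
    Opts = emqx_ds_builtin_local_meta:open_db(my_db, Opts),
    %% A second open with a different shard count still returns the
    %% originally persisted options:
    Opts = emqx_ds_builtin_local_meta:open_db(my_db, Opts#{n_shards => 8}).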
+ +-spec set_current_timestamp(emqx_ds_storage_layer:shard_id(), emqx_ds:time()) -> ok. +set_current_timestamp(ShardId, Time) -> + mria:dirty_write(?TS_TAB, #?TS_TAB{id = ShardId, latest = Time}). + +-spec current_timestamp(emqx_ds_storage_layer:shard_id()) -> emqx_ds:time() | undefined. +current_timestamp(ShardId) -> + case mnesia:dirty_read(?TS_TAB, ShardId) of + [#?TS_TAB{latest = Latest}] -> + Latest; + [] -> + undefined + end. + +-spec ensure_monotonic_timestamp(emqx_ds_storage_layer:shard_id()) -> emqx_ds:time(). +ensure_monotonic_timestamp(ShardId) -> + mria:dirty_update_counter({?TS_TAB, ShardId}, 1). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +-record(s, {}). +-define(timer_update, timer_update). + +init([]) -> + process_flag(trap_exit, true), + ensure_tables(), + S = #s{}, + {ok, S}. + +handle_call(_Call, _From, S) -> + {reply, {error, unknown_call}, S}. + +handle_cast(_Cast, S) -> + {noreply, S}. + +handle_info(_Info, S) -> + {noreply, S}. + +terminate(_Reason, _S) -> + ok. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ + +ensure_tables() -> + ok = mria:create_table(?META_TAB, [ + {local_content, true}, + {type, ordered_set}, + {storage, disc_copies}, + {record_name, ?META_TAB}, + {attributes, record_info(fields, ?META_TAB)} + ]), + ok = mria:create_table(?TS_TAB, [ + {local_content, true}, + {type, set}, + {storage, disc_copies}, + {record_name, ?TS_TAB}, + {attributes, record_info(fields, ?TS_TAB)} + ]). + +transaction(Fun) -> + case mria:transaction(mria:local_content_shard(), Fun) of + {atomic, Result} -> + Result; + {aborted, Reason} -> + {error, Reason} + end. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl new file mode 100644 index 000000000..f6a9b1757 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl @@ -0,0 +1,127 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc This supervisor manages the global worker processes needed for +%% the functioning of builtin local databases, and all builtin local +%% databases that attach to it. +-module(emqx_ds_builtin_local_sup). + +-behaviour(supervisor). + +%% API: +-export([start_db/2, stop_db/1]). + +%% behavior callbacks: +-export([init/1]). 
+ +%% internal exports: +-export([start_top/0, start_databases_sup/0]). + +-export_type([]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(top, ?MODULE). +-define(databases, emqx_ds_builtin_local_db_sup). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec start_top() -> {ok, pid()}. +start_top() -> + supervisor:start_link({local, ?top}, ?MODULE, ?top). + +-spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + supervisor:startchild_ret(). +start_db(DB, Opts) -> + ChildSpec = #{ + id => DB, + start => {?databases, start_db, [DB, Opts]}, + type => supervisor, + shutdown => infinity + }, + supervisor:start_child(?databases, ChildSpec). + +-spec stop_db(emqx_ds:db()) -> ok. +stop_db(DB) -> + case whereis(?databases) of + Pid when is_pid(Pid) -> + _ = supervisor:terminate_child(?databases, DB), + _ = supervisor:delete_child(?databases, DB), + ok; + undefined -> + ok + end. + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +%% There are two layers of supervision: +%% +%% 1. top supervisor for the builtin backend. It contains the global +%% worker processes (like the metadata server), and `?databases' +%% supervisior. +%% +%% 2. `?databases': a `one_for_one' supervisor where each child is a +%% `db' supervisor that contains processes that represent the DB. +%% Chidren are attached dynamically to this one. +init(?top) -> + %% Children: + MetadataServer = #{ + id => metadata_server, + start => {emqx_ds_builtin_local_meta, start_link, []}, + restart => permanent, + type => worker, + shutdown => 5000 + }, + DBsSup = #{ + id => ?databases, + start => {?MODULE, start_databases_sup, []}, + restart => permanent, + type => supervisor, + shutdown => infinity + }, + %% + SupFlags = #{ + strategy => one_for_all, + intensity => 1, + period => 1, + auto_shutdown => never + }, + {ok, {SupFlags, [MetadataServer, DBsSup]}}; +init(?databases) -> + %% Children are added dynamically: + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1 + }, + {ok, {SupFlags, []}}. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +start_databases_sup() -> + supervisor:start_link({local, ?databases}, ?MODULE, ?databases). + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl new file mode 100644 index 000000000..d8593ce40 --- /dev/null +++ b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl @@ -0,0 +1,346 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+
+%% internal exports:
+-export([start_top/0, start_databases_sup/0]).
+
+-export_type([]).
+
+%%================================================================================
+%% Type declarations
+%%================================================================================
+
+-define(top, ?MODULE).
+-define(databases, emqx_ds_builtin_local_db_sup).
+
+%%================================================================================
+%% API functions
+%%================================================================================
+
+-spec start_top() -> {ok, pid()}.
+start_top() ->
+    supervisor:start_link({local, ?top}, ?MODULE, ?top).
+
+-spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) ->
+    supervisor:startchild_ret().
+start_db(DB, Opts) ->
+    ChildSpec = #{
+        id => DB,
+        start => {?databases, start_db, [DB, Opts]},
+        type => supervisor,
+        shutdown => infinity
+    },
+    supervisor:start_child(?databases, ChildSpec).
+
+-spec stop_db(emqx_ds:db()) -> ok.
+stop_db(DB) ->
+    case whereis(?databases) of
+        Pid when is_pid(Pid) ->
+            _ = supervisor:terminate_child(?databases, DB),
+            _ = supervisor:delete_child(?databases, DB),
+            ok;
+        undefined ->
+            ok
+    end.
+
+%%================================================================================
+%% behavior callbacks
+%%================================================================================
+
+%% There are two layers of supervision:
+%%
+%% 1. top supervisor for the builtin backend. It contains the global
+%% worker processes (like the metadata server), and `?databases'
+%% supervisor.
+%%
+%% 2. `?databases': a `one_for_one' supervisor where each child is a
+%% `db' supervisor that contains processes that represent the DB.
+%% Children are attached dynamically to this one.
+init(?top) ->
+    %% Children:
+    MetadataServer = #{
+        id => metadata_server,
+        start => {emqx_ds_builtin_local_meta, start_link, []},
+        restart => permanent,
+        type => worker,
+        shutdown => 5000
+    },
+    DBsSup = #{
+        id => ?databases,
+        start => {?MODULE, start_databases_sup, []},
+        restart => permanent,
+        type => supervisor,
+        shutdown => infinity
+    },
+    %%
+    SupFlags = #{
+        strategy => one_for_all,
+        intensity => 1,
+        period => 1,
+        auto_shutdown => never
+    },
+    {ok, {SupFlags, [MetadataServer, DBsSup]}};
+init(?databases) ->
+    %% Children are added dynamically:
+    SupFlags = #{
+        strategy => one_for_one,
+        intensity => 10,
+        period => 1
+    },
+    {ok, {SupFlags, []}}.
+
+%%================================================================================
+%% Internal exports
+%%================================================================================
+
+start_databases_sup() ->
+    supervisor:start_link({local, ?databases}, ?MODULE, ?databases).
+
+%%================================================================================
+%% Internal functions
+%%================================================================================
diff --git a/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl
new file mode 100644
index 000000000..d8593ce40
--- /dev/null
+++ b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl
@@ -0,0 +1,346 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("emqx/include/asserts.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-define(N_SHARDS, 1). + +opts(_Config) -> + #{ + backend => builtin_local, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS + }. + +t_drop_generation_with_never_used_iterator(Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and not consumed from. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration begins. + %% In this case, the iterator won't see any messages and the stream will end. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter0) + ), + + %% New iterator for the new stream will only see the later messages. + [{_, Stream1}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + ?assertNotEqual(Stream0, Stream1), + {ok, Iter1} = emqx_ds:make_iterator(DB, Stream1, TopicFilter, StartTime), + + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter1, #{batch_size => 1}), + ?assertNotEqual(end_of_stream, Iter), + ?assertEqual(Msgs1, Batch), + + ok. + +t_drop_generation_with_used_once_iterator(Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and consumes at least 1 message. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration continues. + %% In this case, the iterator should see no more messages and the stream will end. 
+ + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = + [Msg0 | _] = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, Batch1} = emqx_ds:next(DB, Iter0, 1), + ?assertNotEqual(end_of_stream, Iter1), + ?assertEqual([Msg0], [Msg || {_Key, Msg} <- Batch1]), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter1) + ). + +t_drop_generation_update_iterator(Config) -> + %% This checks the behavior of `emqx_ds:update_iterator' after the generation + %% underlying the iterator has been dropped. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, _Batch1} = emqx_ds:next(DB, Iter0, 1), + {ok, _Iter2, [{Key2, _Msg}]} = emqx_ds:next(DB, Iter1, 1), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual( + {error, unrecoverable, generation_not_found}, + emqx_ds:update_iterator(DB, Iter1, Key2) + ). + +t_make_iterator_stale_stream(Config) -> + %% This checks the behavior of `emqx_ds:make_iterator' after the generation underlying + %% the stream has been dropped. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual( + {error, unrecoverable, generation_not_found}, + emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime) + ), + + ok. + +t_get_streams_concurrently_with_drop_generation(Config) -> + %% This checks that we can get all streams while a generation is dropped + %% mid-iteration. 
+
+    DB = ?FUNCTION_NAME,
+    ?check_trace(
+        #{timetrap => 5_000},
+        begin
+            ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))),
+
+            [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)),
+            ok = emqx_ds:add_generation(DB),
+            ok = emqx_ds:add_generation(DB),
+
+            %% All streams
+            TopicFilter = emqx_topic:words(<<"foo/+">>),
+            StartTime = 0,
+            ?assertMatch([_, _, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)),
+
+            ?force_ordering(
+                #{?snk_kind := dropped_gen},
+                #{?snk_kind := get_streams_get_gen}
+            ),
+
+            spawn_link(fun() ->
+                {ok, _} = ?block_until(#{?snk_kind := get_streams_all_gens}),
+                ok = emqx_ds:drop_generation(DB, GenId0),
+                ?tp(dropped_gen, #{})
+            end),
+
+            ?assertMatch([_, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)),
+
+            ok
+        end,
+        []
+    ).
+
+%% This testcase verifies the behavior of `store_batch' operation
+%% when the underlying code experiences recoverable or unrecoverable
+%% problems.
+t_store_batch_fail(Config) ->
+    ?check_trace(
+        #{timetrap => 15_000},
+        try
+            meck:new(emqx_ds_storage_layer, [passthrough, no_history]),
+            DB = ?FUNCTION_NAME,
+            ?assertMatch(ok, emqx_ds:open_db(DB, (opts(Config))#{n_shards => 2})),
+            %% Success:
+            Batch1 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1),
+                message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1)
+            ],
+            ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})),
+            %% Inject unrecoverable error:
+            meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) ->
+                {error, unrecoverable, mock}
+            end),
+            Batch2 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1),
+                message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1)
+            ],
+            ?assertMatch(
+                {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true})
+            ),
+            %% Inject a recoverable error:
+            meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) ->
+                {error, recoverable, mock}
+            end),
+            Batch3 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2),
+                message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2),
+                message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3),
+                message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3)
+            ],
+            %% Note: due to idempotency issues the number of retries
+            %% is currently set to 0:
+            ?assertMatch(
+                {error, recoverable, mock},
+                emqx_ds:store_batch(DB, Batch3, #{sync => true})
+            ),
+            meck:unload(emqx_ds_storage_layer),
+            ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})),
+            lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1))
+        after
+            meck:unload()
+        end,
+        [
+            {"message ordering", fun(StoredMessages, _Trace) ->
+                [{_, MessagesFromStream1}, {_, MessagesFromStream2}] = StoredMessages,
+                emqx_ds_test_helpers:diff_messages(
+                    [payload],
+                    [
+                        #message{payload = <<"1">>},
+                        #message{payload = <<"2">>},
+                        #message{payload = <<"5">>},
+                        #message{payload = <<"7">>}
+                    ],
+                    MessagesFromStream1
+                ),
+                emqx_ds_test_helpers:diff_messages(
+                    [payload],
+                    [
+                        #message{payload = <<"6">>},
+                        #message{payload = <<"8">>}
+                    ],
+                    MessagesFromStream2
+                )
+            end}
+        ]
+    ).
+
+message(ClientId, Topic, Payload, PublishedAt) ->
+    Msg = message(Topic, Payload, PublishedAt),
+    Msg#message{from = ClientId}.
+
+message(Topic, Payload, PublishedAt) ->
+    #message{
+        topic = Topic,
+        payload = Payload,
+        timestamp = PublishedAt,
+        id = emqx_guid:gen()
+    }.
+
+delete(DB, It, Selector, BatchSize) ->
+    delete(DB, It, Selector, BatchSize, 0).
+
+delete(DB, It0, Selector, BatchSize, Acc) ->
+    case emqx_ds:delete_next(DB, It0, Selector, BatchSize) of
+        {ok, It, 0} ->
+            {ok, It, Acc};
+        {ok, It, NumDeleted} ->
+            delete(DB, It, Selector, BatchSize, Acc + NumDeleted);
+        {ok, end_of_stream} ->
+            {ok, end_of_stream, Acc};
+        Ret ->
+            Ret
+    end.
+
+%% CT callbacks
+
+all() ->
+    emqx_common_test_helpers:all(?MODULE).
+
+init_per_suite(Config) ->
+    emqx_common_test_helpers:clear_screen(),
+    Apps = emqx_cth_suite:start(
+        [mria, emqx_ds_builtin_local],
+        #{work_dir => ?config(priv_dir, Config)}
+    ),
+    [{apps, Apps} | Config].
+
+end_per_suite(Config) ->
+    ok = emqx_cth_suite:stop(?config(apps, Config)),
+    ok.
+
+init_per_testcase(_TC, Config) ->
+    application:ensure_all_started(emqx_ds_builtin_local),
+    Config.
+
+end_per_testcase(_TC, _Config) ->
+    snabbkaffe:stop(),
+    ok = application:stop(emqx_ds_builtin_local),
+    mria:stop(),
+    _ = mnesia:delete_schema([node()]),
+    ok.
diff --git a/apps/emqx_ds_builtin_raft/BSL.txt b/apps/emqx_ds_builtin_raft/BSL.txt
new file mode 100644
index 000000000..5df4e60aa
--- /dev/null
+++ b/apps/emqx_ds_builtin_raft/BSL.txt
@@ -0,0 +1,94 @@
+Business Source License 1.1
+
+Licensor: Hangzhou EMQ Technologies Co., Ltd.
+Licensed Work: EMQX Enterprise Edition
+ The Licensed Work is (c) 2024
+ Hangzhou EMQ Technologies Co., Ltd.
+Additional Use Grant: Students and educators are granted right to copy,
+ modify, and create derivative work for research
+ or education.
+Change Date: 2028-06-13
+Change License: Apache License, Version 2.0
+
+For information about alternative licensing arrangements for the Software,
+please contact Licensor: https://www.emqx.com/en/contact
+
+Notice
+
+The Business Source License (this document, or the “License”) is not an Open
+Source license. However, the Licensed Work will eventually be made available
+under an Open Source License, as stated in this License.
+
+License text copyright (c) 2017, 2024 MariaDB Corporation Ab, All Rights Reserved.
+“Business Source License” is a trademark of MariaDB Corporation Ab.
+
+-----------------------------------------------------------------------------
+
+Business Source License 1.1
+
+Terms
+
+The Licensor hereby grants you the right to copy, modify, create derivative
+works, redistribute, and make non-production use of the Licensed Work. The
+Licensor may make an Additional Use Grant, above, permitting limited
+production use.
+
+Effective on the Change Date, or the fourth anniversary of the first publicly
+available distribution of a specific version of the Licensed Work under this
+License, whichever comes first, the Licensor hereby grants you rights under
+the terms of the Change License, and the rights granted in the paragraph
+above terminate.
+
+If your use of the Licensed Work does not comply with the requirements
+currently in effect as described in this License, you must purchase a
+commercial license from the Licensor, its affiliated entities, or authorized
+resellers, or you must refrain from using the Licensed Work.
+
+All copies of the original and modified Licensed Work, and derivative works
+of the Licensed Work, are subject to this License. This License applies
+separately for each version of the Licensed Work and the Change Date may vary
+for each version of the Licensed Work released by Licensor.
+
+You must conspicuously display this License on each original or modified copy
+of the Licensed Work.
If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. diff --git a/apps/emqx_ds_builtin_raft/README.md b/apps/emqx_ds_builtin_raft/README.md new file mode 100644 index 000000000..7f468f365 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/README.md @@ -0,0 +1,3 @@ +# `emqx_ds_builtin_raft` + +Replication layer for the builtin EMQX durable storage backend that uses Raft algorithm. diff --git a/apps/emqx_ds_builtin_raft/rebar.config b/apps/emqx_ds_builtin_raft/rebar.config new file mode 100644 index 000000000..2d2671571 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/rebar.config @@ -0,0 +1,6 @@ +%% -*- mode:erlang -*- + +{deps, [ + {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {ra, "2.7.3"} +]}. diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src new file mode 100644 index 000000000..18e84e6b1 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_ds_builtin_raft, [ + {description, "Raft replication layer for the durable storage"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, gproc, mria, ra, emqx_durable_storage]}, + {mod, {emqx_ds_builtin_raft_app, []}}, + {env, []} +]}. 
diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl new file mode 100644 index 000000000..2b1cae64d --- /dev/null +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl @@ -0,0 +1,11 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_ds_builtin_raft_app). + +-export([start/2]). + +start(_Type, _Args) -> + emqx_ds:register_backend(builtin_raft, emqx_ds_replication_layer), + emqx_ds_builtin_raft_sup:start_top(). diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl similarity index 91% rename from apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl index 40380ed59..1816e551f 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl @@ -1,22 +1,10 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc Supervisor that contains all the processes that belong to a %% given builtin DS database. --module(emqx_ds_builtin_db_sup). +-module(emqx_ds_builtin_raft_db_sup). -behaviour(supervisor). @@ -150,7 +138,7 @@ get_shard_workers(DB) -> init({#?db_sup{db = DB}, DefaultOpts}) -> %% Spec for the top-level supervisor for the database: logger:notice("Starting DS DB ~p", [DB]), - emqx_ds_builtin_sup:clean_gvars(DB), + emqx_ds_builtin_raft_sup:clean_gvars(DB), emqx_ds_builtin_metrics:init_for_db(DB), Opts = emqx_ds_replication_layer_meta:open_db(DB, DefaultOpts), ok = start_ra_system(DB, Opts), @@ -197,7 +185,7 @@ init({#?shard_sup{db = DB, shard = Shard}, _}) -> {ok, {SupFlags, Children}}. start_ra_system(DB, #{replication_options := ReplicationOpts}) -> - DataDir = filename:join([emqx_ds:base_dir(), DB, dsrepl]), + DataDir = filename:join([emqx_ds_storage_layer:base_dir(), DB, dsrepl]), Config = lists:foldr(fun maps:merge/2, #{}, [ ra_system:default_config(), #{ @@ -279,9 +267,10 @@ shard_allocator_spec(DB) -> }. 
egress_spec(DB, Shard) -> + Options = #{}, #{ id => Shard, - start => {emqx_ds_replication_layer_egress, start_link, [DB, Shard]}, + start => {emqx_ds_buffer, start_link, [emqx_ds_replication_layer, Options, DB, Shard]}, shutdown => 5_000, restart => permanent, type => worker diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl similarity index 81% rename from apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl index 971805351..70c9bbe16 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl @@ -1,35 +1,23 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc This supervisor manages the global worker processes needed for %% the functioning of builtin databases, and all builtin database %% attach to it. --module(emqx_ds_builtin_sup). +-module(emqx_ds_builtin_raft_sup). -behaviour(supervisor). %% API: --export([start_db/2, stop_db/1]). +-export([start_top/0, start_db/2, stop_db/1]). -export([set_gvar/3, get_gvar/3, clean_gvars/1]). %% behavior callbacks: -export([init/1]). %% internal exports: --export([start_top/0, start_databases_sup/0]). +-export([start_databases_sup/0]). -export_type([]). @@ -39,7 +27,6 @@ -define(top, ?MODULE). -define(databases, emqx_ds_builtin_databases_sup). - -define(gvar_tab, emqx_ds_builtin_gvar). -record(gvar, { @@ -51,13 +38,16 @@ %% API functions %%================================================================================ +-spec start_top() -> {ok, pid()}. +start_top() -> + supervisor:start_link({local, ?top}, ?MODULE, ?top). + -spec start_db(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> supervisor:startchild_ret(). start_db(DB, Opts) -> - ensure_top(), ChildSpec = #{ id => DB, - start => {emqx_ds_builtin_db_sup, start_db, [DB, Opts]}, + start => {emqx_ds_builtin_raft_db_sup, start_db, [DB, Opts]}, type => supervisor, shutdown => infinity }, @@ -109,7 +99,6 @@ clean_gvars(DB) -> %% Chidren are attached dynamically to this one. init(?top) -> %% Children: - MetricsWorker = emqx_ds_builtin_metrics:child_spec(), MetadataServer = #{ id => metadata_server, start => {emqx_ds_replication_layer_meta, start_link, []}, @@ -132,7 +121,7 @@ init(?top) -> period => 1, auto_shutdown => never }, - {ok, {SupFlags, [MetricsWorker, MetadataServer, DBsSup]}}; + {ok, {SupFlags, [MetadataServer, DBsSup]}}; init(?databases) -> %% Children are added dynamically: SupFlags = #{ @@ -146,17 +135,9 @@ init(?databases) -> %% Internal exports %%================================================================================ --spec start_top() -> {ok, pid()}. 
-start_top() -> - supervisor:start_link({local, ?top}, ?MODULE, ?top). - start_databases_sup() -> supervisor:start_link({local, ?databases}, ?MODULE, ?databases). %%================================================================================ %% Internal functions %%================================================================================ - -ensure_top() -> - {ok, _} = emqx_ds_sup:attach_backend(builtin, {?MODULE, start_top, []}), - ok. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl similarity index 95% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 836e9df07..0a1173e70 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -1,28 +1,18 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc Replication layer for DS backends that don't support %% replication on their own. -module(emqx_ds_replication_layer). --behaviour(emqx_ds). +%-behaviour(emqx_ds). +-behaviour(emqx_ds_buffer). -export([ list_shards/1, open_db/2, + close_db/1, add_generation/1, update_db_config/2, list_generations_with_lifetimes/1, @@ -36,8 +26,12 @@ update_iterator/3, next/3, delete_next/4, - shard_of_message/3, - current_timestamp/2 + + current_timestamp/2, + + shard_of_message/4, + flush_buffer/4, + init_buffer/3 ]). %% internal exports: @@ -79,7 +73,6 @@ delete_stream/0, iterator/0, delete_iterator/0, - message_id/0, batch/0 ]). @@ -139,8 +132,6 @@ ?enc := emqx_ds_storage_layer:delete_iterator() }. --type message_id() :: emqx_ds:message_id(). - %% TODO: this type is obsolete and is kept only for compatibility with %% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6) -type batch() :: #{ @@ -176,7 +167,7 @@ list_shards(DB) -> -spec open_db(emqx_ds:db(), builtin_db_opts()) -> ok | {error, _}. open_db(DB, CreateOpts) -> - case emqx_ds_builtin_sup:start_db(DB, CreateOpts) of + case emqx_ds_builtin_raft_sup:start_db(DB, CreateOpts) of {ok, _} -> ok; {error, {already_started, _}} -> @@ -185,6 +176,10 @@ open_db(DB, CreateOpts) -> {error, Err} end. +-spec close_db(emqx_ds:db()) -> ok. +close_db(DB) -> + emqx_ds_builtin_raft_sup:stop_db(DB). + -spec add_generation(emqx_ds:db()) -> ok | {error, _}. add_generation(DB) -> foreach_shard( @@ -241,7 +236,7 @@ drop_db(DB) -> emqx_ds:store_batch_result(). 
store_batch(DB, Messages, Opts) -> try - emqx_ds_replication_layer_egress:store_batch(DB, Messages, Opts) + emqx_ds_buffer:store_batch(DB, Messages, Opts) catch error:{Reason, _Call} when Reason == timeout; Reason == noproc -> {error, recoverable, Reason} @@ -357,17 +352,6 @@ delete_next(DB, Iter0, Selector, BatchSize) -> Other end. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> - emqx_ds_replication_layer:shard_id(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy) -> - N = emqx_ds_replication_shard_allocator:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, - integer_to_binary(Hash). - -spec foreach_shard(emqx_ds:db(), fun((shard_id()) -> _)) -> ok. foreach_shard(DB, Fun) -> lists:foreach(Fun, list_shards(DB)). @@ -376,12 +360,41 @@ foreach_shard(DB, Fun) -> %% local server -spec current_timestamp(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> emqx_ds:time(). current_timestamp(DB, Shard) -> - emqx_ds_builtin_sup:get_gvar(DB, ?gv_timestamp(Shard), 0). + emqx_ds_builtin_raft_sup:get_gvar(DB, ?gv_timestamp(Shard), 0). %%================================================================================ -%% behavior callbacks +%% emqx_ds_buffer callbacks %%================================================================================ +-record(bs, {}). +-type egress_state() :: #bs{}. + +-spec init_buffer(emqx_ds:db(), shard_id(), _Options) -> {ok, egress_state()}. +init_buffer(_DB, _Shard, _Options) -> + {ok, #bs{}}. + +-spec flush_buffer(emqx_ds:db(), shard_id(), [emqx_types:message()], egress_state()) -> + {egress_state(), ok | {error, recoverable | unrecoverable, _}}. +flush_buffer(DB, Shard, Messages, State) -> + case ra_store_batch(DB, Shard, Messages) of + {timeout, ServerId} -> + Result = {error, recoverable, {timeout, ServerId}}; + Result -> + ok + end, + {State, Result}. + +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> + emqx_ds_replication_layer:shard_id(). +shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + N = emqx_ds_replication_shard_allocator:n_shards(DB), + Hash = + case SerializeBy of + clientid -> erlang:phash2(From, N); + topic -> erlang:phash2(Topic, N) + end, + integer_to_binary(Hash). + %%================================================================================ %% Internal exports (RPC targets) %%================================================================================ @@ -402,7 +415,7 @@ current_timestamp(DB, Shard) -> -spec do_drop_db_v1(emqx_ds:db()) -> ok | {error, _}. do_drop_db_v1(DB) -> MyShards = emqx_ds_replication_layer_meta:my_shards(DB), - emqx_ds_builtin_sup:stop_db(DB), + emqx_ds_builtin_raft_sup:stop_db(DB), lists:foreach( fun(Shard) -> emqx_ds_storage_layer:drop_shard({DB, Shard}) @@ -874,4 +887,4 @@ handle_custom_event(DBShard, Latest, Event) -> end. set_ts({DB, Shard}, TS) -> - emqx_ds_builtin_sup:set_gvar(DB, ?gv_timestamp(Shard), TS). + emqx_ds_builtin_raft_sup:set_gvar(DB, ?gv_timestamp(Shard), TS). 
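The hunk above turns emqx_ds_replication_layer into an emqx_ds_buffer callback module. A minimal sketch of such a callback module, assuming the three callbacks visible here (init_buffer/3, flush_buffer/4, shard_of_message/4) make up the behaviour contract; `toy_ds_backend' is a hypothetical module that only logs instead of persisting anything:

    -module(toy_ds_backend).
    -behaviour(emqx_ds_buffer).
    -export([init_buffer/3, flush_buffer/4, shard_of_message/4]).

    %% Called once per shard when the buffer worker starts:
    init_buffer(_DB, _Shard, _Options) ->
        {ok, #{}}.

    %% Called with an accumulated batch; returns the (possibly updated)
    %% callback state plus ok or {error, recoverable | unrecoverable, _}:
    flush_buffer(_DB, Shard, Messages, State) ->
        logger:info("flushing ~p messages to shard ~p", [length(Messages), Shard]),
        {State, ok}.

    %% Decides which shard buffers a given message; trivial single-shard mapping:
    shard_of_message(_DB, _Message, _SerializeBy, _Options) ->
        <<"0">>.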
diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl similarity index 59% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl index 4472b5a47..f33090c46 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022, 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -ifndef(EMQX_DS_REPLICATION_LAYER_HRL). -define(EMQX_DS_REPLICATION_LAYER_HRL, true). diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl similarity index 97% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl index 09e24e23f..2348d7c2d 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc Metadata storage for the builtin sharded database. @@ -678,7 +666,7 @@ ensure_tables() -> ok = mria:wait_for_tables([?META_TAB, ?NODE_TAB, ?SHARD_TAB]). 
ensure_site() -> - Filename = filename:join(emqx_ds:base_dir(), "emqx_ds_builtin_site.eterm"), + Filename = filename:join(emqx_ds_storage_layer:base_dir(), "emqx_ds_builtin_site.eterm"), case file:consult(Filename) of {ok, [Site]} -> ok; diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl similarity index 96% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl index 1070fbde0..b43373c43 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_replication_layer_shard). diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl similarity index 95% rename from apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl index cbaafc718..fa6814572 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_replication_shard_allocator). @@ -297,7 +285,7 @@ trans_drop_local(DB, Shard, {del, Site}) -> do_drop_local(DB, Shard) -> case emqx_ds_replication_layer_shard:drop_local_server(DB, Shard) of ok -> - ok = emqx_ds_builtin_db_sup:stop_shard({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:stop_shard({DB, Shard}), ok = emqx_ds_storage_layer:drop_shard({DB, Shard}), logger:info(#{msg => "Local shard replica dropped"}); {error, recoverable, Reason} -> @@ -428,7 +416,7 @@ start_shards(DB, Shards) -> lists:foreach(fun(Shard) -> start_shard(DB, Shard) end, Shards). 
start_shard(DB, Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_shard({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:ensure_shard({DB, Shard}), ok = logger:info(#{msg => "Shard started", shard => Shard}), ok. @@ -436,7 +424,7 @@ start_egresses(DB, Shards) -> lists:foreach(fun(Shard) -> start_egress(DB, Shard) end, Shards). start_egress(DB, Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_egress({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:ensure_egress({DB, Shard}), ok = logger:info(#{msg => "Egress started", shard => Shard}), ok. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl similarity index 98% rename from apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl index c90c71688..9267aee77 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl @@ -195,7 +195,7 @@ start_snapshot_writer(WS) -> msg => "dsrepl_snapshot_write_started", shard => ShardId }), - _ = emqx_ds_builtin_db_sup:terminate_storage(ShardId), + _ = emqx_ds_builtin_raft_db_sup:terminate_storage(ShardId), {ok, SnapWriter} = emqx_ds_storage_layer:accept_snapshot(ShardId), {ok, WS#ws{phase = storage_snapshot, writer = SnapWriter}}. @@ -223,7 +223,7 @@ complete_accept(WS = #ws{started_at = StartedAt, writer = SnapWriter}) -> duration_ms => erlang:monotonic_time(millisecond) - StartedAt, bytes_written => emqx_ds_storage_snapshot:writer_info(bytes_written, SnapWriter) }), - {ok, _} = emqx_ds_builtin_db_sup:restart_storage(ShardId), + {ok, _} = emqx_ds_builtin_raft_db_sup:restart_storage(ShardId), write_machine_snapshot(WS). write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) -> diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl similarity index 82% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl index 77d5693d5..d2c4e1c0a 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v1). 
diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl similarity index 86% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl index 836bfbc86..259e9cb4e 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v2). diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl similarity index 88% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl index f55ef9fb2..4c49906da 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v3). diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl similarity index 90% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl index 73285247f..9f66f2c32 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v4). diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl similarity index 66% rename from apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl rename to apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl index 1b2a21105..abe154807 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl @@ -35,7 +35,7 @@ opts() -> opts(Overrides) -> maps:merge( #{ - backend => builtin, + backend => builtin_raft, %% storage => {emqx_ds_storage_reference, #{}}, storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 10}}, n_shards => 16, @@ -56,8 +56,52 @@ appspec(emqx_durable_storage) -> override_env => [{egress_flush_interval, 1}] }}. +t_metadata(init, Config) -> + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + [{apps, Apps} | Config]; +t_metadata('end', Config) -> + emqx_cth_suite:stop(?config(apps, Config)), + Config. + +t_metadata(_Config) -> + DB = ?FUNCTION_NAME, + NShards = 1, + Options = #{ + backend => builtin_raft, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => NShards, + n_sites => 1, + replication_factor => 1, + replication_options => #{} + }, + try + ?assertMatch(ok, emqx_ds:open_db(DB, Options)), + %% Check metadata: + %% We have only one site: + [Site] = emqx_ds_replication_layer_meta:sites(), + %% Check all shards: + Shards = emqx_ds_replication_layer_meta:shards(DB), + %% Since there is only one site all shards should be allocated + %% to this site: + MyShards = emqx_ds_replication_layer_meta:my_shards(DB), + ?assertEqual(NShards, length(Shards)), + lists:foreach( + fun(Shard) -> + ?assertEqual( + [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard) + ) + end, + Shards + ), + ?assertEqual(lists:sort(Shards), lists:sort(MyShards)) + after + ?assertMatch(ok, emqx_ds:drop_db(DB)) + end. + t_replication_transfers_snapshots(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], NodeSpecs = emqx_cth_cluster:mk_nodespecs( [ {t_replication_transfers_snapshots1, #{apps => Apps}}, @@ -130,7 +174,7 @@ t_replication_transfers_snapshots(Config) -> ). t_rebalance(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_rebalance1, #{apps => Apps}}, @@ -159,18 +203,23 @@ t_rebalance(Config) -> ?check_trace( #{timetrap => 30_000}, begin + Sites = [S1, S2 | _] = [ds_repl_meta(N, this_site) || N <- Nodes], %% 1. Initialize DB on the first node. 
Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), - ?assertEqual(ok, ?ON(N1, emqx_ds:open_db(?DB, Opts))), - ?assertMatch(Shards when length(Shards) == 16, shards_online(N1, ?DB)), - - %% 1.1 Open DB on the rest of the nodes: [ ?assertEqual(ok, ?ON(Node, emqx_ds:open_db(?DB, Opts))) || Node <- Nodes ], - Sites = [S1, S2 | _] = [ds_repl_meta(N, this_site) || N <- Nodes], + %% 1.1 Kick all sites except S1 from the replica set as + %% the initial condition: + ?assertMatch( + {ok, [_]}, + ?ON(N1, emqx_ds_replication_layer_meta:assign_db_sites(?DB, [S1])) + ), + ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), + ?retry(500, 10, ?assertMatch(Shards when length(Shards) == 16, shards_online(N1, ?DB))), + ct:pal("Sites: ~p~n", [Sites]), Sequence = [ @@ -260,7 +309,7 @@ t_rebalance(Config) -> ). t_join_leave_errors(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_join_leave_errors1, #{apps => Apps}}, @@ -275,16 +324,15 @@ t_join_leave_errors('end', Config) -> t_join_leave_errors(Config) -> %% This testcase verifies that logical errors arising during handling of %% join/leave operations are reported correctly. - [N1, N2] = ?config(nodes, Config), Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), - ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?DB, Opts])), - ?assertEqual(ok, erpc:call(N2, emqx_ds, open_db, [?DB, Opts])), + ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?FUNCTION_NAME, Opts])), + ?assertEqual(ok, erpc:call(N2, emqx_ds, open_db, [?FUNCTION_NAME, Opts])), [S1, S2] = [ds_repl_meta(N, this_site) || N <- [N1, N2]], - ?assertEqual([S1], ds_repl_meta(N1, db_sites, [?DB])), + ?assertEqual(lists:sort([S1, S2]), lists:sort(ds_repl_meta(N1, db_sites, [?FUNCTION_NAME]))), %% Attempts to join a nonexistent DB / site. ?assertEqual( @@ -293,36 +341,43 @@ t_join_leave_errors(Config) -> ), ?assertEqual( {error, {nonexistent_sites, [<<"NO-MANS-SITE">>]}}, - ds_repl_meta(N1, join_db_site, [?DB, <<"NO-MANS-SITE">>]) + ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, <<"NO-MANS-SITE">>]) ), %% NOTE: Leaving a non-existent site is not an error. ?assertEqual( {ok, unchanged}, - ds_repl_meta(N1, leave_db_site, [?DB, <<"NO-MANS-SITE">>]) + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, <<"NO-MANS-SITE">>]) ), %% Should be no-op. - ?assertEqual({ok, unchanged}, ds_repl_meta(N1, join_db_site, [?DB, S1])), - ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)), + ?assertEqual({ok, unchanged}, ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, S1])), + ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)), - %% Impossible to leave the last site. + %% Leave S2: + ?assertEqual( + {ok, [S1]}, + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, S2]) + ), + %% Impossible to leave the last site: ?assertEqual( {error, {too_few_sites, []}}, - ds_repl_meta(N1, leave_db_site, [?DB, S1]) + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, S1]) ), %% "Move" the DB to the other node. 
- ?assertMatch({ok, _}, ds_repl_meta(N1, join_db_site, [?DB, S2])), - ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])), - ?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?DB)), - ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), + ?assertMatch({ok, _}, ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, S2])), + ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?FUNCTION_NAME, S1])), + ?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)), + ?retry( + 1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)) + ), %% Should be no-op. - ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])), - ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)). + ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?FUNCTION_NAME, S1])), + ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)). t_rebalance_chaotic_converges(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_rebalance_chaotic_converges1, #{apps => Apps}}, @@ -351,23 +406,24 @@ t_rebalance_chaotic_converges(Config) -> ?check_trace( #{}, begin + Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], + ct:pal("Sites: ~p~n", [Sites]), + %% Initialize DB on first two nodes. Opts = opts(#{n_shards => 16, n_sites => 2, replication_factor => 3}), + %% Open DB: ?assertEqual( - [{ok, ok}, {ok, ok}], - erpc:multicall([N1, N2], emqx_ds, open_db, [?DB, Opts]) + [{ok, ok}, {ok, ok}, {ok, ok}], + erpc:multicall([N1, N2, N3], emqx_ds, open_db, [?DB, Opts]) ), - %% Open DB on the last node. - ?assertEqual( - ok, - erpc:call(N3, emqx_ds, open_db, [?DB, Opts]) + %% Kick N3 from the replica set as the initial condition: + ?assertMatch( + {ok, [_, _]}, + ?ON(N1, emqx_ds_replication_layer_meta:assign_db_sites(?DB, [S1, S2])) ), - - %% Find out which sites there are. - Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], - ct:pal("Sites: ~p~n", [Sites]), + ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), Sequence = [ {N1, join_db_site, S3}, @@ -418,7 +474,7 @@ t_rebalance_chaotic_converges(Config) -> ). t_rebalance_offline_restarts(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Specs = emqx_cth_cluster:mk_nodespecs( [ {t_rebalance_offline_restarts1, #{apps => Apps}}, @@ -435,6 +491,7 @@ t_rebalance_offline_restarts('end', Config) -> t_rebalance_offline_restarts(Config) -> %% This testcase verifies that rebalancing progresses if nodes restart or %% go offline and never come back. + ok = snabbkaffe:start_trace(), Nodes = [N1, N2, N3] = ?config(nodes, Config), _Specs = [NS1, NS2, _] = ?config(nodespecs, Config), @@ -477,7 +534,7 @@ t_rebalance_offline_restarts(Config) -> ?assertEqual(lists:sort([S1, S2]), ds_repl_meta(N1, db_sites, [?DB])). t_drop_generation(Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], [_, _, NS3] = NodeSpecs = emqx_cth_cluster:mk_nodespecs( [ @@ -554,6 +611,189 @@ t_drop_generation(Config) -> end ). 
+t_error_mapping_replication_layer(init, Config) -> + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + [{apps, Apps} | Config]; +t_error_mapping_replication_layer('end', Config) -> + emqx_cth_suite:stop(?config(apps, Config)), + Config. + +t_error_mapping_replication_layer(_Config) -> + %% This checks that the replication layer maps recoverable errors correctly. + + ok = emqx_ds_test_helpers:mock_rpc(), + ok = snabbkaffe:start_trace(), + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), + [Shard1, Shard2] = emqx_ds_replication_layer_meta:shards(DB), + + TopicFilter = emqx_topic:words(<<"foo/#">>), + Msgs = [ + message(<<"C1">>, <<"foo/bar">>, <<"1">>, 0), + message(<<"C1">>, <<"foo/baz">>, <<"2">>, 1), + message(<<"C2">>, <<"foo/foo">>, <<"3">>, 2), + message(<<"C3">>, <<"foo/xyz">>, <<"4">>, 3), + message(<<"C4">>, <<"foo/bar">>, <<"5">>, 4), + message(<<"C5">>, <<"foo/oof">>, <<"6">>, 5) + ], + + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + + ?block_until(#{?snk_kind := emqx_ds_buffer_flush, shard := Shard1}), + ?block_until(#{?snk_kind := emqx_ds_buffer_flush, shard := Shard2}), + + Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0), + Iterators0 = lists:map( + fun({_Rank, S}) -> + {ok, Iter} = emqx_ds:make_iterator(DB, S, TopicFilter, 0), + Iter + end, + Streams0 + ), + + %% Disrupt the link to the second shard. + ok = emqx_ds_test_helpers:mock_rpc_result( + fun(_Node, emqx_ds_replication_layer, _Function, Args) -> + case Args of + [DB, Shard1 | _] -> passthrough; + [DB, Shard2 | _] -> unavailable + end + end + ), + + %% Result of `emqx_ds:get_streams/3` will just contain partial results, not an error. + Streams1 = emqx_ds:get_streams(DB, TopicFilter, 0), + ?assert( + length(Streams1) > 0 andalso length(Streams1) =< length(Streams0), + Streams1 + ), + + %% At least one of `emqx_ds:make_iterator/4` will end in an error. + Results1 = lists:map( + fun({_Rank, S}) -> + case emqx_ds:make_iterator(DB, S, TopicFilter, 0) of + Ok = {ok, _Iter} -> + Ok; + Error = {error, recoverable, {erpc, _}} -> + Error; + Other -> + ct:fail({unexpected_result, Other}) + end + end, + Streams0 + ), + ?assert( + length([error || {error, _, _} <- Results1]) > 0, + Results1 + ), + + %% At least one of `emqx_ds:next/3` over initial set of iterators will end in an error. + Results2 = lists:map( + fun(Iter) -> + case emqx_ds:next(DB, Iter, _BatchSize = 42) of + Ok = {ok, _Iter, [_ | _]} -> + Ok; + Error = {error, recoverable, {badrpc, _}} -> + Error; + Other -> + ct:fail({unexpected_result, Other}) + end + end, + Iterators0 + ), + ?assert( + length([error || {error, _, _} <- Results2]) > 0, + Results2 + ), + meck:unload(). + +%% This testcase verifies the behavior of `store_batch' operation +%% when the underlying code experiences recoverable or unrecoverable +%% problems. +t_store_batch_fail(init, Config) -> + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + [{apps, Apps} | Config]; +t_store_batch_fail('end', Config) -> + emqx_cth_suite:stop(?config(apps, Config)), + Config. 
+
+t_store_batch_fail(_Config) ->
+    ?check_trace(
+        #{timetrap => 15_000},
+        try
+            meck:new(emqx_ds_storage_layer, [passthrough, no_history]),
+            DB = ?FUNCTION_NAME,
+            ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})),
+            %% Success:
+            Batch1 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1),
+                message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1)
+            ],
+            ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})),
+            %% Inject unrecoverable error:
+            meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) ->
+                {error, unrecoverable, mock}
+            end),
+            Batch2 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1),
+                message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1)
+            ],
+            ?assertMatch(
+                {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true})
+            ),
+            meck:unload(emqx_ds_storage_layer),
+            %% Inject a recoverable error:
+            meck:new(ra, [passthrough, no_history]),
+            meck:expect(ra, process_command, fun(Servers, Shard, Command) ->
+                ?tp(ra_command, #{servers => Servers, shard => Shard, command => Command}),
+                {timeout, mock}
+            end),
+            Batch3 = [
+                message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2),
+                message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2),
+                message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3),
+                message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3)
+            ],
+            %% Note: due to idempotency issues the number of retries
+            %% is currently set to 0:
+            ?assertMatch(
+                {error, recoverable, {timeout, mock}},
+                emqx_ds:store_batch(DB, Batch3, #{sync => true})
+            ),
+            meck:unload(ra),
+            ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})),
+            lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1))
+        after
+            meck:unload()
+        end,
+        [
+            {"message ordering", fun(StoredMessages, _Trace) ->
+                [{_, Stream1}, {_, Stream2}] = StoredMessages,
+                ?assertMatch(
+                    [
+                        #message{payload = <<"1">>},
+                        #message{payload = <<"2">>},
+                        #message{payload = <<"5">>},
+                        #message{payload = <<"7">>}
+                    ],
+                    Stream1
+                ),
+                ?assertMatch(
+                    [
+                        #message{payload = <<"6">>},
+                        #message{payload = <<"8">>}
+                    ],
+                    Stream2
+                )
+            end}
+        ]
+    ).
+
 %%
 
 shard_server_info(Node, DB, Shard, Site, Info) ->
@@ -583,7 +823,7 @@ shards(Node, DB) ->
     erpc:call(Node, emqx_ds_replication_layer_meta, shards, [DB]).
 
 shards_online(Node, DB) ->
-    erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [DB]).
+    erpc:call(Node, emqx_ds_builtin_raft_db_sup, which_shards, [DB]).
 
 n_shards_online(Node, DB) ->
     length(shards_online(Node, DB)).
@@ -635,7 +875,6 @@ all() -> emqx_common_test_helpers:all(?MODULE).
 
 init_per_testcase(TCName, Config0) ->
     Config = emqx_common_test_helpers:init_per_testcase(?MODULE, TCName, Config0),
-    ok = snabbkaffe:start_trace(),
     Config.
 end_per_testcase(TCName, Config) ->
diff --git a/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl b/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl
index bca8eb0eb..f18114918 100644
--- a/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl
+++ b/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl
@@ -26,7 +26,7 @@ init_per_suite(Config) ->
             },
             <<"durable_storage">> => #{
                 <<"messages">> => #{
-                    <<"backend">> => <<"builtin">>
+                    <<"backend">> => <<"builtin_raft">>
                 }
             }
         }
diff --git a/apps/emqx_durable_storage/README.md b/apps/emqx_durable_storage/README.md
index 362ad47a3..739cbdc64 100644
--- a/apps/emqx_durable_storage/README.md
+++ b/apps/emqx_durable_storage/README.md
@@ -103,7 +103,7 @@ Consumption of messages is done in several stages:
 
 # Documentation links
 
-TBD
+https://docs.emqx.com/en/enterprise/latest/durability/durability_introduction.html
 
 # Usage
 
@@ -146,7 +146,39 @@ The following REST APIs are available for managing the builtin durable storages:
 - `/ds/storages/:ds/replicas/:site` — add or remove replica of the durable storage on the site
 
 # Other
-TBD
+
+Note: this application contains the main interface module and some common utility modules used by the backends, but it doesn't contain any ready-to-use DS backends.
+The backends are instead implemented as separate OTP applications, such as `emqx_ds_builtin_local` and `emqx_ds_builtin_raft`.
+
+There is a helper placeholder application `emqx_ds_backends` that depends on all backend applications available in the release.
+Business logic applications must have `emqx_ds_backends` as a dependency.
+
+The dependency diagram is the following:
+
+```
+                            +------------------------+
+                            |  emqx_durable_storage  |
+                            +------------------------+
+                              /          |          \
+                             /           |           \
+                            /            |            \
+ +------------------------+  +----------------------+  +------+
+ | emqx_ds_builtin_local  |  | emqx_ds_builtin_raft |  | ...  |
+ +------------------------+  +-----------+----------+  +------+
+                            \            |            /
+                             \           |           /
+                              \          |          /
+                           +-------------------------+
+                           |     emqx_ds_backends    |
+                           +-------------------------+
+                                  /             \
+                                 /               \
+ ......................../.. business apps .\........................
+                               /                   \
+                         +------+                 +-------+
+                         | emqx |                 |  ...  |
+                         +------+                 +-------+
+```
 
 # Contributing
 Please see our [contributing.md](../../CONTRIBUTING.md).
diff --git a/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl
index 0a82a6682..a76289eb9 100644
--- a/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl
+++ b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl
@@ -19,17 +19,17 @@
 %%%% Egress metrics:
 
 %% Number of successfully flushed batches:
--define(DS_EGRESS_BATCHES, emqx_ds_egress_batches).
+-define(DS_BUFFER_BATCHES, emqx_ds_buffer_batches).
 
 %% Number of batch flush retries:
--define(DS_EGRESS_BATCHES_RETRY, emqx_ds_egress_batches_retry).
+-define(DS_BUFFER_BATCHES_RETRY, emqx_ds_buffer_batches_retry).
 
 %% Number of batches that weren't flushed due to unrecoverable errors:
--define(DS_EGRESS_BATCHES_FAILED, emqx_ds_egress_batches_failed).
+-define(DS_BUFFER_BATCHES_FAILED, emqx_ds_buffer_batches_failed).
 
 %% Total number of messages that were successfully committed to the storage:
--define(DS_EGRESS_MESSAGES, emqx_ds_egress_messages).
+-define(DS_BUFFER_MESSAGES, emqx_ds_buffer_messages).
 
 %% Total size of payloads that were successfully committed to the storage:
--define(DS_EGRESS_BYTES, emqx_ds_egress_bytes).
+-define(DS_BUFFER_BYTES, emqx_ds_buffer_bytes).
%% Sliding average of flush time (microseconds): --define(DS_EGRESS_FLUSH_TIME, emqx_ds_egress_flush_time). +-define(DS_BUFFER_FLUSH_TIME, emqx_ds_buffer_flush_time). %%%% Storage layer metrics: -define(DS_STORE_BATCH_TIME, emqx_ds_store_batch_time). diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 2b903f5cf..7f6996bd7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -16,15 +16,16 @@ %% @doc Main interface module for `emqx_durable_storage' application. %% -%% It takes care of forwarding calls to the underlying DBMS. Currently -%% only the embedded `emqx_ds_replication_layer' storage is supported, -%% so all the calls are simply passed through. +%% It takes care of forwarding calls to the underlying DBMS. -module(emqx_ds). %% Management API: -export([ - base_dir/0, + register_backend/2, + open_db/2, + close_db/1, + which_dbs/0, update_db_config/2, add_generation/1, list_generations_with_lifetimes/1, @@ -60,7 +61,6 @@ iterator/0, delete_iterator/0, iterator_id/0, - message_id/0, message_key/0, message_store_opts/0, next_result/1, next_result/0, @@ -136,7 +136,7 @@ -type ds_specific_delete_stream() :: term(). --type make_delete_iterator_result(DeleteIterator) :: {ok, DeleteIterator} | {error, term()}. +-type make_delete_iterator_result(DeleteIterator) :: {ok, DeleteIterator} | error(_). -type make_delete_iterator_result() :: make_delete_iterator_result(delete_iterator()). @@ -173,10 +173,7 @@ _ => _ }. --type create_db_opts() :: - emqx_ds_replication_layer:builtin_db_opts() | generic_db_opts(). - --type message_id() :: emqx_ds_replication_layer:message_id(). +-type create_db_opts() :: generic_db_opts(). %% An opaque term identifying a generation. Each implementation will possibly add %% information to this term to match its inner structure (e.g.: by embedding the shard id, @@ -199,6 +196,8 @@ -callback open_db(db(), create_db_opts()) -> ok | {error, _}. +-callback close_db(db()) -> ok. + -callback add_generation(db()) -> ok | {error, _}. -callback update_db_config(db(), create_db_opts()) -> ok | {error, _}. @@ -247,21 +246,32 @@ %% API functions %%================================================================================ --spec base_dir() -> file:filename(). -base_dir() -> - application:get_env(?APP, db_data_dir, emqx:data_dir()). +%% @doc Register DS backend. +-spec register_backend(atom(), module()) -> ok. +register_backend(Name, Module) -> + persistent_term:put({emqx_ds_backend_module, Name}, Module). %% @doc Different DBs are completely independent from each other. They %% could represent something like different tenants. -spec open_db(db(), create_db_opts()) -> ok. -open_db(DB, Opts = #{backend := Backend}) when Backend =:= builtin orelse Backend =:= fdb -> - Module = - case Backend of - builtin -> emqx_ds_replication_layer; - fdb -> emqx_fdb_ds - end, - persistent_term:put(?persistent_term(DB), Module), - ?module(DB):open_db(DB, Opts). +open_db(DB, Opts = #{backend := Backend}) -> + case persistent_term:get({emqx_ds_backend_module, Backend}, undefined) of + undefined -> + error({no_such_backend, Backend}); + Module -> + persistent_term:put(?persistent_term(DB), Module), + emqx_ds_sup:register_db(DB, Backend), + ?module(DB):open_db(DB, Opts) + end. + +-spec close_db(db()) -> ok. +close_db(DB) -> + emqx_ds_sup:unregister_db(DB), + ?module(DB):close_db(DB). + +-spec which_dbs() -> [{db(), _Backend :: atom()}]. +which_dbs() -> + emqx_ds_sup:which_dbs(). 
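To make the new registration flow concrete, here is a minimal sketch of how a backend plugs itself into `emqx_ds` and, through the `emqx_ds_backends` placeholder described in the README above, becomes available to business applications. The `my_backend` atom and the `my_ds_backend`/`my_ds_backend_sup` modules are hypothetical names used only for illustration; the assumption that real backends such as `emqx_ds_builtin_local` register themselves from their application start callback is not shown in this diff.

```erlang
%% Hypothetical application callback module of a DS backend (sketch only):
-module(my_ds_backend_app).
-behaviour(application).
-export([start/2, stop/1]).

start(_Type, _Args) ->
    %% Make the backend known to `emqx_ds'; from now on
    %% `emqx_ds:open_db/2' with `#{backend => my_backend}' dispatches
    %% to `my_ds_backend' via the persistent term written above:
    ok = emqx_ds:register_backend(my_backend, my_ds_backend),
    my_ds_backend_sup:start_link().

stop(_State) ->
    ok.
```

A business application can then call `emqx_ds:open_db(my_db, #{backend => my_backend, ...})` without knowing which module implements the backend; the DB is listed together with its backend by `emqx_ds:which_dbs()` and is detached again with `emqx_ds:close_db(my_db)`. This replaces the previous hard-coded dispatch on the `builtin`/`fdb` atoms.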
-spec add_generation(db()) -> ok. add_generation(DB) -> @@ -286,9 +296,6 @@ drop_generation(DB, GenId) -> {error, not_implemented} end. -%% @doc TODO: currently if one or a few shards are down, they won't be - -%% deleted. -spec drop_db(db()) -> ok. drop_db(DB) -> case persistent_term:get(?persistent_term(DB), undefined) of diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl similarity index 80% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl rename to apps/emqx_durable_storage/src/emqx_ds_buffer.erl index 1d0efca6f..e93bb33be 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -14,23 +14,15 @@ %% limitations under the License. %%-------------------------------------------------------------------- -%% @doc Egress servers are responsible for proxing the outcoming -%% `store_batch' requests towards EMQX DS shards. -%% -%% They re-assemble messages from different local processes into -%% fixed-sized batches, and introduce centralized channels between the -%% nodes. They are also responsible for maintaining backpressure -%% towards the local publishers. -%% -%% There is (currently) one egress process for each shard running on -%% each node, but it should be possible to have a pool of egress -%% servers, if needed. --module(emqx_ds_replication_layer_egress). +%% @doc Buffer servers are responsible for collecting batches from the +%% local processes, sharding and repackaging them. +-module(emqx_ds_buffer). -behaviour(gen_server). %% API: --export([start_link/2, store_batch/3]). +-export([start_link/4, store_batch/3, shard_of_message/3]). +-export([ls/0]). %% behavior callbacks: -export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -47,9 +39,12 @@ %% Type declarations %%================================================================================ --define(via(DB, Shard), {via, gproc, {n, l, {?MODULE, DB, Shard}}}). +-define(name(DB, SHARD), {n, l, {?MODULE, DB, SHARD}}). +-define(via(DB, SHARD), {via, gproc, ?name(DB, SHARD)}). -define(flush, flush). +-define(cbm(DB), {?MODULE, DB}). + -record(enqueue_req, { messages :: [emqx_types:message()], sync :: boolean(), @@ -58,13 +53,29 @@ payload_bytes :: non_neg_integer() }). +-callback init_buffer(emqx_ds:db(), _Shard, _Options) -> {ok, _State}. + +-callback flush_buffer(emqx_ds:db(), _Shard, [emqx_types:message()], State) -> + {State, ok | {error, recoverable | unrecoverable, _}}. + +-callback shard_of_message(emqx_ds:db(), emqx_types:message(), topic | clientid, _Options) -> + _Shard. + %%================================================================================ %% API functions %%================================================================================ --spec start_link(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> {ok, pid()}. -start_link(DB, Shard) -> - gen_server:start_link(?via(DB, Shard), ?MODULE, [DB, Shard], []). +-spec ls() -> [{emqx_ds:db(), _Shard}]. +ls() -> + MS = {{?name('$1', '$2'), '_', '_'}, [], [{{'$1', '$2'}}]}, + gproc:select({local, names}, [MS]). + +-spec start_link(module(), _CallbackOptions, emqx_ds:db(), _ShardId) -> + {ok, pid()}. +start_link(CallbackModule, CallbackOptions, DB, Shard) -> + gen_server:start_link( + ?via(DB, Shard), ?MODULE, [CallbackModule, CallbackOptions, DB, Shard], [] + ). 
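Since the buffer is now backend-agnostic, each backend supplies a small callback module for it. The following is a minimal sketch of such a module; `my_ds_buffer` and `my_ds_storage` are invented names for illustration, and the real implementations live in the backend applications rather than in this hunk.

```erlang
%% Hypothetical buffer callback module of a DS backend (sketch only):
-module(my_ds_buffer).
-behaviour(emqx_ds_buffer).

-export([init_buffer/3, flush_buffer/4, shard_of_message/4]).

%% Called once per buffer worker; the returned state is threaded
%% through subsequent `flush_buffer/4' calls:
init_buffer(_DB, _Shard, Options) ->
    {ok, Options}.

%% Hand the accumulated batch over to the backend. Returning
%% `{error, recoverable, _}' makes the buffer retry the flush (subject
%% to its retry budget, currently zero), while `{error, unrecoverable, _}'
%% fails the batch and is reported back to the callers:
flush_buffer(DB, Shard, Messages, State) ->
    Result = my_ds_storage:store_batch(DB, Shard, Messages),
    {State, Result}.

%% Trivial sharding strategy: everything goes to a single shard.
shard_of_message(_DB, _Message, _ShardBy, _Options) ->
    <<"0">>.
```

The backend would then start one buffer per shard with `emqx_ds_buffer:start_link(my_ds_buffer, Options, DB, Shard)`; `emqx_ds_buffer:store_batch/3` and `emqx_ds_buffer:shard_of_message/3` dispatch to this module through the persistent term stored in the buffer's `init/1`.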
-spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). @@ -95,13 +106,20 @@ store_batch(DB, Messages, Opts) -> repackage_messages(DB, Messages, Sync) end. +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> _Shard. +shard_of_message(DB, Message, ShardBy) -> + {CBM, Options} = persistent_term:get(?cbm(DB)), + CBM:shard_of_message(DB, Message, ShardBy, Options). + %%================================================================================ %% behavior callbacks %%================================================================================ -record(s, { + callback_module :: module(), + callback_state :: term(), db :: emqx_ds:db(), - shard :: emqx_ds_replication_layer:shard_id(), + shard :: _ShardId, metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), n_retries = 0 :: non_neg_integer(), %% FIXME: Currently max_retries is always 0, because replication @@ -115,18 +133,22 @@ store_batch(DB, Messages, Opts) -> pending_replies = [] :: [gen_server:from()] }). -init([DB, Shard]) -> +init([CBM, CBMOptions, DB, Shard]) -> process_flag(trap_exit, true), process_flag(message_queue_data, off_heap), - logger:update_process_metadata(#{domain => [emqx, ds, egress, DB]}), + logger:update_process_metadata(#{domain => [emqx, ds, buffer, DB]}), MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), + {ok, CallbackS} = CBM:init_buffer(DB, Shard, CBMOptions), S = #s{ + callback_module = CBM, + callback_state = CallbackS, db = DB, shard = Shard, metrics_id = MetricsId, queue = queue:new() }, + persistent_term:put(?cbm(DB), {CBM, CBMOptions}), {ok, S}. format_status(Status) -> @@ -179,7 +201,8 @@ handle_info(?flush, S) -> handle_info(_Info, S) -> {noreply, S}. -terminate(_Reason, _S) -> +terminate(_Reason, #s{db = DB}) -> + persistent_term:erase(?cbm(DB)), ok. 
%%================================================================================ @@ -234,7 +257,9 @@ flush(S) -> do_flush(S0 = #s{n = 0}) -> S0; do_flush( - S = #s{ + S0 = #s{ + callback_module = CBM, + callback_state = CallbackS0, queue = Q, pending_replies = Replies, db = DB, @@ -246,16 +271,17 @@ do_flush( ) -> Messages = queue:to_list(Q), T0 = erlang:monotonic_time(microsecond), - Result = emqx_ds_replication_layer:ra_store_batch(DB, Shard, Messages), + {CallbackS, Result} = CBM:flush_buffer(DB, Shard, Messages, CallbackS0), + S = S0#s{callback_state = CallbackS}, T1 = erlang:monotonic_time(microsecond), - emqx_ds_builtin_metrics:observe_egress_flush_time(Metrics, T1 - T0), + emqx_ds_builtin_metrics:observe_buffer_flush_time(Metrics, T1 - T0), case Result of ok -> - emqx_ds_builtin_metrics:inc_egress_batches(Metrics), - emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), - emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), + emqx_ds_builtin_metrics:inc_buffer_batches(Metrics), + emqx_ds_builtin_metrics:inc_buffer_messages(Metrics, S#s.n), + emqx_ds_builtin_metrics:inc_buffer_bytes(Metrics, S#s.n_bytes), ?tp( - emqx_ds_replication_layer_egress_flush, + emqx_ds_buffer_flush, #{db => DB, shard => Shard, batch => Messages} ), lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), @@ -266,7 +292,7 @@ do_flush( queue = queue:new(), pending_replies = [] }; - {timeout, ServerId} when Retries < MaxRetries -> + {error, recoverable, Err} when Retries < MaxRetries -> %% Note: this is a hot loop, so we report error messages %% with `debug' level to avoid wiping the logs. Instead, %% error the detection must rely on the metrics. Debug @@ -274,11 +300,11 @@ do_flush( %% via logger domain. ?tp( debug, - emqx_ds_replication_layer_egress_flush_retry, - #{db => DB, shard => Shard, reason => timeout, server_id => ServerId} + emqx_ds_buffer_flush_retry, + #{db => DB, shard => Shard, reason => Err} ), %% Retry sending the batch: - emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), + emqx_ds_builtin_metrics:inc_buffer_batches_retry(Metrics), erlang:garbage_collect(), %% We block the gen_server until the next retry. BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), @@ -287,10 +313,10 @@ do_flush( Err -> ?tp( debug, - emqx_ds_replication_layer_egress_flush_failed, + emqx_ds_buffer_flush_failed, #{db => DB, shard => Shard, error => Err} ), - emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), + emqx_ds_builtin_metrics:inc_buffer_batches_failed(Metrics), Reply = case Err of {error, _, _} -> Err; @@ -311,7 +337,7 @@ do_flush( end. -spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> - [{emqx_ds_replication_layer:shard_id(), {NMessages, NBytes}}] + [{_ShardId, {NMessages, NBytes}}] when NMessages :: non_neg_integer(), NBytes :: non_neg_integer(). 
@@ -320,7 +346,7 @@ shards_of_batch(DB, Messages) -> lists:foldl( fun(Message, Acc) -> %% TODO: sharding strategy must be part of the DS DB schema: - Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), + Shard = shard_of_message(DB, Message, clientid), Size = payload_size(Message), maps:update_with( Shard, @@ -339,7 +365,7 @@ shards_of_batch(DB, Messages) -> repackage_messages(DB, Messages, Sync) -> Batches = lists:foldl( fun(Message, Acc) -> - Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), + Shard = shard_of_message(DB, Message, clientid), Size = payload_size(Message), maps:update_with( Shard, diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 06bf7f045..d48cd0e34 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -22,13 +22,13 @@ -export([prometheus_meta/0, prometheus_collect/1]). -export([ - inc_egress_batches/1, - inc_egress_batches_retry/1, - inc_egress_batches_failed/1, - inc_egress_messages/2, - inc_egress_bytes/2, + inc_buffer_batches/1, + inc_buffer_batches_retry/1, + inc_buffer_batches_failed/1, + inc_buffer_messages/2, + inc_buffer_bytes/2, - observe_egress_flush_time/2, + observe_buffer_flush_time/2, observe_store_batch_time/2, @@ -68,16 +68,16 @@ -define(DB_METRICS, ?STORAGE_LAYER_METRICS ++ ?FETCH_METRICS). --define(EGRESS_METRICS, [ - {counter, ?DS_EGRESS_BATCHES}, - {counter, ?DS_EGRESS_BATCHES_RETRY}, - {counter, ?DS_EGRESS_BATCHES_FAILED}, - {counter, ?DS_EGRESS_MESSAGES}, - {counter, ?DS_EGRESS_BYTES}, - {slide, ?DS_EGRESS_FLUSH_TIME} +-define(BUFFER_METRICS, [ + {counter, ?DS_BUFFER_BATCHES}, + {counter, ?DS_BUFFER_BATCHES_RETRY}, + {counter, ?DS_BUFFER_BATCHES_FAILED}, + {counter, ?DS_BUFFER_MESSAGES}, + {counter, ?DS_BUFFER_BYTES}, + {slide, ?DS_BUFFER_FLUSH_TIME} ]). --define(SHARD_METRICS, ?EGRESS_METRICS). +-define(SHARD_METRICS, ?BUFFER_METRICS). -type shard_metrics_id() :: binary(). @@ -96,7 +96,7 @@ child_spec() -> init_for_db(DB) -> emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). --spec shard_metric_id(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> shard_metrics_id(). +-spec shard_metric_id(emqx_ds:db(), binary()) -> shard_metrics_id(). shard_metric_id(DB, ShardId) -> iolist_to_binary([atom_to_list(DB), $/, ShardId]). @@ -106,37 +106,37 @@ init_for_shard(ShardId) -> emqx_metrics_worker:create_metrics(?WORKER, ShardId, ?SHARD_METRICS, []). %% @doc Increase the number of successfully flushed batches --spec inc_egress_batches(shard_metrics_id()) -> ok. -inc_egress_batches(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES). +-spec inc_buffer_batches(shard_metrics_id()) -> ok. +inc_buffer_batches(Id) -> + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES). -%% @doc Increase the number of time the egress worker had to retry +%% @doc Increase the number of time the buffer worker had to retry %% flushing the batch --spec inc_egress_batches_retry(shard_metrics_id()) -> ok. -inc_egress_batches_retry(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_RETRY). +-spec inc_buffer_batches_retry(shard_metrics_id()) -> ok. +inc_buffer_batches_retry(Id) -> + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_RETRY). 
-%% @doc Increase the number of time the egress worker encountered an +%% @doc Increase the number of time the buffer worker encountered an %% unrecoverable error while trying to flush the batch --spec inc_egress_batches_failed(shard_metrics_id()) -> ok. -inc_egress_batches_failed(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_FAILED). +-spec inc_buffer_batches_failed(shard_metrics_id()) -> ok. +inc_buffer_batches_failed(Id) -> + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_FAILED). %% @doc Increase the number of messages successfully saved to the shard --spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. -inc_egress_messages(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_MESSAGES, NMessages). +-spec inc_buffer_messages(shard_metrics_id(), non_neg_integer()) -> ok. +inc_buffer_messages(Id, NMessages) -> + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_MESSAGES, NMessages). %% @doc Increase the number of messages successfully saved to the shard --spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. -inc_egress_bytes(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BYTES, NMessages). +-spec inc_buffer_bytes(shard_metrics_id(), non_neg_integer()) -> ok. +inc_buffer_bytes(Id, NMessages) -> + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BYTES, NMessages). -%% @doc Add a sample of elapsed time spent flushing the egress to the +%% @doc Add a sample of elapsed time spent flushing the buffer to the %% Raft log (in microseconds) --spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. -observe_egress_flush_time(Id, FlushTime) -> - catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_EGRESS_FLUSH_TIME, FlushTime). +-spec observe_buffer_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. +observe_buffer_flush_time(Id, FlushTime) -> + catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_BUFFER_FLUSH_TIME, FlushTime). -spec observe_store_batch_time(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. observe_store_batch_time({DB, _}, StoreTime) -> @@ -176,11 +176,14 @@ prometheus_collect(NodeOrAggr) -> prometheus_per_db(NodeOrAggr) -> lists:foldl( - fun(DB, Acc) -> - prometheus_per_db(NodeOrAggr, DB, Acc) + fun + ({DB, Backend}, Acc) when Backend =:= builtin_local; Backend =:= builtin_raft -> + prometheus_per_db(NodeOrAggr, DB, Acc); + ({_, _}, Acc) -> + Acc end, #{}, - emqx_ds_builtin_db_sup:which_dbs() + emqx_ds:which_dbs() ). %% This function returns the data in the following format: @@ -221,13 +224,13 @@ prometheus_per_db(NodeOrAggr, DB, Acc0) -> %% This function returns the data in the following format: %% ``` -%% #{emqx_ds_egress_batches => +%% #{emqx_ds_buffer_batches => %% [{[{db,messages},{shard,<<"1">>}],99408}, %% {[{db,messages},{shard,<<"0">>}],99409}], -%% emqx_ds_egress_batches_retry => +%% emqx_ds_buffer_batches_retry => %% [{[{db,messages},{shard,<<"1">>}],0}, %% {[{db,messages},{shard,<<"0">>}],0}], -%% emqx_ds_egress_messages => +%% emqx_ds_buffer_messages => %% ... %% } %% ''' @@ -235,18 +238,15 @@ prometheus_per_db(NodeOrAggr, DB, Acc0) -> %% If `NodeOrAggr' = `node' then node name is appended to the list of %% labels. prometheus_per_shard(NodeOrAggr) -> + prometheus_buffer_metrics(NodeOrAggr). 
+ +prometheus_buffer_metrics(NodeOrAggr) -> lists:foldl( - fun(DB, Acc0) -> - lists:foldl( - fun(Shard, Acc) -> - prometheus_per_shard(NodeOrAggr, DB, Shard, Acc) - end, - Acc0, - emqx_ds_replication_layer_meta:shards(DB) - ) + fun({DB, Shard}, Acc) -> + prometheus_per_shard(NodeOrAggr, DB, Shard, Acc) end, #{}, - emqx_ds_builtin_db_sup:which_dbs() + emqx_ds_buffer:ls() ). prometheus_per_shard(NodeOrAggr, DB, Shard, Acc0) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 47fe047fc..818d0bcb7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -55,7 +55,7 @@ -export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). %% internal exports: --export([db_dir/1]). +-export([db_dir/1, base_dir/0]). -export_type([ gen_id/0, @@ -87,6 +87,8 @@ %% Type declarations %%================================================================================ +-define(APP, emqx_durable_storage). + %% # "Record" integer keys. We use maps with integer keys to avoid persisting and sending %% records over the wire. %% tags: @@ -104,7 +106,7 @@ {emqx_ds_storage_reference, emqx_ds_storage_reference:options()} | {emqx_ds_storage_bitfield_lts, emqx_ds_storage_bitfield_lts:options()}. --type shard_id() :: {emqx_ds:db(), emqx_ds_replication_layer:shard_id()}. +-type shard_id() :: {emqx_ds:db(), binary()}. -type cf_refs() :: [{string(), rocksdb:cf_handle()}]. @@ -424,11 +426,11 @@ make_delete_iterator( ?generation => GenId, ?enc => Iter }}; - {error, _} = Err -> - Err + {error, Err} -> + {error, unrecoverable, Err} end; not_found -> - {error, end_of_stream} + {error, unrecoverable, generation_not_found} end. -spec update_iterator(shard_id(), iterator(), emqx_ds:message_key()) -> @@ -447,8 +449,8 @@ update_iterator( ?generation => GenId, ?enc => Iter }}; - {error, _} = Err -> - Err + {error, Err} -> + {error, unrecoverable, Err} end; not_found -> {error, unrecoverable, generation_not_found} @@ -889,13 +891,17 @@ rocksdb_open(Shard, Options) -> Error end. +-spec base_dir() -> file:filename(). +base_dir() -> + application:get_env(?APP, db_data_dir, emqx:data_dir()). + -spec db_dir(shard_id()) -> file:filename(). db_dir({DB, ShardId}) -> - filename:join([emqx_ds:base_dir(), DB, binary_to_list(ShardId)]). + filename:join([base_dir(), DB, binary_to_list(ShardId)]). -spec checkpoints_dir(shard_id()) -> file:filename(). checkpoints_dir({DB, ShardId}) -> - filename:join([emqx_ds:base_dir(), DB, checkpoints, binary_to_list(ShardId)]). + filename:join([base_dir(), DB, checkpoints, binary_to_list(ShardId)]). -spec checkpoint_dir(shard_id(), _Name :: file:name()) -> file:filename(). checkpoint_dir(ShardId, Name) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_sup.erl index c4bd0e873..0a8d3c2ba 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_sup.erl @@ -18,7 +18,8 @@ -behaviour(supervisor). %% API: --export([start_link/0, attach_backend/2]). +-export([start_link/0]). +-export([register_db/2, unregister_db/1, which_dbs/0]). %% behaviour callbacks: -export([init/1]). @@ -28,6 +29,7 @@ %%================================================================================ -define(SUP, ?MODULE). +-define(TAB, ?MODULE). 
%%================================================================================ %% API functions @@ -37,33 +39,24 @@ start_link() -> supervisor:start_link({local, ?SUP}, ?MODULE, top). -%% @doc Attach a child backend-specific supervisor to the top -%% application supervisor, if not yet present --spec attach_backend(_BackendId, {module(), atom(), list()}) -> - {ok, pid()} | {error, _}. -attach_backend(Backend, Start) -> - Spec = #{ - id => Backend, - start => Start, - significant => false, - shutdown => infinity, - type => supervisor - }, - case supervisor:start_child(?SUP, Spec) of - {ok, Pid} -> - {ok, Pid}; - {error, {already_started, Pid}} -> - {ok, Pid}; - {error, Err} -> - {error, Err} - end. +register_db(DB, Backend) -> + ets:insert(?TAB, {DB, Backend}), + ok. + +unregister_db(DB) -> + ets:delete(?TAB, DB), + ok. + +which_dbs() -> + ets:tab2list(?TAB). %%================================================================================ %% behaviour callbacks %%================================================================================ init(top) -> - Children = [], + _ = ets:new(?TAB, [public, set, named_table]), + Children = [emqx_ds_builtin_metrics:child_spec()], SupFlags = #{ strategy => one_for_one, intensity => 10, diff --git a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src index 7a20577d4..7bfa6efd3 100644 --- a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src +++ b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src @@ -2,10 +2,10 @@ {application, emqx_durable_storage, [ {description, "Message persistence and subscription replays for EMQX"}, % strict semver, bump manually! - {vsn, "0.2.1"}, + {vsn, "0.3.0"}, {modules, []}, {registered, []}, - {applications, [kernel, stdlib, rocksdb, gproc, mria, ra, emqx_utils]}, + {applications, [kernel, stdlib, rocksdb, gproc, mria, emqx_utils]}, {mod, {emqx_ds_app, []}}, {env, []} ]}. diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index 004096431..bd0f382b2 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -26,16 +26,13 @@ -define(SHARD, shard(?FUNCTION_NAME)). -define(DEFAULT_CONFIG, #{ - backend => builtin, + backend => builtin_local, storage => {emqx_ds_storage_bitfield_lts, #{}}, - n_shards => 1, - n_sites => 1, - replication_factor => 1, - replication_options => #{} + n_shards => 1 }). -define(COMPACT_CONFIG, #{ - backend => builtin, + backend => builtin_local, storage => {emqx_ds_storage_bitfield_lts, #{ bits_per_wildcard_level => 8 @@ -138,8 +135,8 @@ t_get_streams(_Config) -> [FooBarBaz] = GetStream(<<"foo/bar/baz">>), [A] = GetStream(<<"a">>), %% Restart shard to make sure trie is persisted and restored: - ok = emqx_ds_builtin_sup:stop_db(?FUNCTION_NAME), - {ok, _} = emqx_ds_builtin_sup:start_db(?FUNCTION_NAME, #{}), + ok = emqx_ds:close_db(?FUNCTION_NAME), + ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Verify that there are no "ghost streams" for topics that don't %% have any messages: [] = GetStream(<<"bar/foo">>), @@ -188,8 +185,7 @@ t_new_generation_inherit_trie(_Config) -> %% learned trie. ok = emqx_ds_storage_layer:add_generation(?SHARD, _Since = 1_000), %% Restart the shard, to verify that LTS is persisted. 
- ok = application:stop(emqx_durable_storage), - ok = application:start(emqx_durable_storage), + ok = emqx_ds:close_db(?FUNCTION_NAME), ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Store a batch of messages with the same set of topics. TS2 = 1_500, @@ -241,8 +237,8 @@ t_replay(_Config) -> ?assert(check(?SHARD, <<"+/+/+">>, 0, Messages)), ?assert(check(?SHARD, <<"+/+/baz">>, 0, Messages)), %% Restart the DB to make sure trie is persisted and restored: - ok = emqx_ds_builtin_sup:stop_db(?FUNCTION_NAME), - {ok, _} = emqx_ds_builtin_sup:start_db(?FUNCTION_NAME, #{}), + ok = emqx_ds:close_db(?FUNCTION_NAME), + ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Learned wildcard topics: ?assertNot(check(?SHARD, <<"wildcard/1000/suffix/foo">>, 0, [])), ?assert(check(?SHARD, <<"wildcard/1/suffix/foo">>, 0, Messages)), @@ -279,7 +275,7 @@ t_atomic_store_batch(_Config) -> %% Must contain exactly one flush with all messages. ?assertMatch( [#{batch := [_, _, _]}], - ?of_kind(emqx_ds_replication_layer_egress_flush, Trace) + ?of_kind(emqx_ds_buffer_flush, Trace) ), ok end @@ -308,7 +304,7 @@ t_non_atomic_store_batch(_Config) -> end, fun(ExpectedMsgs, Trace) -> ProcessedMsgs = lists:append( - ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)) + ?projection(batch, ?of_kind(emqx_ds_buffer_flush, Trace)) ), ?assertEqual( ExpectedMsgs, @@ -512,7 +508,7 @@ suite() -> [{timetrap, {seconds, 20}}]. init_per_suite(Config) -> emqx_common_test_helpers:clear_screen(), Apps = emqx_cth_suite:start( - [emqx_durable_storage], + [emqx_ds_builtin_local], #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{apps, Apps} | Config]. diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 4fed1d57b..ba9589e97 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -26,6 +26,17 @@ emqx_ds_test_helpers:on(NODE, fun() -> BODY end) ). +skip_if_norepl() -> + try emqx_release:edition() of + ee -> + false; + _ -> + {skip, no_ds_replication} + catch + error:undef -> + {skip, standalone_not_supported} + end. + -spec on([node()] | node(), fun(() -> A)) -> A | [A]. on(Node, Fun) when is_atom(Node) -> [Ret] = on([Node], Fun), @@ -217,9 +228,13 @@ transitions(Node, DB) -> %% Stream comparison -message_eq(Msg1, {_Key, Msg2}) -> - %% Timestamps can be modified by the replication layer, ignore them: - Msg1#message{timestamp = 0} =:= Msg2#message{timestamp = 0}. +message_eq(Fields, {_Key, Msg1 = #message{}}, Msg2) -> + message_eq(Fields, Msg1, Msg2); +message_eq(Fields, Msg1, {_Key, Msg2 = #message{}}) -> + message_eq(Fields, Msg1, Msg2); +message_eq(Fields, Msg1 = #message{}, Msg2 = #message{}) -> + maps:with(Fields, emqx_message:to_map(Msg1)) =:= + maps:with(Fields, emqx_message:to_map(Msg2)). %% Consuming streams and iterators @@ -242,18 +257,27 @@ verify_stream_effects(DB, TestCase, Nodes0, L) -> -spec verify_stream_effects(atom(), binary(), node(), emqx_types:clientid(), ds_stream()) -> ok. 
verify_stream_effects(DB, TestCase, Node, ClientId, ExpectedStream) -> ct:pal("Checking consistency of effects for ~p on ~p", [ClientId, Node]), - DiffOpts = #{context => 20, window => 1000, compare_fun => fun message_eq/2}, ?defer_assert( begin snabbkaffe_diff:assert_lists_eq( ExpectedStream, ds_topic_stream(DB, ClientId, client_topic(TestCase, ClientId), Node), - DiffOpts + message_diff_options([id, qos, from, flags, headers, topic, payload, extra]) ), ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node]) end ). +diff_messages(Fields, Expected, Got) -> + snabbkaffe_diff:assert_lists_eq(Expected, Got, message_diff_options(Fields)). + +message_diff_options(Fields) -> + #{ + context => 20, + window => 1000, + compare_fun => fun(M1, M2) -> message_eq(Fields, M1, M2) end + }. + %% Create a stream from the topic (wildcards are NOT supported for a %% good reason: order of messages is implementation-dependent!). %% @@ -297,7 +321,7 @@ nodes_of_clientid(DB, ClientId, Nodes = [N0 | _]) -> shard_of_clientid(DB, Node, ClientId) -> ?ON( Node, - emqx_ds_replication_layer:shard_of_message(DB, #message{from = ClientId}, clientid) + emqx_ds_buffer:shard_of_message(DB, #message{from = ClientId}, clientid) ). %% Consume eagerly: diff --git a/apps/emqx_machine/priv/reboot_lists.eterm b/apps/emqx_machine/priv/reboot_lists.eterm index 62d357c19..277d9fd66 100644 --- a/apps/emqx_machine/priv/reboot_lists.eterm +++ b/apps/emqx_machine/priv/reboot_lists.eterm @@ -42,6 +42,7 @@ esasl, emqx_utils, emqx_durable_storage, + emqx_ds_backends, emqx_http_lib, emqx_resource, emqx_connector, @@ -135,7 +136,8 @@ emqx_bridge_confluent, emqx_ds_shared_sub, emqx_auth_ext, - emqx_cluster_link + emqx_cluster_link, + emqx_ds_builtin_raft ], %% must always be of type `load' ce_business_apps => diff --git a/apps/emqx_machine/src/emqx_machine_boot.erl b/apps/emqx_machine/src/emqx_machine_boot.erl index 777ad2959..0a01c1d20 100644 --- a/apps/emqx_machine/src/emqx_machine_boot.erl +++ b/apps/emqx_machine/src/emqx_machine_boot.erl @@ -188,6 +188,8 @@ runtime_deps() -> {emqx_connector, fun(App) -> lists:prefix("emqx_bridge_", atom_to_list(App)) end}, %% emqx_fdb is an EE app {emqx_durable_storage, emqx_fdb}, + %% emqx_ds_builtin is an EE app + {emqx_ds_backends, emqx_ds_builtin_raft}, {emqx_dashboard, emqx_license} ]. diff --git a/apps/emqx_management/src/emqx_mgmt_api_ds.erl b/apps/emqx_management/src/emqx_mgmt_api_ds.erl index 5d0bd8763..bc949cd8a 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_ds.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_ds.erl @@ -15,13 +15,9 @@ %%-------------------------------------------------------------------- -module(emqx_mgmt_api_ds). --behaviour(minirest_api). +-if(?EMQX_RELEASE_EDITION == ee). --include_lib("emqx/include/logger.hrl"). --include_lib("typerefl/include/types.hrl"). --include_lib("hocon/include/hoconsc.hrl"). --include_lib("emqx_utils/include/emqx_utils_api.hrl"). --include_lib("emqx/include/emqx_persistent_message.hrl"). +-behaviour(minirest_api). -import(hoconsc, [mk/2, ref/1, enum/1, array/1]). @@ -50,10 +46,11 @@ fields/1 ]). -%% internal exports: --export([]). - --export_type([]). +-include_lib("emqx/include/logger.hrl"). +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx_utils/include/emqx_utils_api.hrl"). +-include_lib("emqx/include/emqx_persistent_message.hrl"). 
%%================================================================================ %% Type declarations @@ -494,3 +491,5 @@ meta_result_to_binary({error, {member_of_replica_sets, DBNames}}) -> meta_result_to_binary({error, Err}) -> IOList = io_lib:format("Error: ~p", [Err]), {error, iolist_to_binary(IOList)}. + +-endif. diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 7dc614c6d..8d327efe6 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -848,6 +848,7 @@ ds(CMD) -> emqx_ctl:usage([{"ds", "Durable storage is disabled"}]) end. +-if(?EMQX_RELEASE_EDITION == ee). do_ds(["info"]) -> emqx_ds_replication_layer_meta:print_status(); do_ds(["set_replicas", DBStr | SitesStr]) -> @@ -907,6 +908,10 @@ do_ds(_) -> {"ds leave ", "Remove site from the replica set of the storage"}, {"ds forget ", "Forcefully remove a site from the list of known sites"} ]). +-else. +do_ds(_CMD) -> + emqx_ctl:usage([{"ds", "DS CLI is not available in this edition of EMQX"}]). +-endif. %%-------------------------------------------------------------------- %% Dump ETS diff --git a/apps/emqx_management/test/emqx_mgmt_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_SUITE.erl index e5de64b5a..0cde87465 100644 --- a/apps/emqx_management/test/emqx_mgmt_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_SUITE.erl @@ -56,22 +56,27 @@ init_per_group(persistence_disabled, Config) -> | Config ]; init_per_group(persistence_enabled, Config) -> - Apps = emqx_cth_suite:start( - [ - {emqx, - "durable_sessions {\n" - " enable = true\n" - " heartbeat_interval = 100ms\n" - " renew_streams_interval = 100ms\n" - "}"}, - emqx_management - ], - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - [ - {apps, Apps} - | Config - ]; + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + Apps = emqx_cth_suite:start( + [ + {emqx, + "durable_sessions {\n" + " enable = true\n" + " heartbeat_interval = 100ms\n" + " renew_streams_interval = 100ms\n" + "}"}, + emqx_management + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [ + {apps, Apps} + | Config + ]; + Yes -> + Yes + end; init_per_group(cm_registry_enabled, Config) -> [{emqx_config, "broker.enable_session_registry = true"} | Config]; init_per_group(cm_registry_disabled, Config) -> diff --git a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl index 2c71e9822..37b769655 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl @@ -52,6 +52,7 @@ persistent_session_testcases() -> t_persistent_sessions_subscriptions1, t_list_clients_v2 ]. + client_msgs_testcases() -> [ t_inflight_messages, diff --git a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl index 881ce8e3f..f707ed1ac 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl @@ -27,16 +27,21 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - Apps = emqx_cth_suite:start( - [ - {emqx, "durable_sessions.enable = true"}, - emqx_management, - {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"} - ], - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - {ok, _} = emqx_common_test_http:create_default_app(), - [{suite_apps, Apps} | Config]. 
+    case emqx_ds_test_helpers:skip_if_norepl() of
+        false ->
+            Apps = emqx_cth_suite:start(
+                [
+                    {emqx, "durable_sessions.enable = true"},
+                    emqx_management,
+                    {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"}
+                ],
+                #{work_dir => emqx_cth_suite:work_dir(Config)}
+            ),
+            {ok, _} = emqx_common_test_http:create_default_app(),
+            [{suite_apps, Apps} | Config];
+        Yes ->
+            Yes
+    end.
 
 end_per_suite(Config) ->
     ok = emqx_cth_suite:stop(?config(suite_apps, Config)).
diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl
index f4d0ff2c0..5d88ebd17 100644
--- a/apps/emqx_prometheus/src/emqx_prometheus.erl
+++ b/apps/emqx_prometheus/src/emqx_prometheus.erl
@@ -504,12 +504,12 @@ emqx_collect(K = emqx_mria_bootstrap_num_keys, D) -> gauge_metrics(?MG(K, D, [])
 emqx_collect(K = emqx_mria_message_queue_len, D) -> gauge_metrics(?MG(K, D, []));
 emqx_collect(K = emqx_mria_replayq_len, D) -> gauge_metrics(?MG(K, D, []));
 %% DS
-emqx_collect(K = ?DS_EGRESS_BATCHES, D) -> counter_metrics(?MG(K, D, []));
-emqx_collect(K = ?DS_EGRESS_BATCHES_RETRY, D) -> counter_metrics(?MG(K, D, []));
-emqx_collect(K = ?DS_EGRESS_BATCHES_FAILED, D) -> counter_metrics(?MG(K, D, []));
-emqx_collect(K = ?DS_EGRESS_MESSAGES, D) -> counter_metrics(?MG(K, D, []));
-emqx_collect(K = ?DS_EGRESS_BYTES, D) -> counter_metrics(?MG(K, D, []));
-emqx_collect(K = ?DS_EGRESS_FLUSH_TIME, D) -> gauge_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_BATCHES, D) -> counter_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_BATCHES_RETRY, D) -> counter_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_BATCHES_FAILED, D) -> counter_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_MESSAGES, D) -> counter_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_BYTES, D) -> counter_metrics(?MG(K, D, []));
+emqx_collect(K = ?DS_BUFFER_FLUSH_TIME, D) -> gauge_metrics(?MG(K, D, []));
 emqx_collect(K = ?DS_STORE_BATCH_TIME, D) -> gauge_metrics(?MG(K, D, []));
 emqx_collect(K = ?DS_BUILTIN_NEXT_TIME, D) -> gauge_metrics(?MG(K, D, []));
 emqx_collect(K = ?DS_LTS_SEEK_COUNTER, D) -> counter_metrics(?MG(K, D, []));
diff --git a/changes/ce/breaking-13248.en.md b/changes/ce/breaking-13248.en.md
new file mode 100644
index 000000000..a359cc960
--- /dev/null
+++ b/changes/ce/breaking-13248.en.md
@@ -0,0 +1,16 @@
+The `builtin` durable storage backend has been replaced with the following two backends:
+
+- `builtin_local`: A durable storage backend that doesn't support replication.
+  It can't be used in a multi-node cluster.
+  This backend is available in both open source and enterprise editions.
+- `builtin_raft`: A durable storage backend that uses the Raft algorithm for replication.
+  This backend is available only in the enterprise edition.
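For reference, a sketch of what selecting each of the new backends looks like at the `emqx_ds:open_db/2` level; the option values below are illustrative (loosely mirroring the test suites in this change set) and not recommended production settings.

```erlang
%% builtin_local: available in both editions, no replication options.
ok = emqx_ds:open_db(messages, #{
    backend => builtin_local,
    storage => {emqx_ds_storage_bitfield_lts, #{}},
    n_shards => 4
}).

%% builtin_raft: enterprise only, adds replication settings.
ok = emqx_ds:open_db(messages, #{
    backend => builtin_raft,
    storage => {emqx_ds_storage_bitfield_lts, #{}},
    n_shards => 4,
    n_sites => 1,
    replication_factor => 3,
    replication_options => #{}
}).
```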
+ +The following Prometheus metrics have been renamed: + +- `emqx_ds_egress_batches` -> `emqx_ds_buffer_batches` +- `emqx_ds_egress_batches_retry` -> `emqx_ds_buffer_batches_retry` +- `emqx_ds_egress_batches_failed` -> `emqx_ds_buffer_batches_failed` +- `emqx_ds_egress_messages` -> `emqx_ds_buffer_messages` +- `emqx_ds_egress_bytes` -> `emqx_ds_buffer_bytes` +- `emqx_ds_egress_flush_time` -> `emqx_ds_buffer_flush_time` diff --git a/mix.exs b/mix.exs index 0c1168c61..6a7e6bda7 100644 --- a/mix.exs +++ b/mix.exs @@ -205,7 +205,8 @@ defmodule EMQXUmbrella.MixProject do :emqx_bridge_syskeeper, :emqx_ds_shared_sub, :emqx_auth_ext, - :emqx_cluster_link + :emqx_cluster_link, + :emqx_ds_builtin_raft ]) end @@ -341,6 +342,8 @@ defmodule EMQXUmbrella.MixProject do :emqx_s3, :emqx_opentelemetry, :emqx_durable_storage, + :emqx_ds_builtin_local, + :emqx_ds_builtin_raft, :rabbit_common, :emqx_eviction_agent, :emqx_node_rebalance diff --git a/rebar.config.erl b/rebar.config.erl index 493955670..2cfa8c986 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -124,6 +124,7 @@ is_community_umbrella_app("apps/emqx_node_rebalance") -> false; is_community_umbrella_app("apps/emqx_ds_shared_sub") -> false; is_community_umbrella_app("apps/emqx_auth_ext") -> false; is_community_umbrella_app("apps/emqx_cluster_link") -> false; +is_community_umbrella_app("apps/emqx_ds_builtin_raft") -> false; is_community_umbrella_app(_) -> true. %% BUILD_WITHOUT_JQ diff --git a/rel/i18n/emqx_ds_schema.hocon b/rel/i18n/emqx_ds_schema.hocon index 65b76b6fa..52268b8ac 100644 --- a/rel/i18n/emqx_ds_schema.hocon +++ b/rel/i18n/emqx_ds_schema.hocon @@ -5,15 +5,21 @@ messages.desc: """~ Configuration related to the durable storage of MQTT messages.~""" -builtin.label: "Builtin backend" -builtin.desc: +builtin_raft.label: "Builtin backend with Raft replication" +builtin_raft.desc: """~ - Builtin session storage backend utilizing embedded RocksDB key-value store.~""" + Builtin storage backend utilizing embedded RocksDB key-value store.~""" -builtin_backend.label: "Backend type" -builtin_backend.desc: +builtin_local.label: "Builtin backend" +builtin_local.desc: """~ - Built-in backend.~""" + Builtin storage backend utilizing embedded RocksDB key-value store. + This backend doesn't support clustering.~""" + +backend_type.label: "Backend type" +backend_type.desc: + """~ + Backend type.~""" builtin_data_dir.label: "Database location" builtin_data_dir.desc: @@ -39,21 +45,21 @@ builtin_n_sites.desc: During this phase at least that many sites should come online to distribute shards between them, otherwise message storage will be unavailable until then. After the initialization is complete, sites may be offline, which will affect availability depending on the number of offline sites and replication factor.~""" -builtin_local_write_buffer.label: "Local write buffer" -builtin_local_write_buffer.desc: +builtin_write_buffer.label: "Local write buffer" +builtin_write_buffer.desc: """~ Configuration related to the buffering of messages sent from the local node to the shard leader. EMQX accumulates PUBLISH messages from the local clients in a write buffer before committing them to the durable storage. 
This helps to hide network latency between EMQX nodes and improves write throughput.~""" -builtin_local_write_buffer_max_items.label: "Max items" -builtin_local_write_buffer_max_items.desc: +builtin_write_buffer_max_items.label: "Max items" +builtin_write_buffer_max_items.desc: """~ This configuration parameter defines maximum number of messages stored in the local write buffer.~""" -builtin_local_write_buffer_flush_interval.label: "Flush interval" -builtin_local_write_buffer_flush_interval.desc: +builtin_write_buffer_flush_interval.label: "Flush interval" +builtin_write_buffer_flush_interval.desc: """~ Maximum linger time for the buffered messages. Local write buffer will be flushed _at least_ as often as `flush_interval`.