From a18d1987a2d2e05a29d0355e14f452e5b9ee6db9 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:48:10 +0200 Subject: [PATCH 01/26] test(ds): Add a helper function for diffing messages --- .../test/emqx_ds_test_helpers.erl | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 4fed1d57b..5e7753058 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -217,9 +217,13 @@ transitions(Node, DB) -> %% Stream comparison -message_eq(Msg1, {_Key, Msg2}) -> - %% Timestamps can be modified by the replication layer, ignore them: - Msg1#message{timestamp = 0} =:= Msg2#message{timestamp = 0}. +message_eq(Fields, {_Key, Msg1 = #message{}}, Msg2) -> + message_eq(Fields, Msg1, Msg2); +message_eq(Fields, Msg1, {_Key, Msg2 = #message{}}) -> + message_eq(Fields, Msg1, Msg2); +message_eq(Fields, Msg1 = #message{}, Msg2 = #message{}) -> + maps:with(Fields, emqx_message:to_map(Msg1)) =:= + maps:with(Fields, emqx_message:to_map(Msg2)). %% Consuming streams and iterators @@ -242,18 +246,27 @@ verify_stream_effects(DB, TestCase, Nodes0, L) -> -spec verify_stream_effects(atom(), binary(), node(), emqx_types:clientid(), ds_stream()) -> ok. verify_stream_effects(DB, TestCase, Node, ClientId, ExpectedStream) -> ct:pal("Checking consistency of effects for ~p on ~p", [ClientId, Node]), - DiffOpts = #{context => 20, window => 1000, compare_fun => fun message_eq/2}, ?defer_assert( begin snabbkaffe_diff:assert_lists_eq( ExpectedStream, ds_topic_stream(DB, ClientId, client_topic(TestCase, ClientId), Node), - DiffOpts + message_diff_options([id, qos, from, flags, headers, topic, payload, extra]) ), ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node]) end ). 
+diff_messages(Fields, Expected, Got) -> + snabbkaffe_diff:assert_lists_eq(Expected, Got, message_diff_options(Fields)). + +message_diff_options(Fields) -> + #{ + context => 20, + window => 1000, + compare_fun => fun(M1, M2) -> message_eq(Fields, M1, M2) end + }. + %% Create a stream from the topic (wildcards are NOT supported for a %% good reason: order of messages is implementation-dependent!). %% From 83dc8f4d7707fd6c41d708aee64df244ed506d44 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:18:40 +0200 Subject: [PATCH 02/26] fix(ds): Fix return values of emqx_ds_storage_layer functions --- apps/emqx_durable_storage/src/emqx_ds.erl | 5 +---- .../emqx_durable_storage/src/emqx_ds_storage_layer.erl | 10 +++++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 2b903f5cf..2523d1c4f 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -136,7 +136,7 @@ -type ds_specific_delete_stream() :: term(). --type make_delete_iterator_result(DeleteIterator) :: {ok, DeleteIterator} | {error, term()}. +-type make_delete_iterator_result(DeleteIterator) :: {ok, DeleteIterator} | error(_). -type make_delete_iterator_result() :: make_delete_iterator_result(delete_iterator()). @@ -286,9 +286,6 @@ drop_generation(DB, GenId) -> {error, not_implemented} end. -%% @doc TODO: currently if one or a few shards are down, they won't be - -%% deleted. -spec drop_db(db()) -> ok. 
drop_db(DB) -> case persistent_term:get(?persistent_term(DB), undefined) of diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 47fe047fc..b6161d956 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -424,11 +424,11 @@ make_delete_iterator( ?generation => GenId, ?enc => Iter }}; - {error, _} = Err -> - Err + {error, Err} -> + {error, unrecoverable, Err} end; not_found -> - {error, end_of_stream} + {error, unrecoverable, generation_not_found} end. -spec update_iterator(shard_id(), iterator(), emqx_ds:message_key()) -> @@ -447,8 +447,8 @@ update_iterator( ?generation => GenId, ?enc => Iter }}; - {error, _} = Err -> - Err + {error, Err} -> + {error, unrecoverable, Err} end; not_found -> {error, unrecoverable, generation_not_found} From 63f1856a2cf49116a80a07b4396e459c4263567e Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:19:57 +0200 Subject: [PATCH 03/26] feat(ds): Dynamic backend registration --- apps/emqx_durable_storage/src/emqx_ds.erl | 33 ++++++++++++------- .../src/emqx_ds_storage_layer.erl | 10 ++++-- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 2523d1c4f..81efa6c5b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -23,8 +23,10 @@ %% Management API: -export([ - base_dir/0, + register_backend/2, + open_db/2, + close_db/1, update_db_config/2, add_generation/1, list_generations_with_lifetimes/1, @@ -199,6 +201,8 @@ -callback open_db(db(), create_db_opts()) -> ok | {error, _}. +-callback close_db(db()) -> ok. + -callback add_generation(db()) -> ok | {error, _}. -callback update_db_config(db(), create_db_opts()) -> ok | {error, _}. 
@@ -247,21 +251,26 @@ %% API functions %%================================================================================ --spec base_dir() -> file:filename(). -base_dir() -> - application:get_env(?APP, db_data_dir, emqx:data_dir()). +%% @doc Register DS backend. +-spec register_backend(atom(), module()) -> ok. +register_backend(Name, Module) -> + persistent_term:put({emqx_ds_backend_module, Name}, Module). %% @doc Different DBs are completely independent from each other. They %% could represent something like different tenants. -spec open_db(db(), create_db_opts()) -> ok. -open_db(DB, Opts = #{backend := Backend}) when Backend =:= builtin orelse Backend =:= fdb -> - Module = - case Backend of - builtin -> emqx_ds_replication_layer; - fdb -> emqx_fdb_ds - end, - persistent_term:put(?persistent_term(DB), Module), - ?module(DB):open_db(DB, Opts). +open_db(DB, Opts = #{backend := Backend}) -> + case persistent_term:get({emqx_ds_backend_module, Backend}, undefined) of + undefined -> + error({no_such_backend, Backend}); + Module -> + persistent_term:put(?persistent_term(DB), Module), + ?module(DB):open_db(DB, Opts) + end. + +-spec close_db(db()) -> ok. +close_db(DB) -> + ?module(DB):close_db(DB). -spec add_generation(db()) -> ok. add_generation(DB) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index b6161d956..3ca2dcefd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -55,7 +55,7 @@ -export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). %% internal exports: --export([db_dir/1]). +-export([db_dir/1, base_dir/0]). -export_type([ gen_id/0, @@ -889,13 +889,17 @@ rocksdb_open(Shard, Options) -> Error end. +-spec base_dir() -> file:filename(). +base_dir() -> + application:get_env(?APP, db_data_dir, emqx:data_dir()). + -spec db_dir(shard_id()) -> file:filename(). 
db_dir({DB, ShardId}) -> - filename:join([emqx_ds:base_dir(), DB, binary_to_list(ShardId)]). + filename:join([base_dir(), DB, binary_to_list(ShardId)]). -spec checkpoints_dir(shard_id()) -> file:filename(). checkpoints_dir({DB, ShardId}) -> - filename:join([emqx_ds:base_dir(), DB, checkpoints, binary_to_list(ShardId)]). + filename:join([base_dir(), DB, checkpoints, binary_to_list(ShardId)]). -spec checkpoint_dir(shard_id(), _Name :: file:name()) -> file:filename(). checkpoint_dir(ShardId, Name) -> From a8ea0ae4e545f84c717efc24c1cd5f2c0088f7b3 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:29:31 +0200 Subject: [PATCH 04/26] refactor(ds): Extract DS replication layer to a separate application --- apps/emqx_ds_builtin_raft/BSL.txt | 94 ++++++++++ apps/emqx_ds_builtin_raft/README.md | 3 + apps/emqx_ds_builtin_raft/rebar.config | 5 + .../src/emqx_ds_builtin_raft.app.src | 11 ++ .../src/emqx_ds_builtin_raft_app.erl | 11 ++ .../src/emqx_ds_builtin_raft_db_sup.erl} | 18 +- .../src/emqx_ds_builtin_raft_sup.erl} | 19 +-- .../src/emqx_ds_replication_layer.erl | 27 ++- .../src/emqx_ds_replication_layer.hrl | 12 -- .../src/emqx_ds_replication_layer_egress.erl | 12 -- .../src/emqx_ds_replication_layer_meta.erl | 14 +- .../src/emqx_ds_replication_layer_shard.erl | 12 -- .../emqx_ds_replication_shard_allocator.erl | 18 +- .../src/emqx_ds_replication_snapshot.erl | 4 +- .../src/proto/emqx_ds_proto_v1.erl | 12 -- .../src/proto/emqx_ds_proto_v2.erl | 12 -- .../src/proto/emqx_ds_proto_v3.erl | 12 -- .../src/proto/emqx_ds_proto_v4.erl | 12 -- .../test/emqx_ds_replication_SUITE.erl | 161 +++++++++++++++++- apps/emqx_durable_storage/README.md | 36 +++- apps/emqx_durable_storage/src/emqx_ds.erl | 3 +- apps/emqx_machine/priv/reboot_lists.eterm | 3 +- changes/ce/breaking-13248.en.md | 7 + mix.exs | 5 +- rebar.config.erl | 1 + 25 files changed, 347 insertions(+), 177 deletions(-) create mode 100644 apps/emqx_ds_builtin_raft/BSL.txt 
create mode 100644 apps/emqx_ds_builtin_raft/README.md create mode 100644 apps/emqx_ds_builtin_raft/rebar.config create mode 100644 apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src create mode 100644 apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl rename apps/{emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl => emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl} (92%) rename apps/{emqx_durable_storage/src/emqx_ds_builtin_sup.erl => emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl} (85%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_layer.erl (97%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_layer.hrl (59%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_layer_egress.erl (95%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_layer_meta.erl (97%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_layer_shard.erl (96%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_shard_allocator.erl (95%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/emqx_ds_replication_snapshot.erl (98%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/proto/emqx_ds_proto_v1.erl (82%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/proto/emqx_ds_proto_v2.erl (86%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/proto/emqx_ds_proto_v3.erl (88%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/src/proto/emqx_ds_proto_v4.erl (90%) rename apps/{emqx_durable_storage => emqx_ds_builtin_raft}/test/emqx_ds_replication_SUITE.erl (81%) create mode 100644 changes/ce/breaking-13248.en.md diff --git a/apps/emqx_ds_builtin_raft/BSL.txt b/apps/emqx_ds_builtin_raft/BSL.txt new file mode 100644 index 000000000..5df4e60aa --- /dev/null +++ b/apps/emqx_ds_builtin_raft/BSL.txt @@ -0,0 +1,94 @@ +Business 
Source License 1.1 + +Licensor: Hangzhou EMQ Technologies Co., Ltd. +Licensed Work: EMQX Enterprise Edition + The Licensed Work is (c) 2024 + Hangzhou EMQ Technologies Co., Ltd. +Additional Use Grant: Students and educators are granted right to copy, + modify, and create derivative work for research + or education. +Change Date: 2028-06-13 +Change License: Apache License, Version 2.0 + +For information about alternative licensing arrangements for the Software, +please contact Licensor: https://www.emqx.com/en/contact + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +License text copyright (c) 2017, 2024 MariaDB Corporation Ab, All Rights Reserved. +“Business Source License” is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. 
+ +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. 
To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. diff --git a/apps/emqx_ds_builtin_raft/README.md b/apps/emqx_ds_builtin_raft/README.md new file mode 100644 index 000000000..7f468f365 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/README.md @@ -0,0 +1,3 @@ +# `emqx_ds_builtin_raft` + +Replication layer for the builtin EMQX durable storage backend that uses the Raft algorithm. diff --git a/apps/emqx_ds_builtin_raft/rebar.config b/apps/emqx_ds_builtin_raft/rebar.config new file mode 100644 index 000000000..d70aa75e0 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/rebar.config @@ -0,0 +1,5 @@ +%% -*- mode:erlang -*- + +{deps, [ + {emqx_durable_storage, {path, "../emqx_durable_storage"}} +]}. diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src new file mode 100644 index 000000000..18e84e6b1 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_ds_builtin_raft, [ + {description, "Raft replication layer for the durable storage"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, gproc, mria, ra, emqx_durable_storage]}, + {mod, {emqx_ds_builtin_raft_app, []}}, + {env, []} +]}. 
diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl new file mode 100644 index 000000000..65e640ed5 --- /dev/null +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl @@ -0,0 +1,11 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_ds_builtin_raft_app). + +-export([start/2]). + +start(_Type, _Args) -> + emqx_ds:register_backend(builtin_raft, emqx_ds_replication_layer), + {ok, self()}. diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl similarity index 92% rename from apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl index 40380ed59..74e97bf52 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl @@ -1,22 +1,10 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. 
%%-------------------------------------------------------------------- %% @doc Supervisor that contains all the processes that belong to a %% given builtin DS database. --module(emqx_ds_builtin_db_sup). +-module(emqx_ds_builtin_raft_db_sup). -behaviour(supervisor). @@ -150,7 +138,7 @@ get_shard_workers(DB) -> init({#?db_sup{db = DB}, DefaultOpts}) -> %% Spec for the top-level supervisor for the database: logger:notice("Starting DS DB ~p", [DB]), - emqx_ds_builtin_sup:clean_gvars(DB), + emqx_ds_builtin_raft_sup:clean_gvars(DB), emqx_ds_builtin_metrics:init_for_db(DB), Opts = emqx_ds_replication_layer_meta:open_db(DB, DefaultOpts), ok = start_ra_system(DB, Opts), @@ -197,7 +185,7 @@ init({#?shard_sup{db = DB, shard = Shard}, _}) -> {ok, {SupFlags, Children}}. start_ra_system(DB, #{replication_options := ReplicationOpts}) -> - DataDir = filename:join([emqx_ds:base_dir(), DB, dsrepl]), + DataDir = filename:join([emqx_ds_storage_layer:base_dir(), DB, dsrepl]), Config = lists:foldr(fun maps:merge/2, #{}, [ ra_system:default_config(), #{ diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl similarity index 85% rename from apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl index 971805351..a88e7fc2e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl @@ -1,23 +1,11 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc This supervisor manages the global worker processes needed for %% the functioning of builtin databases, and all builtin database %% attach to it. --module(emqx_ds_builtin_sup). +-module(emqx_ds_builtin_raft_sup). -behaviour(supervisor). @@ -39,7 +27,6 @@ -define(top, ?MODULE). -define(databases, emqx_ds_builtin_databases_sup). - -define(gvar_tab, emqx_ds_builtin_gvar). -record(gvar, { @@ -57,7 +44,7 @@ start_db(DB, Opts) -> ensure_top(), ChildSpec = #{ id => DB, - start => {emqx_ds_builtin_db_sup, start_db, [DB, Opts]}, + start => {emqx_ds_builtin_raft_db_sup, start_db, [DB, Opts]}, type => supervisor, shutdown => infinity }, @@ -158,5 +145,5 @@ start_databases_sup() -> %%================================================================================ ensure_top() -> - {ok, _} = emqx_ds_sup:attach_backend(builtin, {?MODULE, start_top, []}), + {ok, _} = emqx_ds_sup:attach_backend(builtin_raft, {?MODULE, start_top, []}), ok. 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl similarity index 97% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 836e9df07..45f04e341 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -1,28 +1,17 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc Replication layer for DS backends that don't support %% replication on their own. -module(emqx_ds_replication_layer). --behaviour(emqx_ds). +%-behaviour(emqx_ds). -export([ list_shards/1, open_db/2, + close_db/1, add_generation/1, update_db_config/2, list_generations_with_lifetimes/1, @@ -176,7 +165,7 @@ list_shards(DB) -> -spec open_db(emqx_ds:db(), builtin_db_opts()) -> ok | {error, _}. open_db(DB, CreateOpts) -> - case emqx_ds_builtin_sup:start_db(DB, CreateOpts) of + case emqx_ds_builtin_raft_sup:start_db(DB, CreateOpts) of {ok, _} -> ok; {error, {already_started, _}} -> @@ -185,6 +174,10 @@ open_db(DB, CreateOpts) -> {error, Err} end. +-spec close_db(emqx_ds:db()) -> ok. 
+close_db(DB) -> + emqx_ds_builtin_raft_sup:stop_db(DB). + -spec add_generation(emqx_ds:db()) -> ok | {error, _}. add_generation(DB) -> foreach_shard( @@ -376,7 +369,7 @@ foreach_shard(DB, Fun) -> %% local server -spec current_timestamp(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> emqx_ds:time(). current_timestamp(DB, Shard) -> - emqx_ds_builtin_sup:get_gvar(DB, ?gv_timestamp(Shard), 0). + emqx_ds_builtin_raft_sup:get_gvar(DB, ?gv_timestamp(Shard), 0). %%================================================================================ %% behavior callbacks @@ -402,7 +395,7 @@ current_timestamp(DB, Shard) -> -spec do_drop_db_v1(emqx_ds:db()) -> ok | {error, _}. do_drop_db_v1(DB) -> MyShards = emqx_ds_replication_layer_meta:my_shards(DB), - emqx_ds_builtin_sup:stop_db(DB), + emqx_ds_builtin_raft_sup:stop_db(DB), lists:foreach( fun(Shard) -> emqx_ds_storage_layer:drop_shard({DB, Shard}) @@ -874,4 +867,4 @@ handle_custom_event(DBShard, Latest, Event) -> end. set_ts({DB, Shard}, TS) -> - emqx_ds_builtin_sup:set_gvar(DB, ?gv_timestamp(Shard), TS). + emqx_ds_builtin_raft_sup:set_gvar(DB, ?gv_timestamp(Shard), TS). diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl similarity index 59% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl index 4472b5a47..f33090c46 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022, 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -ifndef(EMQX_DS_REPLICATION_LAYER_HRL). -define(EMQX_DS_REPLICATION_LAYER_HRL, true). diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl similarity index 95% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl index 1d0efca6f..ce117011c 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. 
%%-------------------------------------------------------------------- %% @doc Egress servers are responsible for proxing the outcoming diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl similarity index 97% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl index 09e24e23f..2348d7c2d 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_meta.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- %% @doc Metadata storage for the builtin sharded database. @@ -678,7 +666,7 @@ ensure_tables() -> ok = mria:wait_for_tables([?META_TAB, ?NODE_TAB, ?SHARD_TAB]). 
ensure_site() -> - Filename = filename:join(emqx_ds:base_dir(), "emqx_ds_builtin_site.eterm"), + Filename = filename:join(emqx_ds_storage_layer:base_dir(), "emqx_ds_builtin_site.eterm"), case file:consult(Filename) of {ok, [Site]} -> ok; diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl similarity index 96% rename from apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl index 1070fbde0..b43373c43 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_shard.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_replication_layer_shard). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl similarity index 95% rename from apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl index cbaafc718..fa6814572 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_shard_allocator.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_replication_shard_allocator). @@ -297,7 +285,7 @@ trans_drop_local(DB, Shard, {del, Site}) -> do_drop_local(DB, Shard) -> case emqx_ds_replication_layer_shard:drop_local_server(DB, Shard) of ok -> - ok = emqx_ds_builtin_db_sup:stop_shard({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:stop_shard({DB, Shard}), ok = emqx_ds_storage_layer:drop_shard({DB, Shard}), logger:info(#{msg => "Local shard replica dropped"}); {error, recoverable, Reason} -> @@ -428,7 +416,7 @@ start_shards(DB, Shards) -> lists:foreach(fun(Shard) -> start_shard(DB, Shard) end, Shards). 
start_shard(DB, Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_shard({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:ensure_shard({DB, Shard}), ok = logger:info(#{msg => "Shard started", shard => Shard}), ok. @@ -436,7 +424,7 @@ start_egresses(DB, Shards) -> lists:foreach(fun(Shard) -> start_egress(DB, Shard) end, Shards). start_egress(DB, Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_egress({DB, Shard}), + ok = emqx_ds_builtin_raft_db_sup:ensure_egress({DB, Shard}), ok = logger:info(#{msg => "Egress started", shard => Shard}), ok. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl similarity index 98% rename from apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl rename to apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl index c90c71688..9267aee77 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_snapshot.erl @@ -195,7 +195,7 @@ start_snapshot_writer(WS) -> msg => "dsrepl_snapshot_write_started", shard => ShardId }), - _ = emqx_ds_builtin_db_sup:terminate_storage(ShardId), + _ = emqx_ds_builtin_raft_db_sup:terminate_storage(ShardId), {ok, SnapWriter} = emqx_ds_storage_layer:accept_snapshot(ShardId), {ok, WS#ws{phase = storage_snapshot, writer = SnapWriter}}. @@ -223,7 +223,7 @@ complete_accept(WS = #ws{started_at = StartedAt, writer = SnapWriter}) -> duration_ms => erlang:monotonic_time(millisecond) - StartedAt, bytes_written => emqx_ds_storage_snapshot:writer_info(bytes_written, SnapWriter) }), - {ok, _} = emqx_ds_builtin_db_sup:restart_storage(ShardId), + {ok, _} = emqx_ds_builtin_raft_db_sup:restart_storage(ShardId), write_machine_snapshot(WS). 
write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) -> diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl similarity index 82% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl index 77d5693d5..d2c4e1c0a 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v1.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v1). diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl similarity index 86% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl index 836bfbc86..259e9cb4e 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v2.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
-%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v2). diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl similarity index 88% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl index f55ef9fb2..4c49906da 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v3.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v3). 
diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl similarity index 90% rename from apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl rename to apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl index 73285247f..9f66f2c32 100644 --- a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl +++ b/apps/emqx_ds_builtin_raft/src/proto/emqx_ds_proto_v4.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_ds_proto_v4). 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl similarity index 81% rename from apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl rename to apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl index 1b2a21105..e84abb78b 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl @@ -35,7 +35,7 @@ opts() -> opts(Overrides) -> maps:merge( #{ - backend => builtin, + backend => builtin_raft, %% storage => {emqx_ds_storage_reference, #{}}, storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 10}}, n_shards => 16, @@ -56,8 +56,52 @@ appspec(emqx_durable_storage) -> override_env => [{egress_flush_interval, 1}] }}. +t_metadata(init, Config) -> + emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + Config; +t_metadata('end', Config) -> + emqx_cth_suite:stop([emqx_ds_builtin_raft]), + Config. + +t_metadata(_Config) -> + DB = ?FUNCTION_NAME, + NShards = 1, + Options = #{ + backend => builtin_raft, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => NShards, + n_sites => 1, + replication_factor => 1, + replication_options => #{} + }, + try + ?assertMatch(ok, emqx_ds:open_db(DB, Options)), + %% Check metadata: + %% We have only one site: + [Site] = emqx_ds_replication_layer_meta:sites(), + %% Check all shards: + Shards = emqx_ds_replication_layer_meta:shards(DB), + %% Since there is only one site all shards should be allocated + %% to this site: + MyShards = emqx_ds_replication_layer_meta:my_shards(DB), + ?assertEqual(NShards, length(Shards)), + lists:foreach( + fun(Shard) -> + ?assertEqual( + [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard) + ) + end, + Shards + ), + ?assertEqual(lists:sort(Shards), lists:sort(MyShards)) + after + ?assertMatch(ok, emqx_ds:drop_db(DB)) + end. 
+ t_replication_transfers_snapshots(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], NodeSpecs = emqx_cth_cluster:mk_nodespecs( [ {t_replication_transfers_snapshots1, #{apps => Apps}}, @@ -130,7 +174,7 @@ t_replication_transfers_snapshots(Config) -> ). t_rebalance(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_rebalance1, #{apps => Apps}}, @@ -260,7 +304,7 @@ t_rebalance(Config) -> ). t_join_leave_errors(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_join_leave_errors1, #{apps => Apps}}, @@ -322,7 +366,7 @@ t_join_leave_errors(Config) -> ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)). t_rebalance_chaotic_converges(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Nodes = emqx_cth_cluster:start( [ {t_rebalance_chaotic_converges1, #{apps => Apps}}, @@ -418,7 +462,7 @@ t_rebalance_chaotic_converges(Config) -> ). t_rebalance_offline_restarts(init, Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], Specs = emqx_cth_cluster:mk_nodespecs( [ {t_rebalance_offline_restarts1, #{apps => Apps}}, @@ -435,6 +479,7 @@ t_rebalance_offline_restarts('end', Config) -> t_rebalance_offline_restarts(Config) -> %% This testcase verifies that rebalancing progresses if nodes restart or %% go offline and never come back. + ok = snabbkaffe:start_trace(), Nodes = [N1, N2, N3] = ?config(nodes, Config), _Specs = [NS1, NS2, _] = ?config(nodespecs, Config), @@ -477,7 +522,7 @@ t_rebalance_offline_restarts(Config) -> ?assertEqual(lists:sort([S1, S2]), ds_repl_meta(N1, db_sites, [?DB])). 
t_drop_generation(Config) -> - Apps = [appspec(emqx_durable_storage)], + Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], [_, _, NS3] = NodeSpecs = emqx_cth_cluster:mk_nodespecs( [ @@ -554,6 +599,105 @@ t_drop_generation(Config) -> end ). +t_error_mapping_replication_layer(init, Config) -> + emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + Config; +t_error_mapping_replication_layer('end', Config) -> + emqx_cth_suite:stop([emqx_ds_builtin_raft]), + Config. + +t_error_mapping_replication_layer(_Config) -> + %% This checks that the replication layer maps recoverable errors correctly. + + ok = emqx_ds_test_helpers:mock_rpc(), + ok = snabbkaffe:start_trace(), + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), + [Shard1, Shard2] = emqx_ds_replication_layer_meta:shards(DB), + + TopicFilter = emqx_topic:words(<<"foo/#">>), + Msgs = [ + message(<<"C1">>, <<"foo/bar">>, <<"1">>, 0), + message(<<"C1">>, <<"foo/baz">>, <<"2">>, 1), + message(<<"C2">>, <<"foo/foo">>, <<"3">>, 2), + message(<<"C3">>, <<"foo/xyz">>, <<"4">>, 3), + message(<<"C4">>, <<"foo/bar">>, <<"5">>, 4), + message(<<"C5">>, <<"foo/oof">>, <<"6">>, 5) + ], + + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + + ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard1}), + ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard2}), + + Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0), + Iterators0 = lists:map( + fun({_Rank, S}) -> + {ok, Iter} = emqx_ds:make_iterator(DB, S, TopicFilter, 0), + Iter + end, + Streams0 + ), + + %% Disrupt the link to the second shard. 
+ ok = emqx_ds_test_helpers:mock_rpc_result( + fun(_Node, emqx_ds_replication_layer, _Function, Args) -> + case Args of + [DB, Shard1 | _] -> passthrough; + [DB, Shard2 | _] -> unavailable + end + end + ), + + %% Result of `emqx_ds:get_streams/3` will just contain partial results, not an error. + Streams1 = emqx_ds:get_streams(DB, TopicFilter, 0), + ?assert( + length(Streams1) > 0 andalso length(Streams1) =< length(Streams0), + Streams1 + ), + + %% At least one of `emqx_ds:make_iterator/4` will end in an error. + Results1 = lists:map( + fun({_Rank, S}) -> + case emqx_ds:make_iterator(DB, S, TopicFilter, 0) of + Ok = {ok, _Iter} -> + Ok; + Error = {error, recoverable, {erpc, _}} -> + Error; + Other -> + ct:fail({unexpected_result, Other}) + end + end, + Streams0 + ), + ?assert( + length([error || {error, _, _} <- Results1]) > 0, + Results1 + ), + + %% At least one of `emqx_ds:next/3` over initial set of iterators will end in an error. + Results2 = lists:map( + fun(Iter) -> + case emqx_ds:next(DB, Iter, _BatchSize = 42) of + Ok = {ok, _Iter, [_ | _]} -> + Ok; + Error = {error, recoverable, {badrpc, _}} -> + Error; + Other -> + ct:fail({unexpected_result, Other}) + end + end, + Iterators0 + ), + ?assert( + length([error || {error, _, _} <- Results2]) > 0, + Results2 + ), + meck:unload(). + %% shard_server_info(Node, DB, Shard, Site, Info) -> @@ -583,7 +727,7 @@ shards(Node, DB) -> erpc:call(Node, emqx_ds_replication_layer_meta, shards, [DB]). shards_online(Node, DB) -> - erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [DB]). + erpc:call(Node, emqx_ds_builtin_raft_db_sup, which_shards, [DB]). n_shards_online(Node, DB) -> length(shards_online(Node, DB)). @@ -635,7 +779,6 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_testcase(TCName, Config0) -> Config = emqx_common_test_helpers:init_per_testcase(?MODULE, TCName, Config0), - ok = snabbkaffe:start_trace(), Config. 
end_per_testcase(TCName, Config) -> diff --git a/apps/emqx_durable_storage/README.md b/apps/emqx_durable_storage/README.md index 362ad47a3..739cbdc64 100644 --- a/apps/emqx_durable_storage/README.md +++ b/apps/emqx_durable_storage/README.md @@ -103,7 +103,7 @@ Consumption of messages is done in several stages: # Documentation links -TBD +https://docs.emqx.com/en/enterprise/latest/durability/durability_introduction.html # Usage @@ -146,7 +146,39 @@ The following REST APIs are available for managing the builtin durable storages: - `/ds/storages/:ds/replicas/:site` — add or remove replica of the durable storage on the site # Other -TBD + +Note: this application contains main interface module and some common utility modules used by the backends, but it doesn't contain any ready-to-use DS backends. +The backends are instead implemented as separate OTP applications, such as `emqx_ds_backend_local` and `emqx_ds_backend_raft`. + +There is a helper placeholder application `emqx_ds_backends` that depends on all backend applications available in the release. +Business logic applications must have `emqx_ds_backends` as a dependency. + +The dependency diagram is the following: + +``` + +------------------------+ + | emqx_durable_storage | + +------------------------+ + / | \ + / | \ + / | \ + +------------------------+ +----------------------+ +------+ + | emqx_ds_backend_local | | emqx_ds_builtin_raft | | ... | + +------------------------+ +-----------+----------+ +------+ + \ | / + \ | / + \ | / + +-------------------------+ + | emqx_ds_backends | + +-------------------------+ + / \ + / \ + ......................../.. business apps .\........................ + / \ + +------+ +-------+ + | emqx | | ... | + +------+ +-------+ +``` # Contributing Please see our [contributing.md](../../CONTRIBUTING.md). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 81efa6c5b..c2f1e7eb3 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -175,8 +175,7 @@ _ => _ }. --type create_db_opts() :: - emqx_ds_replication_layer:builtin_db_opts() | generic_db_opts(). +-type create_db_opts() :: generic_db_opts(). -type message_id() :: emqx_ds_replication_layer:message_id(). diff --git a/apps/emqx_machine/priv/reboot_lists.eterm b/apps/emqx_machine/priv/reboot_lists.eterm index 62d357c19..c3311c09b 100644 --- a/apps/emqx_machine/priv/reboot_lists.eterm +++ b/apps/emqx_machine/priv/reboot_lists.eterm @@ -135,7 +135,8 @@ emqx_bridge_confluent, emqx_ds_shared_sub, emqx_auth_ext, - emqx_cluster_link + emqx_cluster_link, + emqx_ds_builtin_raft ], %% must always be of type `load' ce_business_apps => diff --git a/changes/ce/breaking-13248.en.md b/changes/ce/breaking-13248.en.md new file mode 100644 index 000000000..9f2ad2bd8 --- /dev/null +++ b/changes/ce/breaking-13248.en.md @@ -0,0 +1,7 @@ +`builtin` durable storage backend has been replaced with the following two backends: + +- `builtin_local`: A durable storage backend that doesn't support replication. + It can't be used in a multi-node cluster. + This backend is available in both open source and enterprise editions. +- `builtin_raft`: A durable storage backend that uses Raft algorithm for replication. + This backend is available enterprise edition. 
diff --git a/mix.exs b/mix.exs index 0c1168c61..6a7e6bda7 100644 --- a/mix.exs +++ b/mix.exs @@ -205,7 +205,8 @@ defmodule EMQXUmbrella.MixProject do :emqx_bridge_syskeeper, :emqx_ds_shared_sub, :emqx_auth_ext, - :emqx_cluster_link + :emqx_cluster_link, + :emqx_ds_builtin_raft ]) end @@ -341,6 +342,8 @@ defmodule EMQXUmbrella.MixProject do :emqx_s3, :emqx_opentelemetry, :emqx_durable_storage, + :emqx_ds_builtin_local, + :emqx_ds_builtin_raft, :rabbit_common, :emqx_eviction_agent, :emqx_node_rebalance diff --git a/rebar.config.erl b/rebar.config.erl index 493955670..2cfa8c986 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -124,6 +124,7 @@ is_community_umbrella_app("apps/emqx_node_rebalance") -> false; is_community_umbrella_app("apps/emqx_ds_shared_sub") -> false; is_community_umbrella_app("apps/emqx_auth_ext") -> false; is_community_umbrella_app("apps/emqx_cluster_link") -> false; +is_community_umbrella_app("apps/emqx_ds_builtin_raft") -> false; is_community_umbrella_app(_) -> true. 
%% BUILD_WITHOUT_JQ From 279619fc8014c4fb6855284a860ab8897f4727a4 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:31:40 +0200 Subject: [PATCH 05/26] feat(ds): Add `builtin_local' backend --- apps/emqx_ds_builtin_local/README.md | 32 ++ apps/emqx_ds_builtin_local/rebar.config | 5 + .../src/emqx_ds_builtin_local.app.src | 11 + .../src/emqx_ds_builtin_local.erl | 372 +++++++++++++++ .../src/emqx_ds_builtin_local_app.erl | 39 ++ .../src/emqx_ds_builtin_local_db_sup.erl | 219 +++++++++ .../src/emqx_ds_builtin_local_meta.erl | 204 +++++++++ .../src/emqx_ds_builtin_local_sup.erl | 133 ++++++ .../test/emqx_ds_builtin_local_SUITE.erl | 346 ++++++++++++++ .../src/emqx_ds_buffer.erl | 423 ++++++++++++++++++ .../emqx_ds_storage_bitfield_lts_SUITE.erl | 19 +- 11 files changed, 1792 insertions(+), 11 deletions(-) create mode 100644 apps/emqx_ds_builtin_local/README.md create mode 100644 apps/emqx_ds_builtin_local/rebar.config create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl create mode 100644 apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl create mode 100644 apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl create mode 100644 apps/emqx_durable_storage/src/emqx_ds_buffer.erl diff --git a/apps/emqx_ds_builtin_local/README.md b/apps/emqx_ds_builtin_local/README.md new file mode 100644 index 000000000..fec609493 --- /dev/null +++ b/apps/emqx_ds_builtin_local/README.md @@ -0,0 +1,32 @@ +# Local Backend for EMQX Durable Storage + +# Features + +This backend uses local RocksDB database to store data. 
+ +# Limitation + +This backend cannot be used in a clustered EMQX setup. + +# Documentation links + +TBD + +# Usage + +TBD + +# Configurations + +TBD + +# HTTP APIs + +TBD + +# Other + +TBD + +# Contributing +Please see our [contributing.md](../../CONTRIBUTING.md). diff --git a/apps/emqx_ds_builtin_local/rebar.config b/apps/emqx_ds_builtin_local/rebar.config new file mode 100644 index 000000000..d70aa75e0 --- /dev/null +++ b/apps/emqx_ds_builtin_local/rebar.config @@ -0,0 +1,5 @@ +%% -*- mode:erlang -*- + +{deps, [ + {emqx_durable_storage, {path, "../emqx_durable_storage"}} +]}. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src new file mode 100644 index 000000000..e7531ae45 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_ds_builtin_local, [ + {description, "A DS backend that stores all data locally and thus doesn't support clustering."}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, rocksdb, emqx_durable_storage, emqx_utils]}, + {mod, {emqx_ds_builtin_local_app, []}}, + {env, []} +]}. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl new file mode 100644 index 000000000..d7e5972ab --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -0,0 +1,372 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local). + +-behaviour(emqx_ds). +-behaviour(emqx_ds_buffer). + +%% API: +-export([]). + +%% behavior callbacks: +-export([ + %% `emqx_ds': + open_db/2, + close_db/1, + add_generation/1, + update_db_config/2, + list_generations_with_lifetimes/1, + drop_generation/2, + drop_db/1, + store_batch/3, + get_streams/3, + get_delete_streams/3, + make_iterator/4, + make_delete_iterator/4, + update_iterator/3, + next/3, + delete_next/4, + + %% `emqx_ds_buffer': + init_buffer/3, + flush_buffer/4, + shard_of_message/4 +]). + +-export_type([db_opts/0, shard/0, iterator/0, delete_iterator/0]). + +-include_lib("emqx_utils/include/emqx_message.hrl"). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(tag, 1). +-define(shard, 2). +-define(enc, 3). + +-define(IT, 61). +-define(DELETE_IT, 62). + +-type shard() :: binary(). + +-opaque iterator() :: + #{ + ?tag := ?IT, + ?shard := shard(), + ?enc := term() + }. + +-opaque delete_iterator() :: + #{ + ?tag := ?DELETE_IT, + ?shard := shard(), + ?enc := term() + }. + +-type db_opts() :: + #{ + backend := builtin_local, + storage := emqx_ds_storage_layer:prototype(), + n_shards := pos_integer() + }. + +-type generation_rank() :: {shard(), emqx_ds_storage_layer:gen_id()}. + +-define(stream(SHARD, INNER), [2, SHARD | INNER]). 
+-define(delete_stream(SHARD, INNER), [3, SHARD | INNER]). + +%%================================================================================ +%% API functions +%%================================================================================ + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +-spec open_db(emqx_ds:db(), db_opts()) -> ok | {error, _}. +open_db(DB, CreateOpts) -> + case emqx_ds_builtin_local_sup:start_db(DB, CreateOpts) of + {ok, _} -> + ok; + {error, {already_started, _}} -> + ok; + {error, Err} -> + {error, Err} + end. + +-spec close_db(emqx_ds:db()) -> ok. +close_db(DB) -> + emqx_ds_builtin_local_sup:stop_db(DB). + +-spec add_generation(emqx_ds:db()) -> ok | {error, _}. +add_generation(DB) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + Errors = lists:filtermap( + fun(Shard) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:add_generation( + ShardId, emqx_ds_builtin_local_meta:ensure_monotonic_timestamp(ShardId) + ) + of + ok -> + false; + Error -> + {true, {Shard, Error}} + end + end, + Shards + ), + case Errors of + [] -> ok; + _ -> {error, Errors} + end. + +-spec update_db_config(emqx_ds:db(), db_opts()) -> ok | {error, _}. +update_db_config(DB, CreateOpts) -> + Opts = #{} = emqx_ds_builtin_local_meta:update_db_config(DB, CreateOpts), + lists:foreach( + fun(Shard) -> + ShardId = {DB, Shard}, + emqx_ds_storage_layer:update_config( + ShardId, emqx_ds_builtin_local_meta:ensure_monotonic_timestamp(ShardId), Opts + ) + end, + emqx_ds_builtin_local_meta:shards(DB) + ). + +-spec list_generations_with_lifetimes(emqx_ds:db()) -> + #{emqx_ds:generation_rank() => emqx_ds:generation_info()}. 
+list_generations_with_lifetimes(DB) -> + lists:foldl( + fun(Shard, Acc) -> + maps:fold( + fun(GenId, Data, Acc1) -> + Acc1#{{Shard, GenId} => Data} + end, + Acc, + emqx_ds_storage_layer:list_generations_with_lifetimes({DB, Shard}) + ) + end, + #{}, + emqx_ds_builtin_local_meta:shards(DB) + ). + +-spec drop_generation(emqx_ds:db(), generation_rank()) -> ok | {error, _}. +drop_generation(DB, {Shard, GenId}) -> + emqx_ds_storage_layer:drop_generation({DB, Shard}, GenId). + +-spec drop_db(emqx_ds:db()) -> ok | {error, _}. +drop_db(DB) -> + close_db(DB), + lists:foreach( + fun(Shard) -> + emqx_ds_storage_layer:drop_shard({DB, Shard}) + end, + emqx_ds_builtin_local_meta:shards(DB) + ), + emqx_ds_builtin_local_meta:drop_db(DB). + +-spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> + emqx_ds:store_batch_result(). +store_batch(DB, Messages, Opts) -> + try + emqx_ds_buffer:store_batch(DB, Messages, Opts) + catch + error:{Reason, _Call} when Reason == timeout; Reason == noproc -> + {error, recoverable, Reason} + end. + +-record(bs, {options :: term()}). +-type buffer_state() :: #bs{}. + +-spec init_buffer(emqx_ds:db(), shard(), _Options) -> {ok, buffer_state()}. +init_buffer(DB, Shard, Options) -> + ShardId = {DB, Shard}, + case current_timestamp(ShardId) of + undefined -> + Latest = erlang:system_time(microsecond), + emqx_ds_builtin_local_meta:set_current_timestamp(ShardId, Latest); + _Latest -> + ok + end, + {ok, #bs{options = Options}}. + +-spec flush_buffer(emqx_ds:db(), shard(), [emqx_types:message()], buffer_state()) -> + {buffer_state(), emqx_ds:store_batch_result()}. +flush_buffer(DB, Shard, Messages, S0 = #bs{options = Options}) -> + {Latest, Batch} = assign_timestamps(current_timestamp({DB, Shard}), Messages), + Result = emqx_ds_storage_layer:store_batch({DB, Shard}, Batch, Options), + emqx_ds_builtin_local_meta:set_current_timestamp({DB, Shard}, Latest), + {S0, Result}. 
+ +assign_timestamps(Latest, Messages) -> + assign_timestamps(Latest, Messages, []). + +assign_timestamps(Latest, [MessageIn | Rest], Acc) -> + case emqx_message:timestamp(MessageIn, microsecond) of + TimestampUs when TimestampUs > Latest -> + Message = assign_timestamp(TimestampUs, MessageIn), + assign_timestamps(TimestampUs, Rest, [Message | Acc]); + _Earlier -> + Message = assign_timestamp(Latest + 1, MessageIn), + assign_timestamps(Latest + 1, Rest, [Message | Acc]) + end; +assign_timestamps(Latest, [], Acc) -> + {Latest, lists:reverse(Acc)}. + +assign_timestamp(TimestampUs, Message) -> + {TimestampUs, Message}. + +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> shard(). +shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + N = emqx_ds_builtin_local_meta:n_shards(DB), + Hash = + case SerializeBy of + clientid -> erlang:phash2(From, N); + topic -> erlang:phash2(Topic, N) + end, + integer_to_binary(Hash). + +-spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> + [{emqx_ds:stream_rank(), emqx_ds:ds_specific_stream()}]. +get_streams(DB, TopicFilter, StartTime) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + lists:flatmap( + fun(Shard) -> + Streams = emqx_ds_storage_layer:get_streams( + {DB, Shard}, TopicFilter, timestamp_to_timeus(StartTime) + ), + lists:map( + fun({RankY, InnerStream}) -> + Rank = {Shard, RankY}, + {Rank, ?stream(Shard, InnerStream)} + end, + Streams + ) + end, + Shards + ). + +-spec make_iterator( + emqx_ds:db(), emqx_ds:ds_specific_stream(), emqx_ds:topic_filter(), emqx_ds:time() +) -> + emqx_ds:make_iterator_result(emqx_ds:ds_specific_iterator()). 
+make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:make_iterator( + ShardId, InnerStream, TopicFilter, timestamp_to_timeus(StartTime) + ) + of + {ok, Iter} -> + {ok, #{?tag => ?IT, ?shard => Shard, ?enc => Iter}}; + Error = {error, _, _} -> + Error + end. + +-spec update_iterator(emqx_ds:db(), emqx_ds:ds_specific_iterator(), emqx_ds:message_key()) -> + emqx_ds:make_iterator_result(iterator()). +update_iterator(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, Key) -> + case emqx_ds_storage_layer:update_iterator({DB, Shard}, StorageIter0, Key) of + {ok, StorageIter} -> + {ok, Iter0#{?enc => StorageIter}}; + Err = {error, _, _} -> + Err + end. + +-spec next(emqx_ds:db(), iterator(), pos_integer()) -> emqx_ds:next_result(iterator()). +next(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, N) -> + ShardId = {DB, Shard}, + T0 = erlang:monotonic_time(microsecond), + Result = emqx_ds_storage_layer:next(ShardId, StorageIter0, N, current_timestamp(ShardId)), + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_next_time(DB, T1 - T0), + case Result of + {ok, StorageIter, Batch} -> + Iter = Iter0#{?enc := StorageIter}, + {ok, Iter, Batch}; + Other -> + Other + end. + +-spec get_delete_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> + [emqx_ds:ds_specific_delete_stream()]. +get_delete_streams(DB, TopicFilter, StartTime) -> + Shards = emqx_ds_builtin_local_meta:shards(DB), + lists:flatmap( + fun(Shard) -> + Streams = emqx_ds_storage_layer:get_delete_streams( + {DB, Shard}, TopicFilter, timestamp_to_timeus(StartTime) + ), + lists:map( + fun(InnerStream) -> + ?delete_stream(Shard, InnerStream) + end, + Streams + ) + end, + Shards + ). + +-spec make_delete_iterator( + emqx_ds:db(), emqx_ds:ds_specific_delete_stream(), emqx_ds:topic_filter(), emqx_ds:time() +) -> + emqx_ds:make_delete_iterator_result(delete_iterator()). 
+make_delete_iterator(DB, ?delete_stream(Shard, InnerStream), TopicFilter, StartTime) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:make_delete_iterator( + ShardId, InnerStream, TopicFilter, timestamp_to_timeus(StartTime) + ) + of + {ok, Iter} -> + {ok, #{?tag => ?DELETE_IT, ?shard => Shard, ?enc => Iter}}; + Error = {error, _, _} -> + Error + end. + +-spec delete_next(emqx_ds:db(), delete_iterator(), emqx_ds:delete_selector(), pos_integer()) -> + emqx_ds:delete_next_result(emqx_ds:delete_iterator()). +delete_next(DB, Iter = #{?tag := ?DELETE_IT, ?shard := Shard, ?enc := StorageIter0}, Selector, N) -> + ShardId = {DB, Shard}, + case + emqx_ds_storage_layer:delete_next( + ShardId, StorageIter0, Selector, N, current_timestamp(ShardId) + ) + of + {ok, StorageIter, Ndeleted} -> + {ok, Iter#{?enc => StorageIter}, Ndeleted}; + {ok, end_of_stream} -> + {ok, end_of_stream}; + Error -> + Error + end. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +current_timestamp(ShardId) -> + emqx_ds_builtin_local_meta:current_timestamp(ShardId). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +timestamp_to_timeus(TimestampMs) -> + TimestampMs * 1000. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl new file mode 100644 index 000000000..1b64405d6 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl @@ -0,0 +1,39 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_app). + +%% API: +-export([]). + +%% behavior callbacks: +-export([start/2]). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +start(_StartType, _StartArgs) -> + emqx_ds:register_backend(builtin_local, emqx_ds_builtin_local), + %% TODO: fixme + {ok, self()}. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl new file mode 100644 index 000000000..8776416e0 --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_db_sup.erl @@ -0,0 +1,219 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Supervisor that contains all the processes that belong to a +%% given builtin DS database. +-module(emqx_ds_builtin_local_db_sup). + +-behaviour(supervisor). + +%% API: +-export([ + start_db/2, + start_shard/1, + stop_shard/1, + terminate_storage/1, + restart_storage/1, + ensure_shard/1 +]). +-export([which_dbs/0, which_shards/1]). + +%% Debug: +-export([ + get_shard_workers/1 +]). + +%% behaviour callbacks: +-export([init/1]). + +%% internal exports: +-export([start_link_sup/2]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(via(REC), {via, gproc, {n, l, REC}}). + +-define(db_sup, ?MODULE). +-define(shards_sup, emqx_ds_builtin_local_db_shards_sup). +-define(shard_sup, emqx_ds_builtin_local_db_shard_sup). + +-record(?db_sup, {db}). +-record(?shards_sup, {db}). +-record(?shard_sup, {db, shard}). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> {ok, pid()}. +start_db(DB, Opts) -> + start_link_sup(#?db_sup{db = DB}, Opts). 
+ +-spec start_shard(emqx_ds_storage_layer:shard_id()) -> + supervisor:startchild_ret(). +start_shard({DB, Shard}) -> + supervisor:start_child(?via(#?shards_sup{db = DB}), shard_spec(DB, Shard)). + +-spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, not_found}. +stop_shard({DB, Shard}) -> + Sup = ?via(#?shards_sup{db = DB}), + case supervisor:terminate_child(Sup, Shard) of + ok -> + supervisor:delete_child(Sup, Shard); + {error, Reason} -> + {error, Reason} + end. + +-spec terminate_storage(emqx_ds_storage_layer:shard_id()) -> ok | {error, _Reason}. +terminate_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:terminate_child(Sup, {Shard, storage}). + +-spec restart_storage(emqx_ds_storage_layer:shard_id()) -> {ok, _Child} | {error, _Reason}. +restart_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:restart_child(Sup, {Shard, storage}). + +-spec ensure_shard(emqx_ds_storage_layer:shard_id()) -> + ok | {error, _Reason}. +ensure_shard(Shard) -> + ensure_started(start_shard(Shard)). + +-spec which_shards(emqx_ds:db()) -> + [_Child]. +which_shards(DB) -> + supervisor:which_children(?via(#?shards_sup{db = DB})). + +%% @doc Return the list of builtin DS databases that are currently +%% active on the node. +-spec which_dbs() -> [emqx_ds:db()]. +which_dbs() -> + Key = {n, l, #?db_sup{_ = '_', db = '$1'}}, + gproc:select({local, names}, [{{Key, '_', '_'}, [], ['$1']}]). + +%% @doc Get pids of all local shard servers for the given DB. +-spec get_shard_workers(emqx_ds:db()) -> #{_Shard => pid()}. +get_shard_workers(DB) -> + Shards = supervisor:which_children(?via(#?shards_sup{db = DB})), + L = lists:flatmap( + fun + ({_Shard, Sup, _, _}) when is_pid(Sup) -> + [{Id, Pid} || {Id, Pid, _, _} <- supervisor:which_children(Sup), is_pid(Pid)]; + (_) -> + [] + end, + Shards + ), + maps:from_list(L). 
+ +%%================================================================================ +%% behaviour callbacks +%%================================================================================ + +init({#?db_sup{db = DB}, DefaultOpts}) -> + %% Spec for the top-level supervisor for the database: + logger:notice("Starting DS DB ~p", [DB]), + emqx_ds_builtin_metrics:init_for_db(DB), + Opts = emqx_ds_builtin_local_meta:open_db(DB, DefaultOpts), + Children = [ + sup_spec(#?shards_sup{db = DB}, Opts) + ], + SupFlags = #{ + strategy => one_for_all, + intensity => 0, + period => 1 + }, + {ok, {SupFlags, Children}}; +init({#?shards_sup{db = DB}, _Opts}) -> + %% Spec for the supervisor that manages the supervisors for + %% each local shard of the DB: + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1 + }, + Children = [shard_spec(DB, Shard) || Shard <- emqx_ds_builtin_local_meta:shards(DB)], + {ok, {SupFlags, Children}}; +init({#?shard_sup{db = DB, shard = Shard}, _}) -> + SupFlags = #{ + strategy => rest_for_one, + intensity => 10, + period => 100 + }, + Opts = emqx_ds_builtin_local_meta:db_config(DB), + Children = [ + shard_storage_spec(DB, Shard, Opts), + shard_buffer_spec(DB, Shard, Opts) + ], + {ok, {SupFlags, Children}}. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +start_link_sup(Id, Options) -> + supervisor:start_link(?via(Id), ?MODULE, {Id, Options}). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +sup_spec(Id, Options) -> + #{ + id => element(1, Id), + start => {?MODULE, start_link_sup, [Id, Options]}, + type => supervisor, + shutdown => infinity + }. 
+ +shard_spec(DB, Shard) -> + #{ + id => Shard, + start => {?MODULE, start_link_sup, [#?shard_sup{db = DB, shard = Shard}, []]}, + shutdown => infinity, + restart => permanent, + type => supervisor + }. + +shard_storage_spec(DB, Shard, Opts) -> + #{ + id => {Shard, storage}, + start => {emqx_ds_storage_layer, start_link, [{DB, Shard}, Opts]}, + shutdown => 5_000, + restart => permanent, + type => worker + }. + +shard_buffer_spec(DB, Shard, Options) -> + #{ + id => {Shard, buffer}, + start => {emqx_ds_buffer, start_link, [emqx_ds_builtin_local, Options, DB, Shard]}, + shutdown => 5_000, + restart => permanent, + type => worker + }. + +ensure_started(Res) -> + case Res of + {ok, _Pid} -> + ok; + {error, {already_started, _Pid}} -> + ok; + {error, Reason} -> + {error, Reason} + end. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl new file mode 100644 index 000000000..dbc68cd2c --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_meta.erl @@ -0,0 +1,204 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_meta). + +-behaviour(gen_server). 
+ +%% API: +-export([ + start_link/0, + open_db/2, + drop_db/1, + n_shards/1, + shards/1, + db_config/1, + update_db_config/2, + + current_timestamp/1, + set_current_timestamp/2, + ensure_monotonic_timestamp/1 +]). + +%% behavior callbacks: +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). + +%% internal exports: +-export([]). + +-export_type([]). + +-include_lib("stdlib/include/ms_transform.hrl"). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(META_TAB, emqx_ds_builtin_local_metadata_tab). +-record(?META_TAB, { + db :: emqx_ds:db(), + db_props :: emqx_ds_builtin_local:db_opts() +}). + +%% We save timestamp of the last written message to a mnesia table. +%% The saved value is restored when the node restarts. This is needed +%% to create a timestamp that is truly monotonic even in presence of +%% node restarts. +-define(TS_TAB, emqx_ds_builtin_local_timestamp_tab). +-record(?TS_TAB, { + id :: emqx_ds_storage_layer:shard_id(), + latest :: integer() +}). + +%%================================================================================ +%% API functions +%%================================================================================ + +-define(SERVER, ?MODULE). + +-spec start_link() -> {ok, pid()}. +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). + +-spec open_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + emqx_ds_builtin_local:db_opts(). +open_db(DB, CreateOpts = #{backend := builtin_local, storage := _, n_shards := _}) -> + transaction( + fun() -> + case mnesia:wread({?META_TAB, DB}) of + [] -> + mnesia:write(#?META_TAB{db = DB, db_props = CreateOpts}), + CreateOpts; + [#?META_TAB{db_props = Opts}] -> + Opts + end + end + ). + +-spec drop_db(emqx_ds:db()) -> ok. 
+drop_db(DB) -> + transaction( + fun() -> + MS = ets:fun2ms(fun(#?TS_TAB{id = ID}) when element(1, ID) =:= DB -> + ID + end), + Timestamps = mnesia:select(?TS_TAB, MS, write), + [mnesia:delete({?TS_TAB, I}) || I <- Timestamps], + mnesia:delete({?META_TAB, DB}) + end + ). + +-spec update_db_config(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + emqx_ds_builtin_local:db_opts(). +update_db_config(DB, Opts) -> + transaction( + fun() -> + mnesia:write(#?META_TAB{db = DB, db_props = Opts}), + Opts + end + ). + +-spec n_shards(emqx_ds:db()) -> pos_integer(). +n_shards(DB) -> + #{n_shards := NShards} = db_config(DB), + NShards. + +-spec shards(emqx_ds:db()) -> [emqx_ds_builtin_local:shard()]. +shards(DB) -> + NShards = n_shards(DB), + [integer_to_binary(Shard) || Shard <- lists:seq(0, NShards - 1)]. + +-spec db_config(emqx_ds:db()) -> emqx_ds_builtin_local:db_opts(). +db_config(DB) -> + case mnesia:dirty_read(?META_TAB, DB) of + [#?META_TAB{db_props = Props}] -> + Props; + [] -> + error({no_such_db, DB}) + end. + +-spec set_current_timestamp(emqx_ds_storage_layer:shard_id(), emqx_ds:time()) -> ok. +set_current_timestamp(ShardId, Time) -> + mria:dirty_write(?TS_TAB, #?TS_TAB{id = ShardId, latest = Time}). + +-spec current_timestamp(emqx_ds_storage_layer:shard_id()) -> emqx_ds:time() | undefined. +current_timestamp(ShardId) -> + case mnesia:dirty_read(?TS_TAB, ShardId) of + [#?TS_TAB{latest = Latest}] -> + Latest; + [] -> + undefined + end. + +-spec ensure_monotonic_timestamp(emqx_ds_storage_layer:shard_id()) -> emqx_ds:time(). +ensure_monotonic_timestamp(ShardId) -> + mria:dirty_update_counter({?TS_TAB, ShardId}, 1). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +-record(s, {}). +-define(timer_update, timer_update). + +init([]) -> + process_flag(trap_exit, true), + ensure_tables(), + S = #s{}, + {ok, S}. 
+ +handle_call(_Call, _From, S) -> + {reply, {error, unknown_call}, S}. + +handle_cast(_Cast, S) -> + {noreply, S}. + +handle_info(_Info, S) -> + {noreply, S}. + +terminate(_Reason, _S) -> + ok. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ + +ensure_tables() -> + ok = mria:create_table(?META_TAB, [ + {local_content, true}, + {type, ordered_set}, + {storage, disc_copies}, + {record_name, ?META_TAB}, + {attributes, record_info(fields, ?META_TAB)} + ]), + ok = mria:create_table(?TS_TAB, [ + {local_content, true}, + {type, set}, + {storage, disc_copies}, + {record_name, ?TS_TAB}, + {attributes, record_info(fields, ?TS_TAB)} + ]). + +transaction(Fun) -> + case mria:transaction(mria:local_content_shard(), Fun) of + {atomic, Result} -> + Result; + {aborted, Reason} -> + {error, Reason} + end. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl new file mode 100644 index 000000000..5994588ec --- /dev/null +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl @@ -0,0 +1,133 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc This supervisor manages the global worker processes needed for +%% the functioning of builtin local databases, and all builtin local +%% databases that attach to it. +-module(emqx_ds_builtin_local_sup). + +-behaviour(supervisor). + +%% API: +-export([start_db/2, stop_db/1]). + +%% behavior callbacks: +-export([init/1]). + +%% internal exports: +-export([start_top/0, start_databases_sup/0]). + +-export_type([]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(top, ?MODULE). +-define(databases, emqx_ds_builtin_local_db_sup). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> + supervisor:startchild_ret(). +start_db(DB, Opts) -> + ensure_top(), + ChildSpec = #{ + id => DB, + start => {?databases, start_db, [DB, Opts]}, + type => supervisor, + shutdown => infinity + }, + supervisor:start_child(?databases, ChildSpec). + +-spec stop_db(emqx_ds:db()) -> ok. +stop_db(DB) -> + case whereis(?databases) of + Pid when is_pid(Pid) -> + _ = supervisor:terminate_child(?databases, DB), + _ = supervisor:delete_child(?databases, DB), + ok; + undefined -> + ok + end. 
+ +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +%% There are two layers of supervision: +%% +%% 1. top supervisor for the builtin backend. It contains the global +%% worker processes (like the metadata server), and `?databases' +%% supervisor. +%% +%% 2. `?databases': a `one_for_one' supervisor where each child is a +%% `db' supervisor that contains processes that represent the DB. +%% Children are attached dynamically to this one. +init(?top) -> + %% Children: + MetricsWorker = emqx_ds_builtin_metrics:child_spec(), + MetadataServer = #{ + id => metadata_server, + start => {emqx_ds_builtin_local_meta, start_link, []}, + restart => permanent, + type => worker, + shutdown => 5000 + }, + DBsSup = #{ + id => ?databases, + start => {?MODULE, start_databases_sup, []}, + restart => permanent, + type => supervisor, + shutdown => infinity + }, + %% + SupFlags = #{ + strategy => one_for_all, + intensity => 1, + period => 1, + auto_shutdown => never + }, + {ok, {SupFlags, [MetricsWorker, MetadataServer, DBsSup]}}; +init(?databases) -> + %% Children are added dynamically: + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1 + }, + {ok, {SupFlags, []}}. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +-spec start_top() -> {ok, pid()}. +start_top() -> + supervisor:start_link({local, ?top}, ?MODULE, ?top). + +start_databases_sup() -> + supervisor:start_link({local, ?databases}, ?MODULE, ?databases).
+ +%%================================================================================ +%% Internal functions +%%================================================================================ + +ensure_top() -> + {ok, _} = emqx_ds_sup:attach_backend(builtin_local, {?MODULE, start_top, []}), + ok. diff --git a/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl new file mode 100644 index 000000000..67db21b4b --- /dev/null +++ b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl @@ -0,0 +1,346 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_local_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("emqx/include/asserts.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-define(N_SHARDS, 1). + +opts(_Config) -> + #{ + backend => builtin_local, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS + }. 
+ +t_drop_generation_with_never_used_iterator(Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and not consumed from. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration begins. + %% In this case, the iterator won't see any messages and the stream will end. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter0) + ), + + %% New iterator for the new stream will only see the later messages. + [{_, Stream1}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + ?assertNotEqual(Stream0, Stream1), + {ok, Iter1} = emqx_ds:make_iterator(DB, Stream1, TopicFilter, StartTime), + + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter1, #{batch_size => 1}), + ?assertNotEqual(end_of_stream, Iter), + ?assertEqual(Msgs1, Batch), + + ok. + +t_drop_generation_with_used_once_iterator(Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and consumes at least 1 message. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration continues. + %% In this case, the iterator should see no more messages and the stream will end. 
+ + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = + [Msg0 | _] = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, Batch1} = emqx_ds:next(DB, Iter0, 1), + ?assertNotEqual(end_of_stream, Iter1), + ?assertEqual([Msg0], [Msg || {_Key, Msg} <- Batch1]), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter1) + ). + +t_drop_generation_update_iterator(Config) -> + %% This checks the behavior of `emqx_ds:update_iterator' after the generation + %% underlying the iterator has been dropped. 
+ + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, _Batch1} = emqx_ds:next(DB, Iter0, 1), + {ok, _Iter2, [{Key2, _Msg}]} = emqx_ds:next(DB, Iter1, 1), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual( + {error, unrecoverable, generation_not_found}, + emqx_ds:update_iterator(DB, Iter1, Key2) + ). + +t_make_iterator_stale_stream(Config) -> + %% This checks the behavior of `emqx_ds:make_iterator' after the generation underlying + %% the stream has been dropped. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual( + {error, unrecoverable, generation_not_found}, + emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime) + ), + + ok. + +t_get_streams_concurrently_with_drop_generation(Config) -> + %% This checks that we can get all streams while a generation is dropped + %% mid-iteration. 
+ + DB = ?FUNCTION_NAME, + ?check_trace( + #{timetrap => 5_000}, + begin + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:add_generation(DB), + + %% All streams + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + ?assertMatch([_, _, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)), + + ?force_ordering( + #{?snk_kind := dropped_gen}, + #{?snk_kind := get_streams_get_gen} + ), + + spawn_link(fun() -> + {ok, _} = ?block_until(#{?snk_kind := get_streams_all_gens}), + ok = emqx_ds:drop_generation(DB, GenId0), + ?tp(dropped_gen, #{}) + end), + + ?assertMatch([_, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)), + + ok + end, + [] + ). + +%% This testcase verifies the behavior of `store_batch' operation +%% when the underlying code experiences recoverable or unrecoverable +%% problems. +t_store_batch_fail(Config) -> + ?check_trace( + #{timetrap => 15_000}, + try + meck:new(emqx_ds_storage_layer, [passthrough, no_history]), + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, (opts(Config))#{n_shards => 2})), + %% Success: + Batch1 = [ + message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})), + %% Inject unrecoverable error: + meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) -> + {error, unrecoverable, mock} + end), + Batch2 = [ + message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1) + ], + ?assertMatch( + {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true}) + ), + %% Inject a recoverable error: + meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) -> + {error, recoverable, mock} + end), + Batch3 = [ + message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2), + message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2),
+ message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3), + message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3) + ], + %% Note: due to idempotency issues the number of retries + %% is currently set to 0: + ?assertMatch( + {error, recoverable, mock}, + emqx_ds:store_batch(DB, Batch3, #{sync => true}) + ), + meck:unload(emqx_ds_storage_layer), + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), + lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) + after + meck:unload() + end, + [ + {"message ordering", fun(StoredMessages, _Trace) -> + [{_, MessagesFromStream1}, {_, MessagesFromStream2}] = StoredMessages, + emqx_ds_test_helpers:diff_messages( + [payload], + [ + #message{payload = <<"1">>}, + #message{payload = <<"2">>}, + #message{payload = <<"5">>}, + #message{payload = <<"7">>} + ], + MessagesFromStream1 + ), + emqx_ds_test_helpers:diff_messages( + [payload], + [ + #message{payload = <<"6">>}, + #message{payload = <<"8">>} + ], + MessagesFromStream2 + ) + end} + ] + ). + +message(ClientId, Topic, Payload, PublishedAt) -> + Msg = message(Topic, Payload, PublishedAt), + Msg#message{from = ClientId}. + +message(Topic, Payload, PublishedAt) -> + #message{ + topic = Topic, + payload = Payload, + timestamp = PublishedAt, + id = emqx_guid:gen() + }. + +delete(DB, It, Selector, BatchSize) -> + delete(DB, It, Selector, BatchSize, 0). + +delete(DB, It0, Selector, BatchSize, Acc) -> + case emqx_ds:delete_next(DB, It0, Selector, BatchSize) of + {ok, It, 0} -> + {ok, It, Acc}; + {ok, It, NumDeleted} -> + delete(DB, It, Selector, BatchSize, Acc + NumDeleted); %% recurse with (Selector, BatchSize) in the same order as the clause head + {ok, end_of_stream} -> + {ok, end_of_stream, Acc}; + Ret -> + Ret + end. + +%% CT callbacks + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + emqx_common_test_helpers:clear_screen(), + Apps = emqx_cth_suite:start( + [mria, emqx_ds_builtin_local], + #{work_dir => ?config(priv_dir, Config)} + ), + [{apps, Apps} | Config].
+ +end_per_suite(Config) -> + ok = emqx_cth_suite:stop(?config(apps, Config)), + ok. + +init_per_testcase(_TC, Config) -> + application:ensure_all_started(emqx_durable_storage), + Config. + +end_per_testcase(_TC, _Config) -> + snabbkaffe:stop(), + ok = application:stop(emqx_durable_storage), + mria:stop(), + _ = mnesia:delete_schema([node()]), + ok. diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl new file mode 100644 index 000000000..f6f6c6241 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -0,0 +1,423 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Buffer servers are responsible for collecting batches from the +%% local processes, sharding and repackaging them. +-module(emqx_ds_buffer). + +-behaviour(gen_server). + +%% API: +-export([start_link/4, store_batch/3]). + +%% behavior callbacks: +-export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). + +%% internal exports: +-export([]). + +-export_type([]). + +-include_lib("emqx_utils/include/emqx_message.hrl"). +-include_lib("snabbkaffe/include/trace.hrl"). 
+ +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(via(DB, Shard), {via, gproc, {n, l, {?MODULE, DB, Shard}}}). +-define(flush, flush). + +-define(cbm(DB), {?MODULE, DB}). + +-record(enqueue_req, { + messages :: [emqx_types:message()], + sync :: boolean(), + atomic :: boolean(), + n_messages :: non_neg_integer(), + payload_bytes :: non_neg_integer() +}). + +-callback init_buffer(emqx_ds:db(), _Shard, _Options) -> {ok, _State}. + +-callback flush_buffer(emqx_ds:db(), _Shard, [emqx_types:message()], State) -> + {State, ok | {error, recoverable | unrecoverable, _}}. + +-callback shard_of_message(emqx_ds:db(), emqx_types:message(), topic | clientid, _Options) -> + _Shard. + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec start_link(module(), _CallbackOptions, emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + {ok, pid()}. +start_link(CallbackModule, CallbackOptions, DB, Shard) -> + gen_server:start_link( + ?via(DB, Shard), ?MODULE, [CallbackModule, CallbackOptions, DB, Shard], [] + ). + +-spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> + emqx_ds:store_batch_result(). +store_batch(DB, Messages, Opts) -> + Sync = maps:get(sync, Opts, true), + Atomic = maps:get(atomic, Opts, false), + %% Usually we expect all messages in the batch to go into the + %% single shard, so this function is optimized for the happy case. 
+ case shards_of_batch(DB, Messages) of + [{Shard, {NMsgs, NBytes}}] -> + %% Happy case: + enqueue_call_or_cast( + ?via(DB, Shard), + #enqueue_req{ + messages = Messages, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes + } + ); + [_, _ | _] when Atomic -> + %% It's impossible to commit a batch to multiple shards + %% atomically + {error, unrecoverable, atomic_commit_to_multiple_shards}; + _Shards -> + %% Use a slower implementation for the unlikely case: + repackage_messages(DB, Messages, Sync) + end. + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +-record(s, { + callback_module :: module(), + callback_state :: term(), + db :: emqx_ds:db(), + shard :: emqx_ds_replication_layer:shard_id(), + metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), + n_retries = 0 :: non_neg_integer(), + %% FIXME: Currently max_retries is always 0, because replication + %% layer doesn't guarantee idempotency. Retrying would create + %% duplicate messages. + max_retries = 0 :: non_neg_integer(), + n = 0 :: non_neg_integer(), + n_bytes = 0 :: non_neg_integer(), + tref :: undefined | reference(), + queue :: queue:queue(emqx_types:message()), + pending_replies = [] :: [gen_server:from()] +}). + +init([CBM, CBMOptions, DB, Shard]) -> + process_flag(trap_exit, true), + process_flag(message_queue_data, off_heap), + logger:update_process_metadata(#{domain => [emqx, ds, egress, DB]}), + MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), + ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), + {ok, CallbackS} = CBM:init_buffer(DB, Shard, CBMOptions), + S = #s{ + callback_module = CBM, + callback_state = CallbackS, + db = DB, + shard = Shard, + metrics_id = MetricsId, + queue = queue:new() + }, + persistent_term:put(?cbm(DB), {CBM, CBMOptions}), + {ok, S}. 
+ +format_status(Status) -> + maps:map( + fun + (state, #s{db = DB, shard = Shard, queue = Q}) -> + #{ + db => DB, + shard => Shard, + queue => queue:len(Q) + }; + (_, Val) -> + Val + end, + Status + ). + +handle_call( + #enqueue_req{ + messages = Msgs, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes + }, + From, + S0 = #s{pending_replies = Replies0} +) -> + S = S0#s{pending_replies = [From | Replies0]}, + {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; +handle_call(_Call, _From, S) -> + {reply, {error, unknown_call}, S}. + +handle_cast( + #enqueue_req{ + messages = Msgs, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes + }, + S +) -> + {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; +handle_cast(_Cast, S) -> + {noreply, S}. + +handle_info(?flush, S) -> + {noreply, flush(S)}; +handle_info(_Info, S) -> + {noreply, S}. + +terminate(_Reason, #s{db = DB}) -> + persistent_term:erase(?cbm(DB)), + ok. + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ + +enqueue( + Sync, + Atomic, + Msgs, + BatchSize, + BatchBytes, + S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0} +) -> + %% At this point we don't split the batches, even when they aren't + %% atomic. It wouldn't win us anything in terms of memory, and + %% EMQX currently feeds data to DS in very small batches, so + %% granularity should be fine enough. 
+ NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), + NBytesMax = application:get_env(emqx_durable_storage, egress_batch_bytes, infinity), + NMsgs = NMsgs0 + BatchSize, + NBytes = NBytes0 + BatchBytes, + case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NMsgs0 > 0) of + true -> + %% Adding this batch would cause buffer to overflow. Flush + %% it now, and retry: + S1 = flush(S0), + enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); + false -> + %% Either the batch fits, or the buffer is empty: enqueue the + %% batch in its entirety: + Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), + S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, + case NMsgs >= NMax orelse NBytes >= NBytesMax of + true -> + flush(S1); + false -> + ensure_timer(S1) + end + end. + +shard_of_message(DB, Message, ShardBy) -> + {CBM, Options} = persistent_term:get(?cbm(DB)), + CBM:shard_of_message(DB, Message, ShardBy, Options). + +-define(COOLDOWN_MIN, 1000). +-define(COOLDOWN_MAX, 5000). + +flush(S) -> + do_flush(cancel_timer(S)). 
+ +do_flush(S0 = #s{n = 0}) -> + S0; +do_flush( + S0 = #s{ + callback_module = CBM, + callback_state = CallbackS0, + queue = Q, + pending_replies = Replies, + db = DB, + shard = Shard, + metrics_id = Metrics, + n_retries = Retries, + max_retries = MaxRetries + } +) -> + Messages = queue:to_list(Q), + T0 = erlang:monotonic_time(microsecond), + {CallbackS, Result} = CBM:flush_buffer(DB, Shard, Messages, CallbackS0), + S = S0#s{callback_state = CallbackS}, + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_egress_flush_time(Metrics, T1 - T0), + case Result of + ok -> + emqx_ds_builtin_metrics:inc_egress_batches(Metrics), + emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), + emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), + ?tp( + emqx_ds_replication_layer_egress_flush, + #{db => DB, shard => Shard, batch => Messages} + ), + lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), + erlang:garbage_collect(), + S#s{ + callback_state = CallbackS, + n = 0, + n_bytes = 0, + queue = queue:new(), + pending_replies = [] + }; + {timeout, ServerId} when Retries < MaxRetries -> + %% Note: this is a hot loop, so we report error messages + %% with `debug' level to avoid wiping the logs. Instead, + %% the error detection must rely on the metrics. Debug + %% logging can be enabled for the particular egress server + %% via logger domain. + ?tp( + debug, + emqx_ds_replication_layer_egress_flush_retry, + #{db => DB, shard => Shard, reason => timeout, server_id => ServerId} + ), + %% Retry sending the batch: + emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), + erlang:garbage_collect(), + %% We block the gen_server until the next retry. 
+ BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), + timer:sleep(BlockTime), + S#s{n_retries = Retries + 1}; + Err -> + ?tp( + debug, + emqx_ds_replication_layer_egress_flush_failed, + #{db => DB, shard => Shard, error => Err} + ), + emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), + Reply = + case Err of + {error, _, _} -> Err; + {timeout, ServerId} -> {error, recoverable, {timeout, ServerId}}; + _ -> {error, unrecoverable, Err} + end, + lists:foreach( + fun(From) -> gen_server:reply(From, Reply) end, Replies + ), + erlang:garbage_collect(), + S#s{ + n = 0, + n_bytes = 0, + queue = queue:new(), + pending_replies = [], + n_retries = 0 + } + end. + +-spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> + [{emqx_ds_replication_layer:shard_id(), {NMessages, NBytes}}] +when + NMessages :: non_neg_integer(), + NBytes :: non_neg_integer(). +shards_of_batch(DB, Messages) -> + maps:to_list( + lists:foldl( + fun(Message, Acc) -> + %% TODO: sharding strategy must be part of the DS DB schema: + Shard = shard_of_message(DB, Message, clientid), + Size = payload_size(Message), + maps:update_with( + Shard, + fun({N, S}) -> + {N + 1, S + Size} + end, + {1, Size}, + Acc + ) + end, + #{}, + Messages + ) + ). + +repackage_messages(DB, Messages, Sync) -> + Batches = lists:foldl( + fun(Message, Acc) -> + Shard = shard_of_message(DB, Message, clientid), + Size = payload_size(Message), + maps:update_with( + Shard, + fun({N, S, Msgs}) -> + {N + 1, S + Size, [Message | Msgs]} + end, + {1, Size, [Message]}, + Acc + ) + end, + #{}, + Messages + ), + maps:fold( + fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> + Err = enqueue_call_or_cast( + ?via(DB, Shard), + #enqueue_req{ + messages = lists:reverse(RevMessages), + sync = Sync, + atomic = false, + n_messages = NMsgs, + payload_bytes = ByteSize + } + ), + compose_errors(ErrAcc, Err) + end, + ok, + Batches + ). 
+ +enqueue_call_or_cast(To, Req = #enqueue_req{sync = true}) -> + gen_server:call(To, Req, infinity); +enqueue_call_or_cast(To, Req = #enqueue_req{sync = false}) -> + gen_server:cast(To, Req). + +compose_errors(ErrAcc, ok) -> + ErrAcc; +compose_errors(ok, Err) -> + Err; +compose_errors({error, recoverable, _}, {error, unrecoverable, Err}) -> + {error, unrecoverable, Err}; +compose_errors(ErrAcc, _Err) -> + ErrAcc. + +ensure_timer(S = #s{tref = undefined}) -> + Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), + Tref = erlang:send_after(Interval, self(), ?flush), + S#s{tref = Tref}; +ensure_timer(S) -> + S. + +cancel_timer(S = #s{tref = undefined}) -> + S; +cancel_timer(S = #s{tref = TRef}) -> + _ = erlang:cancel_timer(TRef), + S#s{tref = undefined}. + +%% @doc Return approximate size of the MQTT message (it doesn't take +%% all things into account, for example headers and extras) +payload_size(#message{payload = P, topic = T}) -> + size(P) + size(T). diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index 004096431..bf820e0bf 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -26,16 +26,13 @@ -define(SHARD, shard(?FUNCTION_NAME)). -define(DEFAULT_CONFIG, #{ - backend => builtin, + backend => builtin_local, storage => {emqx_ds_storage_bitfield_lts, #{}}, - n_shards => 1, - n_sites => 1, - replication_factor => 1, - replication_options => #{} + n_shards => 1 }). 
-define(COMPACT_CONFIG, #{ - backend => builtin, + backend => builtin_local, storage => {emqx_ds_storage_bitfield_lts, #{ bits_per_wildcard_level => 8 @@ -138,8 +135,8 @@ t_get_streams(_Config) -> [FooBarBaz] = GetStream(<<"foo/bar/baz">>), [A] = GetStream(<<"a">>), %% Restart shard to make sure trie is persisted and restored: - ok = emqx_ds_builtin_sup:stop_db(?FUNCTION_NAME), - {ok, _} = emqx_ds_builtin_sup:start_db(?FUNCTION_NAME, #{}), + ok = emqx_ds:close_db(?FUNCTION_NAME), + ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Verify that there are no "ghost streams" for topics that don't %% have any messages: [] = GetStream(<<"bar/foo">>), @@ -241,8 +238,8 @@ t_replay(_Config) -> ?assert(check(?SHARD, <<"+/+/+">>, 0, Messages)), ?assert(check(?SHARD, <<"+/+/baz">>, 0, Messages)), %% Restart the DB to make sure trie is persisted and restored: - ok = emqx_ds_builtin_sup:stop_db(?FUNCTION_NAME), - {ok, _} = emqx_ds_builtin_sup:start_db(?FUNCTION_NAME, #{}), + ok = emqx_ds:close_db(?FUNCTION_NAME), + ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Learned wildcard topics: ?assertNot(check(?SHARD, <<"wildcard/1000/suffix/foo">>, 0, [])), ?assert(check(?SHARD, <<"wildcard/1/suffix/foo">>, 0, Messages)), @@ -512,7 +509,7 @@ suite() -> [{timetrap, {seconds, 20}}]. init_per_suite(Config) -> emqx_common_test_helpers:clear_screen(), Apps = emqx_cth_suite:start( - [emqx_durable_storage], + [emqx_ds_builtin_local], #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{apps, Apps} | Config]. 
From ef09cfcd71f6f40ac79f46c3920465cc034d9af5 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:45:35 +0200 Subject: [PATCH 06/26] feat(ds): Add `emqx_ds_backends` application --- apps/emqx/rebar.config | 2 + apps/emqx_ds_backends/README.md | 32 ++ apps/emqx_ds_backends/rebar.config | 5 + .../src/emqx_ds_backends.app.src | 11 + .../test/emqx_ds_backends_SUITE.erl} | 413 +++++------------- .../test/emqx_ds_replication_SUITE.erl | 75 ++++ .../src/emqx_ds_storage_layer.erl | 2 + .../src/emqx_durable_storage.app.src | 4 +- apps/emqx_machine/priv/reboot_lists.eterm | 1 + 9 files changed, 228 insertions(+), 317 deletions(-) create mode 100644 apps/emqx_ds_backends/README.md create mode 100644 apps/emqx_ds_backends/rebar.config create mode 100644 apps/emqx_ds_backends/src/emqx_ds_backends.app.src rename apps/{emqx_durable_storage/test/emqx_ds_SUITE.erl => emqx_ds_backends/test/emqx_ds_backends_SUITE.erl} (60%) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 98a2d36fa..6ff0de648 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -24,6 +24,8 @@ {deps, [ {emqx_utils, {path, "../emqx_utils"}}, {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {emqx_ds_builtin_local, {path, "../emqx_ds_builtin_local"}}, + {emqx_ds_backends, {path, "../emqx_ds_backends"}}, {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, diff --git a/apps/emqx_ds_backends/README.md b/apps/emqx_ds_backends/README.md new file mode 100644 index 000000000..02986e0e1 --- /dev/null +++ b/apps/emqx_ds_backends/README.md @@ -0,0 +1,32 @@ +# EMQX Durable Storage Backends + +This is a placeholder OTP application that depends on all durable storage backends available in the release. +Starting it will ensure that all backends are properly loaded and registered. 
+ +Consumers of `emqx_durable_storage` API should depend on this application instead of the parent `emqx_durable_storage`. + +# Features + +N/A + +# Limitation + +N/A + +# Documentation links + +N/A + +# Usage + +Any business application that creates DS databases should add this application as a dependency. + +# Configurations + +None + +# Other +N/A + +# Contributing +Please see our [contributing.md](../../CONTRIBUTING.md). diff --git a/apps/emqx_ds_backends/rebar.config b/apps/emqx_ds_backends/rebar.config new file mode 100644 index 000000000..7af4ea8e3 --- /dev/null +++ b/apps/emqx_ds_backends/rebar.config @@ -0,0 +1,5 @@ +%% -*- mode:erlang -*- +{deps, [ + {emqx_utils, {path, "../emqx_utils"}}, + {emqx_durable_storage, {path, "../emqx_durable_storage"}} +]}. diff --git a/apps/emqx_ds_backends/src/emqx_ds_backends.app.src b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src new file mode 100644 index 000000000..5215124e4 --- /dev/null +++ b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_ds_backends, [ + {description, "A placeholder application that depends on all available DS backends"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, emqx_durable_storage, emqx_ds_builtin_local]}, + {optional_applications, [emqx_ds_builtin_raft]}, + {env, []} +]}. diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl similarity index 60% rename from apps/emqx_durable_storage/test/emqx_ds_SUITE.erl rename to apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index eb14456cb..11ea1417f 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -13,7 +13,7 @@ %% See the License for the specific language governing permissions and %% limitations under the License. 
%%-------------------------------------------------------------------- --module(emqx_ds_SUITE). +-module(emqx_ds_backends_SUITE). -compile(export_all). -compile(nowarn_export_all). @@ -26,52 +26,27 @@ -define(N_SHARDS, 1). -opts() -> - #{ - backend => builtin, - storage => {emqx_ds_storage_reference, #{}}, - n_shards => ?N_SHARDS, - n_sites => 1, - replication_factor => 3, - replication_options => #{} - }. +opts(Config) -> + proplists:get_value(ds_conf, Config). %% A simple smoke test that verifies that opening/closing the DB %% doesn't crash, and not much else -t_00_smoke_open_drop(_Config) -> +t_00_smoke_open_drop(Config) -> DB = 'DB', - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - %% Check metadata: - %% We have only one site: - [Site] = emqx_ds_replication_layer_meta:sites(), - %% Check all shards: - Shards = emqx_ds_replication_layer_meta:shards(DB), - %% Since there is only one site all shards should be allocated - %% to this site: - MyShards = emqx_ds_replication_layer_meta:my_shards(DB), - ?assertEqual(?N_SHARDS, length(Shards)), - lists:foreach( - fun(Shard) -> - ?assertEqual( - [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard) - ) - end, - Shards - ), - ?assertEqual(lists:sort(Shards), lists:sort(MyShards)), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Reopen the DB and make sure the operation is idempotent: - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Close the DB: ?assertMatch(ok, emqx_ds:drop_db(DB)). 
%% A simple smoke test that verifies that storing the messages doesn't %% crash -t_01_smoke_store(_Config) -> +t_01_smoke_store(Config) -> ?check_trace( #{timetrap => 10_000}, begin DB = default, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msg = message(<<"foo/bar">>, <<"foo">>, 0), ?assertMatch(ok, emqx_ds:store_batch(DB, [Msg])) end, @@ -80,9 +55,9 @@ t_01_smoke_store(_Config) -> %% A simple smoke test that verifies that getting the list of streams %% doesn't crash and that iterators can be opened. -t_02_smoke_get_streams_start_iter(_Config) -> +t_02_smoke_get_streams_start_iter(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = ['#'], [{Rank, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), @@ -91,9 +66,9 @@ t_02_smoke_get_streams_start_iter(_Config) -> %% A simple smoke test that verifies that it's possible to iterate %% over messages. -t_03_smoke_iterate(_Config) -> +t_03_smoke_iterate(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = ['#'], Msgs = [ @@ -101,7 +76,7 @@ t_03_smoke_iterate(_Config) -> message(<<"foo">>, <<"2">>, 1), message(<<"bar/bar">>, <<"3">>, 2) ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs, #{sync => true})), [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), @@ -112,9 +87,9 @@ t_03_smoke_iterate(_Config) -> %% to the external resources, such as clients' sessions, and they %% should always be able to continue replaying the topics from where %% they are left off. 
-t_04_restart(_Config) -> +t_04_restart(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), TopicFilter = ['#'], StartTime = 0, Msgs = [ @@ -122,22 +97,22 @@ t_04_restart(_Config) -> message(<<"foo">>, <<"2">>, 1), message(<<"bar/bar">>, <<"3">>, 2) ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs, #{sync => true})), [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), %% Restart the application: ?tp(warning, emqx_ds_SUITE_restart_app, #{}), ok = application:stop(emqx_durable_storage), {ok, _} = application:ensure_all_started(emqx_durable_storage), - ok = emqx_ds:open_db(DB, opts()), + ok = emqx_ds:open_db(DB, opts(Config)), %% The old iterator should be still operational: {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), ?assertEqual(Msgs, Batch, {Iter0, Iter}). %% Check that we can create iterators directly from DS keys. -t_05_update_iterator(_Config) -> +t_05_update_iterator(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), TopicFilter = ['#'], StartTime = 0, Msgs = [ @@ -158,104 +133,42 @@ t_05_update_iterator(_Config) -> ?assertEqual(Msgs, [Msg0 | Batch], #{from_key => Iter1, final_iter => Iter}), ok. -t_06_update_config(_Config) -> +t_06_smoke_add_generation(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - TopicFilter = ['#'], + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + ?assertMatch( + [{_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ), + ?assertMatch(ok, emqx_ds:add_generation(DB)), + ?assertMatch( + [{_, _}, {_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ). 
- DataSet = update_data_set(), - - ToMsgs = fun(Datas) -> - lists:map( - fun({Topic, Payload}) -> - message(Topic, Payload, emqx_message:timestamp_now()) - end, - Datas - ) - end, - - {_, StartTimes, MsgsList} = - lists:foldl( - fun - (Datas, {true, TimeAcc, MsgAcc}) -> - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {false, TimeAcc, [Msgs | MsgAcc]}; - (Datas, {Any, TimeAcc, MsgAcc}) -> - timer:sleep(500), - ?assertMatch(ok, emqx_ds:update_db_config(DB, opts())), - timer:sleep(500), - StartTime = emqx_message:timestamp_now(), - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {Any, [StartTime | TimeAcc], [Msgs | MsgAcc]} - end, - {true, [emqx_message:timestamp_now()], []}, - DataSet - ), - - Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Acc ++ Msgs0, - Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, StartTime), - Msgs - end, - lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). 
- -t_07_add_generation(_Config) -> +t_07_smoke_update_config(Config) -> DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), - TopicFilter = ['#'], - - DataSet = update_data_set(), - - ToMsgs = fun(Datas) -> - lists:map( - fun({Topic, Payload}) -> - message(Topic, Payload, emqx_message:timestamp_now()) - end, - Datas - ) - end, - - {_, StartTimes, MsgsList} = - lists:foldl( - fun - (Datas, {true, TimeAcc, MsgAcc}) -> - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {false, TimeAcc, [Msgs | MsgAcc]}; - (Datas, {Any, TimeAcc, MsgAcc}) -> - timer:sleep(500), - ?assertMatch(ok, emqx_ds:add_generation(DB)), - timer:sleep(500), - StartTime = emqx_message:timestamp_now(), - Msgs = ToMsgs(Datas), - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - {Any, [StartTime | TimeAcc], [Msgs | MsgAcc]} - end, - {true, [emqx_message:timestamp_now()], []}, - DataSet - ), - - Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Acc ++ Msgs0, - Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, StartTime), - Msgs - end, - lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + ?assertMatch( + [{_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ), + ?assertMatch(ok, emqx_ds:update_db_config(DB, opts(Config))), + ?assertMatch( + [{_, _}, {_, _}], + maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + ). %% Verifies the basic usage of `list_generations_with_lifetimes' and `drop_generation'... %% 1) Cannot drop current generation. %% 2) All existing generations are returned by `list_generation_with_lifetimes'. %% 3) Dropping a generation removes it from the list. %% 4) Dropped generations stay dropped even after restarting the application. 
-t_08_smoke_list_drop_generation(_Config) -> +t_08_smoke_list_drop_generation(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), %% Exactly one generation at first. Generations0 = emqx_ds:list_generations_with_lifetimes(DB), ?assertMatch( @@ -295,7 +208,7 @@ t_08_smoke_list_drop_generation(_Config) -> %% Should persist surviving generation list ok = application:stop(emqx_durable_storage), {ok, _} = application:ensure_all_started(emqx_durable_storage), - ok = emqx_ds:open_db(DB, opts()), + ok = emqx_ds:open_db(DB, opts(Config)), Generations3 = emqx_ds:list_generations_with_lifetimes(DB), ?assertMatch( @@ -310,12 +223,12 @@ t_08_smoke_list_drop_generation(_Config) -> ), ok. -t_09_atomic_store_batch(_Config) -> +t_09_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -335,12 +248,12 @@ t_09_atomic_store_batch(_Config) -> ), ok. -t_10_non_atomic_store_batch(_Config) -> +t_10_non_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -369,11 +282,11 @@ t_10_non_atomic_store_batch(_Config) -> ), ok. -t_smoke_delete_next(_Config) -> +t_smoke_delete_next(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), StartTime = 0, TopicFilter = [<<"foo">>, '#'], Msgs = @@ -410,7 +323,7 @@ t_smoke_delete_next(_Config) -> ), ok. 
-t_drop_generation_with_never_used_iterator(_Config) -> +t_drop_generation_with_never_used_iterator(Config) -> %% This test checks how the iterator behaves when: %% 1) it's created at generation 1 and not consumed from. %% 2) generation 2 is created and 1 dropped. @@ -418,7 +331,7 @@ t_drop_generation_with_never_used_iterator(_Config) -> %% In this case, the iterator won't see any messages and the stream will end. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -458,7 +371,7 @@ t_drop_generation_with_never_used_iterator(_Config) -> ok. -t_drop_generation_with_used_once_iterator(_Config) -> +t_drop_generation_with_used_once_iterator(Config) -> %% This test checks how the iterator behaves when: %% 1) it's created at generation 1 and consumes at least 1 message. %% 2) generation 2 is created and 1 dropped. @@ -466,7 +379,7 @@ t_drop_generation_with_used_once_iterator(_Config) -> %% In this case, the iterator should see no more messages and the stream will end. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -499,12 +412,12 @@ t_drop_generation_with_used_once_iterator(_Config) -> emqx_ds_test_helpers:consume_iter(DB, Iter1) ). -t_drop_generation_update_iterator(_Config) -> +t_drop_generation_update_iterator(Config) -> %% This checks the behavior of `emqx_ds:update_iterator' after the generation %% underlying the iterator has been dropped. 
DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -528,12 +441,12 @@ t_drop_generation_update_iterator(_Config) -> emqx_ds:update_iterator(DB, Iter1, Key2) ). -t_make_iterator_stale_stream(_Config) -> +t_make_iterator_stale_stream(Config) -> %% This checks the behavior of `emqx_ds:make_iterator' after the generation underlying %% the stream has been dropped. DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), TopicFilter = emqx_topic:words(<<"foo/+">>), @@ -556,7 +469,7 @@ t_make_iterator_stale_stream(_Config) -> ok. -t_get_streams_concurrently_with_drop_generation(_Config) -> +t_get_streams_concurrently_with_drop_generation(Config) -> %% This checks that we can get all streams while a generation is dropped %% mid-iteration. @@ -564,7 +477,7 @@ t_get_streams_concurrently_with_drop_generation(_Config) -> ?check_trace( #{timetrap => 5_000}, begin - ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), ok = emqx_ds:add_generation(DB), @@ -593,171 +506,6 @@ t_get_streams_concurrently_with_drop_generation(_Config) -> [] ). -t_error_mapping_replication_layer(_Config) -> - %% This checks that the replication layer maps recoverable errors correctly. 
- - ok = emqx_ds_test_helpers:mock_rpc(), - ok = snabbkaffe:start_trace(), - - DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), - [Shard1, Shard2] = emqx_ds_replication_layer_meta:shards(DB), - - TopicFilter = emqx_topic:words(<<"foo/#">>), - Msgs = [ - message(<<"C1">>, <<"foo/bar">>, <<"1">>, 0), - message(<<"C1">>, <<"foo/baz">>, <<"2">>, 1), - message(<<"C2">>, <<"foo/foo">>, <<"3">>, 2), - message(<<"C3">>, <<"foo/xyz">>, <<"4">>, 3), - message(<<"C4">>, <<"foo/bar">>, <<"5">>, 4), - message(<<"C5">>, <<"foo/oof">>, <<"6">>, 5) - ], - - ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard1}), - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard2}), - - Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0), - Iterators0 = lists:map( - fun({_Rank, S}) -> - {ok, Iter} = emqx_ds:make_iterator(DB, S, TopicFilter, 0), - Iter - end, - Streams0 - ), - - %% Disrupt the link to the second shard. - ok = emqx_ds_test_helpers:mock_rpc_result( - fun(_Node, emqx_ds_replication_layer, _Function, Args) -> - case Args of - [DB, Shard1 | _] -> passthrough; - [DB, Shard2 | _] -> unavailable - end - end - ), - - %% Result of `emqx_ds:get_streams/3` will just contain partial results, not an error. - Streams1 = emqx_ds:get_streams(DB, TopicFilter, 0), - ?assert( - length(Streams1) > 0 andalso length(Streams1) =< length(Streams0), - Streams1 - ), - - %% At least one of `emqx_ds:make_iterator/4` will end in an error. 
- Results1 = lists:map( - fun({_Rank, S}) -> - case emqx_ds:make_iterator(DB, S, TopicFilter, 0) of - Ok = {ok, _Iter} -> - Ok; - Error = {error, recoverable, {erpc, _}} -> - Error; - Other -> - ct:fail({unexpected_result, Other}) - end - end, - Streams0 - ), - ?assert( - length([error || {error, _, _} <- Results1]) > 0, - Results1 - ), - - %% At least one of `emqx_ds:next/3` over initial set of iterators will end in an error. - Results2 = lists:map( - fun(Iter) -> - case emqx_ds:next(DB, Iter, _BatchSize = 42) of - Ok = {ok, _Iter, [_ | _]} -> - Ok; - Error = {error, recoverable, {badrpc, _}} -> - Error; - Other -> - ct:fail({unexpected_result, Other}) - end - end, - Iterators0 - ), - ?assert( - length([error || {error, _, _} <- Results2]) > 0, - Results2 - ), - meck:unload(). - -%% This testcase verifies the behavior of `store_batch' operation -%% when the underlying code experiences recoverable or unrecoverable -%% problems. -t_store_batch_fail(_Config) -> - ?check_trace( - #{timetrap => 15_000}, - try - meck:new(emqx_ds_storage_layer, [passthrough, no_history]), - DB = ?FUNCTION_NAME, - ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), - %% Success: - Batch1 = [ - message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1), - message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1) - ], - ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})), - %% Inject unrecoverable error: - meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) -> - {error, unrecoverable, mock} - end), - Batch2 = [ - message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1), - message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1) - ], - ?assertMatch( - {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true}) - ), - meck:unload(emqx_ds_storage_layer), - %% Inject a recoveralbe error: - meck:new(ra, [passthrough, no_history]), - meck:expect(ra, process_command, fun(Servers, Shard, Command) -> - ?tp(ra_command, #{servers => Servers, shard => Shard, command => Command}), - 
{timeout, mock} - end), - Batch3 = [ - message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2), - message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2), - message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3), - message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3) - ], - %% Note: due to idempotency issues the number of retries - %% is currently set to 0: - ?assertMatch( - {error, recoverable, {timeout, mock}}, - emqx_ds:store_batch(DB, Batch3, #{sync => true}) - ), - meck:unload(ra), - ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), - lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) - after - meck:unload() - end, - [ - {"message ordering", fun(StoredMessages, _Trace) -> - [{_, Stream1}, {_, Stream2}] = StoredMessages, - ?assertMatch( - [ - #message{payload = <<"1">>}, - #message{payload = <<"2">>}, - #message{payload = <<"5">>}, - #message{payload = <<"7">>} - ], - Stream1 - ), - ?assertMatch( - [ - #message{payload = <<"6">>}, - #message{payload = <<"8">>} - ], - Stream2 - ) - end} - ] - ). - update_data_set() -> [ [ @@ -802,12 +550,46 @@ delete(DB, It0, Selector, BatchSize, Acc) -> %% CT callbacks -all() -> emqx_common_test_helpers:all(?MODULE). +-if(?EMQX_RELEASE_EDITION == ee). +all() -> + [{group, builtin_local}, {group, builtin_raft}]. +-else. +all() -> + [{group, builtin_local}]. +-endif. + +groups() -> + TCs = emqx_common_test_helpers:all(?MODULE), + [ + {builtin_local, TCs}, + {builtin_raft, TCs} + ]. + +init_per_group(builtin_local, Config) -> + Conf = #{ + backend => builtin_local, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS + }, + [{ds_conf, Conf} | Config]; +init_per_group(builtin_raft, Config) -> + Conf = #{ + backend => builtin_raft, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS, + n_sites => 1, + replication_factor => 3, + replication_options => #{} + }, + [{ds_conf, Conf} | Config]. + +end_per_group(_Group, Config) -> + Config. 
init_per_suite(Config) -> emqx_common_test_helpers:clear_screen(), Apps = emqx_cth_suite:start( - [mria, emqx_durable_storage], + [mria, emqx_ds_backends], #{work_dir => ?config(priv_dir, Config)} ), [{apps, Apps} | Config]. @@ -820,7 +602,8 @@ init_per_testcase(_TC, Config) -> application:ensure_all_started(emqx_durable_storage), Config. -end_per_testcase(_TC, _Config) -> +end_per_testcase(TC, _Config) -> + ok = emqx_ds:drop_db(TC), snabbkaffe:stop(), ok = application:stop(emqx_durable_storage), mria:stop(), diff --git a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl index e84abb78b..978da91a4 100644 --- a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl @@ -698,6 +698,81 @@ t_error_mapping_replication_layer(_Config) -> ), meck:unload(). +%% This testcase verifies the behavior of `store_batch' operation +%% when the underlying code experiences recoverable or unrecoverable +%% problems. 
+t_store_batch_fail(_Config) -> + ?check_trace( + #{timetrap => 15_000}, + try + meck:new(emqx_ds_storage_layer, [passthrough, no_history]), + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), + %% Success: + Batch1 = [ + message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})), + %% Inject unrecoverable error: + meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) -> + {error, unrecoverable, mock} + end), + Batch2 = [ + message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1) + ], + ?assertMatch( + {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true}) + ), + meck:unload(emqx_ds_storage_layer), + %% Inject a recoveralbe error: + meck:new(ra, [passthrough, no_history]), + meck:expect(ra, process_command, fun(Servers, Shard, Command) -> + ?tp(ra_command, #{servers => Servers, shard => Shard, command => Command}), + {timeout, mock} + end), + Batch3 = [ + message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2), + message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2), + message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3), + message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3) + ], + %% Note: due to idempotency issues the number of retries + %% is currently set to 0: + ?assertMatch( + {error, recoverable, {timeout, mock}}, + emqx_ds:store_batch(DB, Batch3, #{sync => true}) + ), + meck:unload(ra), + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), + lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) + after + meck:unload() + end, + [ + {"message ordering", fun(StoredMessages, _Trace) -> + [{_, Stream1}, {_, Stream2}] = StoredMessages, + ?assertMatch( + [ + #message{payload = <<"1">>}, + #message{payload = <<"2">>}, + #message{payload = <<"5">>}, + #message{payload = <<"7">>} + ], + Stream1 + ), + ?assertMatch( + [ + #message{payload = <<"6">>}, 
+ #message{payload = <<"8">>} + ], + Stream2 + ) + end} + ] + ). + %% shard_server_info(Node, DB, Shard, Site, Info) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 3ca2dcefd..331c9806b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -87,6 +87,8 @@ %% Type declarations %%================================================================================ +-define(APP, emqx_durable_storage). + %% # "Record" integer keys. We use maps with integer keys to avoid persisting and sending %% records over the wire. %% tags: diff --git a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src index 7a20577d4..7bfa6efd3 100644 --- a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src +++ b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src @@ -2,10 +2,10 @@ {application, emqx_durable_storage, [ {description, "Message persistence and subscription replays for EMQX"}, % strict semver, bump manually! - {vsn, "0.2.1"}, + {vsn, "0.3.0"}, {modules, []}, {registered, []}, - {applications, [kernel, stdlib, rocksdb, gproc, mria, ra, emqx_utils]}, + {applications, [kernel, stdlib, rocksdb, gproc, mria, emqx_utils]}, {mod, {emqx_ds_app, []}}, {env, []} ]}. 
diff --git a/apps/emqx_machine/priv/reboot_lists.eterm b/apps/emqx_machine/priv/reboot_lists.eterm index c3311c09b..277d9fd66 100644 --- a/apps/emqx_machine/priv/reboot_lists.eterm +++ b/apps/emqx_machine/priv/reboot_lists.eterm @@ -42,6 +42,7 @@ esasl, emqx_utils, emqx_durable_storage, + emqx_ds_backends, emqx_http_lib, emqx_resource, emqx_connector, From a0fbd37e5843c0f34a04c4f4f93f4223e5ac826c Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:46:38 +0200 Subject: [PATCH 07/26] refactor(emqx): Use emqx_ds_backends application --- apps/emqx/src/emqx.app.src | 2 +- apps/emqx/src/emqx_ds_schema.erl | 2 +- apps/emqx/test/emqx_persistent_messages_SUITE.erl | 2 +- apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 97769fe1f..3d26c63ed 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -18,7 +18,7 @@ sasl, lc, hocon, - emqx_durable_storage, + emqx_ds_backends, bcrypt, pbkdf2, emqx_http_lib, diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 5902bcfb7..2fd4752bd 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -62,7 +62,7 @@ translate_builtin( {emqx_ds_storage_reference, #{}} end, #{ - backend => builtin, + backend => builtin_raft, n_shards => NShards, n_sites => NSites, replication_factor => ReplFactor, diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 0b54c2c55..951d72c8d 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -570,7 +570,7 @@ wait_shards_online(Nodes = [Node | _]) -> ?retry(500, 10, [?assertEqual(NShards, shards_online(N)) || N <- Nodes]). 
shards_online(Node) -> - length(erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [?PERSISTENT_MESSAGE_DB])). + length(erpc:call(Node, emqx_ds_builtin_raft_db_sup, which_shards, [?PERSISTENT_MESSAGE_DB])). get_mqtt_port(Node, Type) -> {_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]), diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 06bf7f045..a69ae22c2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -180,7 +180,7 @@ prometheus_per_db(NodeOrAggr) -> prometheus_per_db(NodeOrAggr, DB, Acc) end, #{}, - emqx_ds_builtin_db_sup:which_dbs() + emqx_ds_builtin_raft_db_sup:which_dbs() ). %% This function returns the data in the following format: @@ -246,7 +246,7 @@ prometheus_per_shard(NodeOrAggr) -> ) end, #{}, - emqx_ds_builtin_db_sup:which_dbs() + emqx_ds_builtin_raft_db_sup:which_dbs() ). 
prometheus_per_shard(NodeOrAggr, DB, Shard, Acc0) -> From 09c3ae795dca9305f8b49cc329d10c35938e3f2d Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 13 Jun 2024 23:53:22 +0200 Subject: [PATCH 08/26] refactor(ds_raft): Replace egress server with common emqx_ds_buffer --- .../test/emqx_ds_backends_SUITE.erl | 4 +- .../src/emqx_ds_builtin_raft_db_sup.erl | 3 +- .../src/emqx_ds_replication_layer.erl | 53 ++- .../src/emqx_ds_replication_layer_egress.erl | 392 ------------------ .../test/emqx_ds_replication_SUITE.erl | 4 +- .../src/emqx_ds_buffer.erl | 29 +- .../emqx_ds_storage_bitfield_lts_SUITE.erl | 4 +- .../test/emqx_ds_test_helpers.erl | 2 +- 8 files changed, 62 insertions(+), 429 deletions(-) delete mode 100644 apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index 11ea1417f..d119766f3 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -241,7 +241,7 @@ t_09_atomic_store_batch(Config) -> sync => true }) ), - {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush}), + {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_buffer_flush}), ?assertMatch(#{batch := [_, _, _]}, Flush) end, [] @@ -271,7 +271,7 @@ t_10_non_atomic_store_batch(Config) -> end, fun(Trace) -> %% Should contain one flush per message. 
- Batches = ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)), + Batches = ?projection(batch, ?of_kind(emqx_ds_buffer_flush, Trace)), ?assertMatch([_], Batches), ?assertMatch( [_, _, _], diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl index 74e97bf52..1816e551f 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_db_sup.erl @@ -267,9 +267,10 @@ shard_allocator_spec(DB) -> }. egress_spec(DB, Shard) -> + Options = #{}, #{ id => Shard, - start => {emqx_ds_replication_layer_egress, start_link, [DB, Shard]}, + start => {emqx_ds_buffer, start_link, [emqx_ds_replication_layer, Options, DB, Shard]}, shutdown => 5_000, restart => permanent, type => worker diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 45f04e341..1c7e0c1c2 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -7,6 +7,7 @@ -module(emqx_ds_replication_layer). %-behaviour(emqx_ds). +-behaviour(emqx_ds_buffer). -export([ list_shards/1, @@ -25,8 +26,12 @@ update_iterator/3, next/3, delete_next/4, - shard_of_message/3, - current_timestamp/2 + + current_timestamp/2, + + shard_of_message/4, + flush_buffer/4, + init_buffer/3 ]). %% internal exports: @@ -234,7 +239,7 @@ drop_db(DB) -> emqx_ds:store_batch_result(). store_batch(DB, Messages, Opts) -> try - emqx_ds_replication_layer_egress:store_batch(DB, Messages, Opts) + emqx_ds_buffer:store_batch(DB, Messages, Opts) catch error:{Reason, _Call} when Reason == timeout; Reason == noproc -> {error, recoverable, Reason} @@ -350,17 +355,6 @@ delete_next(DB, Iter0, Selector, BatchSize) -> Other end. 
--spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> - emqx_ds_replication_layer:shard_id(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy) -> - N = emqx_ds_replication_shard_allocator:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, - integer_to_binary(Hash). - -spec foreach_shard(emqx_ds:db(), fun((shard_id()) -> _)) -> ok. foreach_shard(DB, Fun) -> lists:foreach(Fun, list_shards(DB)). @@ -372,9 +366,38 @@ current_timestamp(DB, Shard) -> emqx_ds_builtin_raft_sup:get_gvar(DB, ?gv_timestamp(Shard), 0). %%================================================================================ -%% behavior callbacks +%% emqx_ds_buffer callbacks %%================================================================================ +-record(bs, {}). +-type egress_state() :: #bs{}. + +-spec init_buffer(emqx_ds:db(), shard_id(), _Options) -> {ok, egress_state()}. +init_buffer(_DB, _Shard, _Options) -> + {ok, #bs{}}. + +-spec flush_buffer(emqx_ds:db(), shard_id(), [emqx_types:message()], egress_state()) -> + {egress_state(), ok | {error, recoverable | unrecoverable, _}}. +flush_buffer(DB, Shard, Messages, State) -> + case ra_store_batch(DB, Shard, Messages) of + {timeout, ServerId} -> + Result = {error, recoverable, {timeout, ServerId}}; + Result -> + ok + end, + {State, Result}. + +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> + emqx_ds_replication_layer:shard_id(). +shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + N = emqx_ds_replication_shard_allocator:n_shards(DB), + Hash = + case SerializeBy of + clientid -> erlang:phash2(From, N); + topic -> erlang:phash2(Topic, N) + end, + integer_to_binary(Hash). 
+ %%================================================================================ %% Internal exports (RPC targets) %%================================================================================ diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl deleted file mode 100644 index ce117011c..000000000 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer_egress.erl +++ /dev/null @@ -1,392 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%%-------------------------------------------------------------------- - -%% @doc Egress servers are responsible for proxing the outcoming -%% `store_batch' requests towards EMQX DS shards. -%% -%% They re-assemble messages from different local processes into -%% fixed-sized batches, and introduce centralized channels between the -%% nodes. They are also responsible for maintaining backpressure -%% towards the local publishers. -%% -%% There is (currently) one egress process for each shard running on -%% each node, but it should be possible to have a pool of egress -%% servers, if needed. --module(emqx_ds_replication_layer_egress). - --behaviour(gen_server). - -%% API: --export([start_link/2, store_batch/3]). - -%% behavior callbacks: --export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). - -%% internal exports: --export([]). - --export_type([]). - --include_lib("emqx_utils/include/emqx_message.hrl"). --include_lib("snabbkaffe/include/trace.hrl"). - -%%================================================================================ -%% Type declarations -%%================================================================================ - --define(via(DB, Shard), {via, gproc, {n, l, {?MODULE, DB, Shard}}}). --define(flush, flush). 
- --record(enqueue_req, { - messages :: [emqx_types:message()], - sync :: boolean(), - atomic :: boolean(), - n_messages :: non_neg_integer(), - payload_bytes :: non_neg_integer() -}). - -%%================================================================================ -%% API functions -%%================================================================================ - --spec start_link(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> {ok, pid()}. -start_link(DB, Shard) -> - gen_server:start_link(?via(DB, Shard), ?MODULE, [DB, Shard], []). - --spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> - emqx_ds:store_batch_result(). -store_batch(DB, Messages, Opts) -> - Sync = maps:get(sync, Opts, true), - Atomic = maps:get(atomic, Opts, false), - %% Usually we expect all messages in the batch to go into the - %% single shard, so this function is optimized for the happy case. - case shards_of_batch(DB, Messages) of - [{Shard, {NMsgs, NBytes}}] -> - %% Happy case: - enqueue_call_or_cast( - ?via(DB, Shard), - #enqueue_req{ - messages = Messages, - sync = Sync, - atomic = Atomic, - n_messages = NMsgs, - payload_bytes = NBytes - } - ); - [_, _ | _] when Atomic -> - %% It's impossible to commit a batch to multiple shards - %% atomically - {error, unrecoverable, atomic_commit_to_multiple_shards}; - _Shards -> - %% Use a slower implementation for the unlikely case: - repackage_messages(DB, Messages, Sync) - end. - -%%================================================================================ -%% behavior callbacks -%%================================================================================ - --record(s, { - db :: emqx_ds:db(), - shard :: emqx_ds_replication_layer:shard_id(), - metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), - n_retries = 0 :: non_neg_integer(), - %% FIXME: Currently max_retries is always 0, because replication - %% layer doesn't guarantee idempotency. 
Retrying would create - %% duplicate messages. - max_retries = 0 :: non_neg_integer(), - n = 0 :: non_neg_integer(), - n_bytes = 0 :: non_neg_integer(), - tref :: undefined | reference(), - queue :: queue:queue(emqx_types:message()), - pending_replies = [] :: [gen_server:from()] -}). - -init([DB, Shard]) -> - process_flag(trap_exit, true), - process_flag(message_queue_data, off_heap), - logger:update_process_metadata(#{domain => [emqx, ds, egress, DB]}), - MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), - ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), - S = #s{ - db = DB, - shard = Shard, - metrics_id = MetricsId, - queue = queue:new() - }, - {ok, S}. - -format_status(Status) -> - maps:map( - fun - (state, #s{db = DB, shard = Shard, queue = Q}) -> - #{ - db => DB, - shard => Shard, - queue => queue:len(Q) - }; - (_, Val) -> - Val - end, - Status - ). - -handle_call( - #enqueue_req{ - messages = Msgs, - sync = Sync, - atomic = Atomic, - n_messages = NMsgs, - payload_bytes = NBytes - }, - From, - S0 = #s{pending_replies = Replies0} -) -> - S = S0#s{pending_replies = [From | Replies0]}, - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; -handle_call(_Call, _From, S) -> - {reply, {error, unknown_call}, S}. - -handle_cast( - #enqueue_req{ - messages = Msgs, - sync = Sync, - atomic = Atomic, - n_messages = NMsgs, - payload_bytes = NBytes - }, - S -) -> - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; -handle_cast(_Cast, S) -> - {noreply, S}. - -handle_info(?flush, S) -> - {noreply, flush(S)}; -handle_info(_Info, S) -> - {noreply, S}. - -terminate(_Reason, _S) -> - ok. 
- -%%================================================================================ -%% Internal exports -%%================================================================================ - -%%================================================================================ -%% Internal functions -%%================================================================================ - -enqueue( - Sync, - Atomic, - Msgs, - BatchSize, - BatchBytes, - S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0} -) -> - %% At this point we don't split the batches, even when they aren't - %% atomic. It wouldn't win us anything in terms of memory, and - %% EMQX currently feeds data to DS in very small batches, so - %% granularity should be fine enough. - NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), - NBytesMax = application:get_env(emqx_durable_storage, egress_batch_bytes, infinity), - NMsgs = NMsgs0 + BatchSize, - NBytes = NBytes0 + BatchBytes, - case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NMsgs0 > 0) of - true -> - %% Adding this batch would cause buffer to overflow. Flush - %% it now, and retry: - S1 = flush(S0), - enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); - false -> - %% The buffer is empty, we enqueue the atomic batch in its - %% entirety: - Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), - S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, - case NMsgs >= NMax orelse NBytes >= NBytesMax of - true -> - flush(S1); - false -> - ensure_timer(S1) - end - end. - --define(COOLDOWN_MIN, 1000). --define(COOLDOWN_MAX, 5000). - -flush(S) -> - do_flush(cancel_timer(S)). 
- -do_flush(S0 = #s{n = 0}) -> - S0; -do_flush( - S = #s{ - queue = Q, - pending_replies = Replies, - db = DB, - shard = Shard, - metrics_id = Metrics, - n_retries = Retries, - max_retries = MaxRetries - } -) -> - Messages = queue:to_list(Q), - T0 = erlang:monotonic_time(microsecond), - Result = emqx_ds_replication_layer:ra_store_batch(DB, Shard, Messages), - T1 = erlang:monotonic_time(microsecond), - emqx_ds_builtin_metrics:observe_egress_flush_time(Metrics, T1 - T0), - case Result of - ok -> - emqx_ds_builtin_metrics:inc_egress_batches(Metrics), - emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), - emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), - ?tp( - emqx_ds_replication_layer_egress_flush, - #{db => DB, shard => Shard, batch => Messages} - ), - lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), - erlang:garbage_collect(), - S#s{ - n = 0, - n_bytes = 0, - queue = queue:new(), - pending_replies = [] - }; - {timeout, ServerId} when Retries < MaxRetries -> - %% Note: this is a hot loop, so we report error messages - %% with `debug' level to avoid wiping the logs. Instead, - %% error the detection must rely on the metrics. Debug - %% logging can be enabled for the particular egress server - %% via logger domain. - ?tp( - debug, - emqx_ds_replication_layer_egress_flush_retry, - #{db => DB, shard => Shard, reason => timeout, server_id => ServerId} - ), - %% Retry sending the batch: - emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), - erlang:garbage_collect(), - %% We block the gen_server until the next retry. 
- BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), - timer:sleep(BlockTime), - S#s{n_retries = Retries + 1}; - Err -> - ?tp( - debug, - emqx_ds_replication_layer_egress_flush_failed, - #{db => DB, shard => Shard, error => Err} - ), - emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), - Reply = - case Err of - {error, _, _} -> Err; - {timeout, ServerId} -> {error, recoverable, {timeout, ServerId}}; - _ -> {error, unrecoverable, Err} - end, - lists:foreach( - fun(From) -> gen_server:reply(From, Reply) end, Replies - ), - erlang:garbage_collect(), - S#s{ - n = 0, - n_bytes = 0, - queue = queue:new(), - pending_replies = [], - n_retries = 0 - } - end. - --spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> - [{emqx_ds_replication_layer:shard_id(), {NMessages, NBytes}}] -when - NMessages :: non_neg_integer(), - NBytes :: non_neg_integer(). -shards_of_batch(DB, Messages) -> - maps:to_list( - lists:foldl( - fun(Message, Acc) -> - %% TODO: sharding strategy must be part of the DS DB schema: - Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), - Size = payload_size(Message), - maps:update_with( - Shard, - fun({N, S}) -> - {N + 1, S + Size} - end, - {1, Size}, - Acc - ) - end, - #{}, - Messages - ) - ). - -repackage_messages(DB, Messages, Sync) -> - Batches = lists:foldl( - fun(Message, Acc) -> - Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), - Size = payload_size(Message), - maps:update_with( - Shard, - fun({N, S, Msgs}) -> - {N + 1, S + Size, [Message | Msgs]} - end, - {1, Size, [Message]}, - Acc - ) - end, - #{}, - Messages - ), - maps:fold( - fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> - Err = enqueue_call_or_cast( - ?via(DB, Shard), - #enqueue_req{ - messages = lists:reverse(RevMessages), - sync = Sync, - atomic = false, - n_messages = NMsgs, - payload_bytes = ByteSize - } - ), - compose_errors(ErrAcc, Err) - end, - ok, - Batches - ). 
- -enqueue_call_or_cast(To, Req = #enqueue_req{sync = true}) -> - gen_server:call(To, Req, infinity); -enqueue_call_or_cast(To, Req = #enqueue_req{sync = false}) -> - gen_server:cast(To, Req). - -compose_errors(ErrAcc, ok) -> - ErrAcc; -compose_errors(ok, Err) -> - Err; -compose_errors({error, recoverable, _}, {error, unrecoverable, Err}) -> - {error, unrecoverable, Err}; -compose_errors(ErrAcc, _Err) -> - ErrAcc. - -ensure_timer(S = #s{tref = undefined}) -> - Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), - Tref = erlang:send_after(Interval, self(), ?flush), - S#s{tref = Tref}; -ensure_timer(S) -> - S. - -cancel_timer(S = #s{tref = undefined}) -> - S; -cancel_timer(S = #s{tref = TRef}) -> - _ = erlang:cancel_timer(TRef), - S#s{tref = undefined}. - -%% @doc Return approximate size of the MQTT message (it doesn't take -%% all things into account, for example headers and extras) -payload_size(#message{payload = P, topic = T}) -> - size(P) + size(T). diff --git a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl index 978da91a4..3bb2ba4c4 100644 --- a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl @@ -630,8 +630,8 @@ t_error_mapping_replication_layer(_Config) -> ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard1}), - ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard2}), + ?block_until(#{?snk_kind := emqx_ds_buffer_flush, shard := Shard1}), + ?block_until(#{?snk_kind := emqx_ds_buffer_flush, shard := Shard2}), Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0), Iterators0 = lists:map( diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index f6f6c6241..f0cf4fe83 100644 --- 
a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -21,7 +21,7 @@ -behaviour(gen_server). %% API: --export([start_link/4, store_batch/3]). +-export([start_link/4, store_batch/3, shard_of_message/3]). %% behavior callbacks: -export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -63,7 +63,7 @@ %% API functions %%================================================================================ --spec start_link(module(), _CallbackOptions, emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> +-spec start_link(module(), _CallbackOptions, emqx_ds:db(), _ShardId) -> {ok, pid()}. start_link(CallbackModule, CallbackOptions, DB, Shard) -> gen_server:start_link( @@ -99,6 +99,11 @@ store_batch(DB, Messages, Opts) -> repackage_messages(DB, Messages, Sync) end. +-spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> _Shard. +shard_of_message(DB, Message, ShardBy) -> + {CBM, Options} = persistent_term:get(?cbm(DB)), + CBM:shard_of_message(DB, Message, ShardBy, Options). 
+ %%================================================================================ %% behavior callbacks %%================================================================================ @@ -107,7 +112,7 @@ store_batch(DB, Messages, Opts) -> callback_module :: module(), callback_state :: term(), db :: emqx_ds:db(), - shard :: emqx_ds_replication_layer:shard_id(), + shard :: _ShardId, metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), n_retries = 0 :: non_neg_integer(), %% FIXME: Currently max_retries is always 0, because replication @@ -124,7 +129,7 @@ store_batch(DB, Messages, Opts) -> init([CBM, CBMOptions, DB, Shard]) -> process_flag(trap_exit, true), process_flag(message_queue_data, off_heap), - logger:update_process_metadata(#{domain => [emqx, ds, egress, DB]}), + logger:update_process_metadata(#{domain => [emqx, ds, buffer, DB]}), MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), {ok, CallbackS} = CBM:init_buffer(DB, Shard, CBMOptions), @@ -236,10 +241,6 @@ enqueue( end end. -shard_of_message(DB, Message, ShardBy) -> - {CBM, Options} = persistent_term:get(?cbm(DB)), - CBM:shard_of_message(DB, Message, ShardBy, Options). - -define(COOLDOWN_MIN, 1000). -define(COOLDOWN_MAX, 5000). @@ -273,7 +274,7 @@ do_flush( emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), ?tp( - emqx_ds_replication_layer_egress_flush, + emqx_ds_buffer_flush, #{db => DB, shard => Shard, batch => Messages} ), lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), @@ -285,7 +286,7 @@ do_flush( queue = queue:new(), pending_replies = [] }; - {timeout, ServerId} when Retries < MaxRetries -> + {error, recoverable, Err} when Retries < MaxRetries -> %% Note: this is a hot loop, so we report error messages %% with `debug' level to avoid wiping the logs. Instead, %% error the detection must rely on the metrics. 
Debug @@ -293,8 +294,8 @@ do_flush( %% via logger domain. ?tp( debug, - emqx_ds_replication_layer_egress_flush_retry, - #{db => DB, shard => Shard, reason => timeout, server_id => ServerId} + emqx_ds_buffer_flush_retry, + #{db => DB, shard => Shard, reason => Err} ), %% Retry sending the batch: emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), @@ -306,7 +307,7 @@ do_flush( Err -> ?tp( debug, - emqx_ds_replication_layer_egress_flush_failed, + emqx_ds_buffer_flush_failed, #{db => DB, shard => Shard, error => Err} ), emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), @@ -330,7 +331,7 @@ do_flush( end. -spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> - [{emqx_ds_replication_layer:shard_id(), {NMessages, NBytes}}] + [{_ShardId, {NMessages, NBytes}}] when NMessages :: non_neg_integer(), NBytes :: non_neg_integer(). diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index bf820e0bf..54033ae78 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -276,7 +276,7 @@ t_atomic_store_batch(_Config) -> %% Must contain exactly one flush with all messages. 
?assertMatch( [#{batch := [_, _, _]}], - ?of_kind(emqx_ds_replication_layer_egress_flush, Trace) + ?of_kind(emqx_ds_buffer_flush, Trace) ), ok end @@ -305,7 +305,7 @@ t_non_atomic_store_batch(_Config) -> end, fun(ExpectedMsgs, Trace) -> ProcessedMsgs = lists:append( - ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)) + ?projection(batch, ?of_kind(emqx_ds_buffer_flush, Trace)) ), ?assertEqual( ExpectedMsgs, diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 5e7753058..7130041ec 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -310,7 +310,7 @@ nodes_of_clientid(DB, ClientId, Nodes = [N0 | _]) -> shard_of_clientid(DB, Node, ClientId) -> ?ON( Node, - emqx_ds_replication_layer:shard_of_message(DB, #message{from = ClientId}, clientid) + emqx_ds_buffer:shard_of_message(DB, #message{from = ClientId}, clientid) ). %% Consume eagerly: From ecb172b07e6d1f33d29a5c904072db2519de2d2d Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 02:03:50 +0200 Subject: [PATCH 09/26] refactor(ds): Rename egress metrics to 'buffer' --- .../include/emqx_ds_metrics.hrl | 12 ++++---- .../src/emqx_ds_builtin_metrics.erl | 28 +++++++++---------- apps/emqx_prometheus/src/emqx_prometheus.erl | 12 ++++---- changes/ce/breaking-13248.en.md | 9 ++++++ 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl index 0a82a6682..a76289eb9 100644 --- a/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl +++ b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl @@ -19,17 +19,17 @@ %%%% Egress metrics: %% Number of successfully flushed batches: --define(DS_EGRESS_BATCHES, emqx_ds_egress_batches). +-define(DS_BUFFER_BATCHES, emqx_ds_buffer_batches). 
%% Number of batch flush retries: --define(DS_EGRESS_BATCHES_RETRY, emqx_ds_egress_batches_retry). +-define(DS_BUFFER_BATCHES_RETRY, emqx_ds_buffer_batches_retry). %% Number of batches that weren't flushed due to unrecoverable errors: --define(DS_EGRESS_BATCHES_FAILED, emqx_ds_egress_batches_failed). +-define(DS_BUFFER_BATCHES_FAILED, emqx_ds_buffer_batches_failed). %% Total number of messages that were successfully committed to the storage: --define(DS_EGRESS_MESSAGES, emqx_ds_egress_messages). +-define(DS_BUFFER_MESSAGES, emqx_ds_buffer_messages). %% Total size of payloads that were successfully committed to the storage: --define(DS_EGRESS_BYTES, emqx_ds_egress_bytes). +-define(DS_BUFFER_BYTES, emqx_ds_buffer_bytes). %% Sliding average of flush time (microseconds): --define(DS_EGRESS_FLUSH_TIME, emqx_ds_egress_flush_time). +-define(DS_BUFFER_FLUSH_TIME, emqx_ds_buffer_flush_time). %%%% Storage layer metrics: -define(DS_STORE_BATCH_TIME, emqx_ds_store_batch_time). diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index a69ae22c2..2d9f9ea16 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -68,16 +68,16 @@ -define(DB_METRICS, ?STORAGE_LAYER_METRICS ++ ?FETCH_METRICS). --define(EGRESS_METRICS, [ - {counter, ?DS_EGRESS_BATCHES}, - {counter, ?DS_EGRESS_BATCHES_RETRY}, - {counter, ?DS_EGRESS_BATCHES_FAILED}, - {counter, ?DS_EGRESS_MESSAGES}, - {counter, ?DS_EGRESS_BYTES}, - {slide, ?DS_EGRESS_FLUSH_TIME} +-define(BUFFER_METRICS, [ + {counter, ?DS_BUFFER_BATCHES}, + {counter, ?DS_BUFFER_BATCHES_RETRY}, + {counter, ?DS_BUFFER_BATCHES_FAILED}, + {counter, ?DS_BUFFER_MESSAGES}, + {counter, ?DS_BUFFER_BYTES}, + {slide, ?DS_BUFFER_FLUSH_TIME} ]). --define(SHARD_METRICS, ?EGRESS_METRICS). +-define(SHARD_METRICS, ?BUFFER_METRICS). -type shard_metrics_id() :: binary(). 
@@ -108,35 +108,35 @@ init_for_shard(ShardId) -> %% @doc Increase the number of successfully flushed batches -spec inc_egress_batches(shard_metrics_id()) -> ok. inc_egress_batches(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES). %% @doc Increase the number of time the egress worker had to retry %% flushing the batch -spec inc_egress_batches_retry(shard_metrics_id()) -> ok. inc_egress_batches_retry(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_RETRY). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_RETRY). %% @doc Increase the number of time the egress worker encountered an %% unrecoverable error while trying to flush the batch -spec inc_egress_batches_failed(shard_metrics_id()) -> ok. inc_egress_batches_failed(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_FAILED). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_FAILED). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_messages(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_MESSAGES, NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_MESSAGES, NMessages). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_bytes(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BYTES, NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BYTES, NMessages). %% @doc Add a sample of elapsed time spent flushing the egress to the %% Raft log (in microseconds) -spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. observe_egress_flush_time(Id, FlushTime) -> - catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_EGRESS_FLUSH_TIME, FlushTime). 
+ catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_BUFFER_FLUSH_TIME, FlushTime). -spec observe_store_batch_time(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. observe_store_batch_time({DB, _}, StoreTime) -> diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index f4d0ff2c0..5d88ebd17 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -504,12 +504,12 @@ emqx_collect(K = emqx_mria_bootstrap_num_keys, D) -> gauge_metrics(?MG(K, D, []) emqx_collect(K = emqx_mria_message_queue_len, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = emqx_mria_replayq_len, D) -> gauge_metrics(?MG(K, D, [])); %% DS -emqx_collect(K = ?DS_EGRESS_BATCHES, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = ?DS_EGRESS_BATCHES_RETRY, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = ?DS_EGRESS_BATCHES_FAILED, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = ?DS_EGRESS_MESSAGES, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = ?DS_EGRESS_BYTES, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = ?DS_EGRESS_FLUSH_TIME, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_BATCHES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_BATCHES_RETRY, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_BATCHES_FAILED, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_MESSAGES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_BYTES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUFFER_FLUSH_TIME, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = ?DS_STORE_BATCH_TIME, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = ?DS_BUILTIN_NEXT_TIME, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = ?DS_LTS_SEEK_COUNTER, D) -> counter_metrics(?MG(K, D, [])); diff --git a/changes/ce/breaking-13248.en.md b/changes/ce/breaking-13248.en.md index 
9f2ad2bd8..731196e0b 100644 --- a/changes/ce/breaking-13248.en.md +++ b/changes/ce/breaking-13248.en.md @@ -5,3 +5,12 @@ This backend is available in both open source and enterprise editions. - `builtin_raft`: A durable storage backend that uses Raft algorithm for replication. This backend is available enterprise edition. + +The following Prometheus metrics have been renamed: + +- `emqx_ds_egress_batches` -> `emqx_ds_buffer_batches` +- `emqx_ds_egress_batches_retry` -> `emqx_ds_buffer_batches_retry` +- `emqx_ds_egress_batches_failed` -> `emqx_ds_buffer_batches_failed` +- `emqx_ds_egress_messages` -> `emqx_ds_buffer_messages` +- `emqx_ds_egress_bytes` -> `emqx_ds_buffer_bytes` +- `emqx_ds_egress_flush_time` -> `emqx_ds_buffer_flush_time` From abe41de19b7ba3b1b4ae7fcd79ba42c25be2a065 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 10:46:36 +0200 Subject: [PATCH 10/26] refactor(ds_schema): builtin_local_buffer -> builtin_buffer --- apps/emqx/src/emqx_ds_schema.erl | 14 +++++++------- rel/i18n/emqx_ds_schema.hocon | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 2fd4752bd..6f766c45e 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -160,10 +160,10 @@ fields(builtin) -> )}, {local_write_buffer, sc( - ref(builtin_local_write_buffer), + ref(builtin_write_buffer), #{ importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer) + desc => ?DESC(builtin_write_buffer) } )}, {layout, @@ -179,7 +179,7 @@ fields(builtin) -> } )} ]; -fields(builtin_local_write_buffer) -> +fields(builtin_write_buffer) -> [ {max_items, sc( @@ -188,7 +188,7 @@ fields(builtin_local_write_buffer) -> default => 1000, mapping => "emqx_durable_storage.egress_batch_size", importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer_max_items) + desc => 
?DESC(builtin_write_buffer_max_items) } )}, {flush_interval, @@ -198,7 +198,7 @@ fields(builtin_local_write_buffer) -> default => 100, mapping => "emqx_durable_storage.egress_flush_interval", importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_local_write_buffer_flush_interval) + desc => ?DESC(builtin_write_buffer_flush_interval) } )} ]; @@ -254,8 +254,8 @@ fields(layout_builtin_reference) -> desc(builtin) -> ?DESC(builtin); -desc(builtin_local_write_buffer) -> - ?DESC(builtin_local_write_buffer); +desc(builtin_write_buffer) -> + ?DESC(builtin_write_buffer); desc(layout_builtin_wildcard_optimized) -> ?DESC(layout_builtin_wildcard_optimized); desc(layout_builtin_reference) -> diff --git a/rel/i18n/emqx_ds_schema.hocon b/rel/i18n/emqx_ds_schema.hocon index 65b76b6fa..7875295e6 100644 --- a/rel/i18n/emqx_ds_schema.hocon +++ b/rel/i18n/emqx_ds_schema.hocon @@ -39,21 +39,21 @@ builtin_n_sites.desc: During this phase at least that many sites should come online to distribute shards between them, otherwise message storage will be unavailable until then. After the initialization is complete, sites may be offline, which will affect availability depending on the number of offline sites and replication factor.~""" -builtin_local_write_buffer.label: "Local write buffer" -builtin_local_write_buffer.desc: +builtin_write_buffer.label: "Local write buffer" +builtin_write_buffer.desc: """~ Configuration related to the buffering of messages sent from the local node to the shard leader. EMQX accumulates PUBLISH messages from the local clients in a write buffer before committing them to the durable storage. 
This helps to hide network latency between EMQX nodes and improves write throughput.~""" -builtin_local_write_buffer_max_items.label: "Max items" -builtin_local_write_buffer_max_items.desc: +builtin_write_buffer_max_items.label: "Max items" +builtin_write_buffer_max_items.desc: """~ This configuration parameter defines maximum number of messages stored in the local write buffer.~""" -builtin_local_write_buffer_flush_interval.label: "Flush interval" -builtin_local_write_buffer_flush_interval.desc: +builtin_write_buffer_flush_interval.label: "Flush interval" +builtin_write_buffer_flush_interval.desc: """~ Maximum linger time for the buffered messages. Local write buffer will be flushed _at least_ as often as `flush_interval`. From ea48b1265d63accb5d45661a729ccd6d83a4a311 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 10:49:37 +0200 Subject: [PATCH 11/26] refactor(ds_schema): Extract common builtin fields --- apps/emqx/src/emqx_ds_schema.erl | 83 +++++++++++++++++--------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 6f766c45e..5ec7910eb 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -112,25 +112,6 @@ fields(builtin) -> importance => ?IMPORTANCE_HIDDEN } )}, - {data_dir, - sc( - string(), - #{ - mapping => "emqx_durable_storage.db_data_dir", - required => false, - importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_data_dir) - } - )}, - {n_shards, - sc( - pos_integer(), - #{ - default => 12, - importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_n_shards) - } - )}, %% TODO: Deprecate once cluster management and rebalancing is implemented. 
{"n_sites", sc( @@ -157,27 +138,8 @@ fields(builtin) -> default => #{}, importance => ?IMPORTANCE_HIDDEN } - )}, - {local_write_buffer, - sc( - ref(builtin_write_buffer), - #{ - importance => ?IMPORTANCE_HIDDEN, - desc => ?DESC(builtin_write_buffer) - } - )}, - {layout, - sc( - hoconsc:union(builtin_layouts()), - #{ - desc => ?DESC(builtin_layout), - importance => ?IMPORTANCE_MEDIUM, - default => - #{ - <<"type">> => wildcard_optimized - } - } )} + | common_builtin_fields() ]; fields(builtin_write_buffer) -> [ @@ -252,6 +214,49 @@ fields(layout_builtin_reference) -> )} ]. +common_builtin_fields() -> + [ + {data_dir, + sc( + string(), + #{ + mapping => "emqx_durable_storage.db_data_dir", + required => false, + importance => ?IMPORTANCE_MEDIUM, + desc => ?DESC(builtin_data_dir) + } + )}, + {n_shards, + sc( + pos_integer(), + #{ + default => 12, + importance => ?IMPORTANCE_MEDIUM, + desc => ?DESC(builtin_n_shards) + } + )}, + {local_write_buffer, + sc( + ref(builtin_write_buffer), + #{ + importance => ?IMPORTANCE_HIDDEN, + desc => ?DESC(builtin_write_buffer) + } + )}, + {layout, + sc( + hoconsc:union(builtin_layouts()), + #{ + desc => ?DESC(builtin_layout), + importance => ?IMPORTANCE_MEDIUM, + default => + #{ + <<"type">> => wildcard_optimized + } + } + )} + ]. + desc(builtin) -> ?DESC(builtin); desc(builtin_write_buffer) -> From 8990b1312be6cbffc8fd6863efbc153dac9c0f5f Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 10:53:55 +0200 Subject: [PATCH 12/26] refactor(ds_schema): Rename backend builtin -> builtin_raft --- apps/emqx/src/emqx_ds_schema.erl | 26 +++++++++++++------------- rel/i18n/emqx_ds_schema.hocon | 10 +++++----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 5ec7910eb..46368477f 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -18,7 +18,7 @@ -module(emqx_ds_schema). 
%% API: --export([schema/0, translate_builtin/1]). +-export([schema/0, translate_builtin_raft/1]). %% Behavior callbacks: -export([fields/1, desc/1, namespace/0]). @@ -36,9 +36,9 @@ %% API %%================================================================================ -translate_builtin( +translate_builtin_raft( Backend = #{ - backend := builtin, + backend := builtin_raft, n_shards := NShards, n_sites := NSites, replication_factor := ReplFactor, @@ -83,24 +83,24 @@ schema() -> ds_schema(#{ default => #{ - <<"backend">> => builtin + <<"backend">> => builtin_raft }, importance => ?IMPORTANCE_MEDIUM, desc => ?DESC(messages) })} ]. -fields(builtin) -> - %% Schema for the builtin backend: +fields(builtin_raft) -> + %% Schema for the builtin_raft backend: [ {backend, sc( - builtin, + builtin_raft, #{ 'readOnly' => true, - default => builtin, + default => builtin_raft, importance => ?IMPORTANCE_MEDIUM, - desc => ?DESC(builtin_backend) + desc => ?DESC(backend_type) } )}, {'_config_handler', @@ -108,7 +108,7 @@ fields(builtin) -> {module(), atom()}, #{ 'readOnly' => true, - default => {?MODULE, translate_builtin}, + default => {?MODULE, translate_builtin_raft}, importance => ?IMPORTANCE_HIDDEN } )}, @@ -257,8 +257,8 @@ common_builtin_fields() -> )} ]. 
-desc(builtin) -> - ?DESC(builtin); +desc(builtin_raft) -> + ?DESC(builtin_raft); desc(builtin_write_buffer) -> ?DESC(builtin_write_buffer); desc(layout_builtin_wildcard_optimized) -> @@ -275,7 +275,7 @@ desc(_) -> ds_schema(Options) -> sc( hoconsc:union([ - ref(builtin) + ref(builtin_raft) | emqx_schema_hooks:injection_point('durable_storage.backends', []) ]), Options diff --git a/rel/i18n/emqx_ds_schema.hocon b/rel/i18n/emqx_ds_schema.hocon index 7875295e6..cc2bad5a0 100644 --- a/rel/i18n/emqx_ds_schema.hocon +++ b/rel/i18n/emqx_ds_schema.hocon @@ -5,15 +5,15 @@ messages.desc: """~ Configuration related to the durable storage of MQTT messages.~""" -builtin.label: "Builtin backend" -builtin.desc: +builtin_raft.label: "Builtin backend with Raft replication" +builtin_raft.desc: """~ Builtin session storage backend utilizing embedded RocksDB key-value store.~""" -builtin_backend.label: "Backend type" -builtin_backend.desc: +backend_type.label: "Backend type" +backend_type.desc: """~ - Built-in backend.~""" + Backend type.~""" builtin_data_dir.label: "Database location" builtin_data_dir.desc: From 4484f30021f6b6389d8a94ab5e3fc4f7613508dc Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:06:52 +0200 Subject: [PATCH 13/26] feat(ds_schema): Add schema for builtin_local backend --- apps/emqx/src/emqx_ds_schema.erl | 92 ++++++++++++++++++++++++-------- rel/i18n/emqx_ds_schema.hocon | 8 ++- 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 46368477f..bc6bf6e25 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -18,7 +18,7 @@ -module(emqx_ds_schema). %% API: --export([schema/0, translate_builtin_raft/1]). +-export([schema/0, translate_builtin_raft/1, translate_builtin_local/1]). %% Behavior callbacks: -export([fields/1, desc/1, namespace/0]). 
@@ -32,6 +32,14 @@ %% Type declarations %%================================================================================ +-if(defined(EMQX_RELEASE_EDITION) andalso EMQX_RELEASE_EDITION == ee). +-define(DEFAULT_BACKEND, builtin_raft). +-define(BUILTIN_BACKENDS, [ref(builtin_raft), ref(builtin_local)]). +-else. +-define(DEFAULT_BACKEND, builtin_local). +-define(BUILTIN_BACKENDS, [ref(builtin_local)]). +-endif. + %%================================================================================ %% API %%================================================================================ @@ -45,29 +53,26 @@ translate_builtin_raft( layout := Layout } ) -> - Storage = - case Layout of - #{ - type := wildcard_optimized, - bits_per_topic_level := BitsPerTopicLevel, - epoch_bits := EpochBits, - topic_index_bytes := TIBytes - } -> - {emqx_ds_storage_bitfield_lts, #{ - bits_per_topic_level => BitsPerTopicLevel, - topic_index_bytes => TIBytes, - epoch_bits => EpochBits - }}; - #{type := reference} -> - {emqx_ds_storage_reference, #{}} - end, #{ backend => builtin_raft, n_shards => NShards, n_sites => NSites, replication_factor => ReplFactor, replication_options => maps:get(replication_options, Backend, #{}), - storage => Storage + storage => translate_layout(Layout) + }. + +translate_builtin_local( + #{ + backend := builtin_local, + n_shards := NShards, + layout := Layout + } +) -> + #{ + backend => builtin_local, + n_shards => NShards, + storage => translate_layout(Layout) }. %%================================================================================ @@ -83,13 +88,37 @@ schema() -> ds_schema(#{ default => #{ - <<"backend">> => builtin_raft + <<"backend">> => ?DEFAULT_BACKEND }, importance => ?IMPORTANCE_MEDIUM, desc => ?DESC(messages) })} ]. 
+fields(builtin_local) -> + %% Schema for the builtin_local backend: + [ + {backend, + sc( + builtin_local, + #{ + 'readOnly' => true, + default => builtin_local, + importance => ?IMPORTANCE_MEDIUM, + desc => ?DESC(backend_type) + } + )}, + {'_config_handler', + sc( + {module(), atom()}, + #{ + 'readOnly' => true, + default => {?MODULE, translate_builtin_local}, + importance => ?IMPORTANCE_HIDDEN + } + )} + | common_builtin_fields() + ]; fields(builtin_raft) -> %% Schema for the builtin_raft backend: [ @@ -259,6 +288,8 @@ common_builtin_fields() -> desc(builtin_raft) -> ?DESC(builtin_raft); +desc(builtin_local) -> + ?DESC(builtin_local); desc(builtin_write_buffer) -> ?DESC(builtin_write_buffer); desc(layout_builtin_wildcard_optimized) -> @@ -272,12 +303,27 @@ desc(_) -> %% Internal functions %%================================================================================ +translate_layout( + #{ + type := wildcard_optimized, + bits_per_topic_level := BitsPerTopicLevel, + epoch_bits := EpochBits, + topic_index_bytes := TIBytes + } +) -> + {emqx_ds_storage_bitfield_lts, #{ + bits_per_topic_level => BitsPerTopicLevel, + topic_index_bytes => TIBytes, + epoch_bits => EpochBits + }}; +translate_layout(#{type := reference}) -> + {emqx_ds_storage_reference, #{}}. + ds_schema(Options) -> sc( - hoconsc:union([ - ref(builtin_raft) - | emqx_schema_hooks:injection_point('durable_storage.backends', []) - ]), + hoconsc:union( + ?BUILTIN_BACKENDS ++ emqx_schema_hooks:injection_point('durable_storage.backends', []) + ), Options ). 
diff --git a/rel/i18n/emqx_ds_schema.hocon b/rel/i18n/emqx_ds_schema.hocon index cc2bad5a0..52268b8ac 100644 --- a/rel/i18n/emqx_ds_schema.hocon +++ b/rel/i18n/emqx_ds_schema.hocon @@ -8,7 +8,13 @@ messages.desc: builtin_raft.label: "Builtin backend with Raft replication" builtin_raft.desc: """~ - Builtin session storage backend utilizing embedded RocksDB key-value store.~""" + Builtin storage backend utilizing embedded RocksDB key-value store.~""" + +builtin_local.label: "Builtin backend" +builtin_local.desc: + """~ + Builtin storage backend utilizing embedded RocksDB key-value store. + This backend doesn't support clustering.~""" backend_type.label: "Backend type" backend_type.desc: From 5a8818edf30351065b12e7b1f1d6883d6fd9c310 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:47:19 +0200 Subject: [PATCH 14/26] feat(ds): Add schema for builtin_local backend --- apps/emqx/src/emqx_ds_schema.erl | 6 ++- .../test/emqx_ds_backends_SUITE.erl | 43 +++++++++++-------- apps/emqx_machine/src/emqx_machine_boot.erl | 2 + 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index bc6bf6e25..1c932c822 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -32,7 +32,11 @@ %% Type declarations %%================================================================================ --if(defined(EMQX_RELEASE_EDITION) andalso EMQX_RELEASE_EDITION == ee). +-ifndef(EMQX_RELEASE_EDITION). +-define(EMQX_RELEASE_EDITION, ce). +-endif. + +-if(?EMQX_RELEASE_EDITION == ee). -define(DEFAULT_BACKEND, builtin_raft). -define(BUILTIN_BACKENDS, [ref(builtin_raft), ref(builtin_local)]). -else. 
diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index d119766f3..ff5495a2d 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -553,9 +553,17 @@ delete(DB, It0, Selector, BatchSize, Acc) -> -if(?EMQX_RELEASE_EDITION == ee). all() -> [{group, builtin_local}, {group, builtin_raft}]. + +%% kernel-10 OTP application (OTP 27) introduces +%% `optional_applications` application spec flag. Once we migrate to +%% OTP27, this won't be needed, as application controller will +%% automatically load raft backend when available: +-define(MAYBE_RAFT, [emqx_ds_builtin_raft]). -else. all() -> [{group, builtin_local}]. + +-define(MAYBE_RAFT, []). -endif. groups() -> @@ -587,25 +595,22 @@ end_per_group(_Group, Config) -> Config. init_per_suite(Config) -> - emqx_common_test_helpers:clear_screen(), - Apps = emqx_cth_suite:start( - [mria, emqx_ds_backends], - #{work_dir => ?config(priv_dir, Config)} - ), - [{apps, Apps} | Config]. - -end_per_suite(Config) -> - ok = emqx_cth_suite:stop(?config(apps, Config)), - ok. - -init_per_testcase(_TC, Config) -> - application:ensure_all_started(emqx_durable_storage), Config. -end_per_testcase(TC, _Config) -> - ok = emqx_ds:drop_db(TC), - snabbkaffe:stop(), - ok = application:stop(emqx_durable_storage), - mria:stop(), - _ = mnesia:delete_schema([node()]), +end_per_suite(_Config) -> + ok. + +init_per_testcase(TC, Config) -> + Apps = emqx_cth_suite:start( + [emqx_durable_storage, emqx_ds_backends | ?MAYBE_RAFT], + #{work_dir => emqx_cth_suite:work_dir(TC, Config)} + ), + ct:pal("Apps: ~p", [Apps]), + [{apps, Apps} | Config]. + +end_per_testcase(TC, Config) -> + ok = emqx_ds:drop_db(TC), + ok = emqx_cth_suite:stop(?config(apps, Config)), + _ = mnesia:delete_schema([node()]), + snabbkaffe:stop(), ok. 
diff --git a/apps/emqx_machine/src/emqx_machine_boot.erl b/apps/emqx_machine/src/emqx_machine_boot.erl index 777ad2959..0a01c1d20 100644 --- a/apps/emqx_machine/src/emqx_machine_boot.erl +++ b/apps/emqx_machine/src/emqx_machine_boot.erl @@ -188,6 +188,8 @@ runtime_deps() -> {emqx_connector, fun(App) -> lists:prefix("emqx_bridge_", atom_to_list(App)) end}, %% emqx_fdb is an EE app {emqx_durable_storage, emqx_fdb}, + %% emqx_ds_builtin_raft is an EE app + {emqx_ds_backends, emqx_ds_builtin_raft}, {emqx_dashboard, emqx_license} ]. From 99c9b56cf3895e921d6d65792d9fa013ec6c951f Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 22:00:00 +0200 Subject: [PATCH 15/26] feat(ds_buffer): Add `ls' function to list all local buffers --- .../src/emqx_ds_buffer.erl | 18 ++++--- .../src/emqx_ds_builtin_metrics.erl | 48 +++++++++---------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index f0cf4fe83..3fcbec3b9 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -22,6 +22,7 @@ %% API: -export([start_link/4, store_batch/3, shard_of_message/3]). +-export([ls/0]). %% behavior callbacks: -export([init/1, format_status/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -63,6 +64,11 @@ %% API functions %%================================================================================ +-spec ls() -> [{emqx_ds:db(), _Shard}]. +ls() -> + MS = {{n, l, {?MODULE, '$1', '$2'}}, [], ['$1', '$2']}, + gproc:select({local, names}, [MS]). + -spec start_link(module(), _CallbackOptions, emqx_ds:db(), _ShardId) -> {ok, pid()}. 
start_link(CallbackModule, CallbackOptions, DB, Shard) -> @@ -267,12 +273,12 @@ do_flush( {CallbackS, Result} = CBM:flush_buffer(DB, Shard, Messages, CallbackS0), S = S0#s{callback_state = CallbackS}, T1 = erlang:monotonic_time(microsecond), - emqx_ds_builtin_metrics:observe_egress_flush_time(Metrics, T1 - T0), + emqx_ds_builtin_metrics:observe_buffer_flush_time(Metrics, T1 - T0), case Result of ok -> - emqx_ds_builtin_metrics:inc_egress_batches(Metrics), - emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), - emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), + emqx_ds_builtin_metrics:inc_buffer_batches(Metrics), + emqx_ds_builtin_metrics:inc_buffer_messages(Metrics, S#s.n), + emqx_ds_builtin_metrics:inc_buffer_bytes(Metrics, S#s.n_bytes), ?tp( emqx_ds_buffer_flush, #{db => DB, shard => Shard, batch => Messages} @@ -298,7 +304,7 @@ do_flush( #{db => DB, shard => Shard, reason => Err} ), %% Retry sending the batch: - emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), + emqx_ds_builtin_metrics:inc_buffer_batches_retry(Metrics), erlang:garbage_collect(), %% We block the gen_server until the next retry. BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), @@ -310,7 +316,7 @@ do_flush( emqx_ds_buffer_flush_failed, #{db => DB, shard => Shard, error => Err} ), - emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), + emqx_ds_builtin_metrics:inc_buffer_batches_failed(Metrics), Reply = case Err of {error, _, _} -> Err; diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 2d9f9ea16..994368df0 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -22,13 +22,13 @@ -export([prometheus_meta/0, prometheus_collect/1]). 
-export([ - inc_egress_batches/1, - inc_egress_batches_retry/1, - inc_egress_batches_failed/1, - inc_egress_messages/2, - inc_egress_bytes/2, + inc_buffer_batches/1, + inc_buffer_batches_retry/1, + inc_buffer_batches_failed/1, + inc_buffer_messages/2, + inc_buffer_bytes/2, - observe_egress_flush_time/2, + observe_buffer_flush_time/2, observe_store_batch_time/2, @@ -106,36 +106,36 @@ init_for_shard(ShardId) -> emqx_metrics_worker:create_metrics(?WORKER, ShardId, ?SHARD_METRICS, []). %% @doc Increase the number of successfully flushed batches --spec inc_egress_batches(shard_metrics_id()) -> ok. -inc_egress_batches(Id) -> +-spec inc_buffer_batches(shard_metrics_id()) -> ok. +inc_buffer_batches(Id) -> catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES). -%% @doc Increase the number of time the egress worker had to retry +%% @doc Increase the number of times the buffer worker had to retry %% flushing the batch --spec inc_egress_batches_retry(shard_metrics_id()) -> ok. -inc_egress_batches_retry(Id) -> +-spec inc_buffer_batches_retry(shard_metrics_id()) -> ok. +inc_buffer_batches_retry(Id) -> catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_RETRY). -%% @doc Increase the number of time the egress worker encountered an +%% @doc Increase the number of times the buffer worker encountered an %% unrecoverable error while trying to flush the batch --spec inc_egress_batches_failed(shard_metrics_id()) -> ok. -inc_egress_batches_failed(Id) -> +-spec inc_buffer_batches_failed(shard_metrics_id()) -> ok. +inc_buffer_batches_failed(Id) -> catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BATCHES_FAILED). %% @doc Increase the number of messages successfully saved to the shard --spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. -inc_egress_messages(Id, NMessages) -> +-spec inc_buffer_messages(shard_metrics_id(), non_neg_integer()) -> ok. 
+inc_buffer_messages(Id, NMessages) -> catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_MESSAGES, NMessages). %% @doc Increase the number of messages successfully saved to the shard --spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. -inc_egress_bytes(Id, NMessages) -> +-spec inc_buffer_bytes(shard_metrics_id(), non_neg_integer()) -> ok. +inc_buffer_bytes(Id, NMessages) -> catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_BUFFER_BYTES, NMessages). -%% @doc Add a sample of elapsed time spent flushing the egress to the +%% @doc Add a sample of elapsed time spent flushing the buffer to the %% Raft log (in microseconds) --spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. -observe_egress_flush_time(Id, FlushTime) -> +-spec observe_buffer_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. +observe_buffer_flush_time(Id, FlushTime) -> catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_BUFFER_FLUSH_TIME, FlushTime). -spec observe_store_batch_time(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. @@ -221,13 +221,13 @@ prometheus_per_db(NodeOrAggr, DB, Acc0) -> %% This function returns the data in the following format: %% ``` -%% #{emqx_ds_egress_batches => +%% #{emqx_ds_buffer_batches => %% [{[{db,messages},{shard,<<"1">>}],99408}, %% {[{db,messages},{shard,<<"0">>}],99409}], -%% emqx_ds_egress_batches_retry => +%% emqx_ds_buffer_batches_retry => %% [{[{db,messages},{shard,<<"1">>}],0}, %% {[{db,messages},{shard,<<"0">>}],0}], -%% emqx_ds_egress_messages => +%% emqx_ds_buffer_messages => %% ... 
%% } %% ''' From be6c5e172fdf9c892a2912c6463029f69fcb3a18 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 23:32:38 +0200 Subject: [PATCH 16/26] fix(ds): Disable DS management APIs for builtin_local backend --- apps/emqx_management/src/emqx_mgmt_api_ds.erl | 19 +++++++++---------- apps/emqx_management/src/emqx_mgmt_cli.erl | 5 +++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_ds.erl b/apps/emqx_management/src/emqx_mgmt_api_ds.erl index 5d0bd8763..bc949cd8a 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_ds.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_ds.erl @@ -15,13 +15,9 @@ %%-------------------------------------------------------------------- -module(emqx_mgmt_api_ds). --behaviour(minirest_api). +-if(?EMQX_RELEASE_EDITION == ee). --include_lib("emqx/include/logger.hrl"). --include_lib("typerefl/include/types.hrl"). --include_lib("hocon/include/hoconsc.hrl"). --include_lib("emqx_utils/include/emqx_utils_api.hrl"). --include_lib("emqx/include/emqx_persistent_message.hrl"). +-behaviour(minirest_api). -import(hoconsc, [mk/2, ref/1, enum/1, array/1]). @@ -50,10 +46,11 @@ fields/1 ]). -%% internal exports: --export([]). - --export_type([]). +-include_lib("emqx/include/logger.hrl"). +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx_utils/include/emqx_utils_api.hrl"). +-include_lib("emqx/include/emqx_persistent_message.hrl"). %%================================================================================ %% Type declarations @@ -494,3 +491,5 @@ meta_result_to_binary({error, {member_of_replica_sets, DBNames}}) -> meta_result_to_binary({error, Err}) -> IOList = io_lib:format("Error: ~p", [Err]), {error, iolist_to_binary(IOList)}. + +-endif. 
diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 7dc614c6d..8d327efe6 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -848,6 +848,7 @@ ds(CMD) -> emqx_ctl:usage([{"ds", "Durable storage is disabled"}]) end. +-if(?EMQX_RELEASE_EDITION == ee). do_ds(["info"]) -> emqx_ds_replication_layer_meta:print_status(); do_ds(["set_replicas", DBStr | SitesStr]) -> @@ -907,6 +908,10 @@ do_ds(_) -> {"ds leave ", "Remove site from the replica set of the storage"}, {"ds forget ", "Forcefully remove a site from the list of known sites"} ]). +-else. +do_ds(_CMD) -> + emqx_ctl:usage([{"ds", "DS CLI is not available in this edition of EMQX"}]). +-endif. %%-------------------------------------------------------------------- %% Dump ETS From bc915216a0ff802e4f37f4a07d511f5b083746a7 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 14 Jun 2024 23:50:59 +0200 Subject: [PATCH 17/26] feat(ds): Support metrics for builtin_local backend --- .../test/emqx_persistent_messages_SUITE.erl | 15 +++++++++++- apps/emqx_durable_storage/src/emqx_ds.erl | 7 ++++++ .../src/emqx_ds_buffer.erl | 5 ++-- .../src/emqx_ds_builtin_metrics.erl | 24 +++++++++---------- apps/emqx_durable_storage/src/emqx_ds_sup.erl | 14 +++++++++++ 5 files changed, 50 insertions(+), 15 deletions(-) diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 951d72c8d..f58b21fb7 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -33,7 +33,12 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - Config. + case is_standalone() of + true -> + {skip, standalone_not_supported}; + false -> + Config + end. end_per_suite(_Config) -> ok. 
@@ -590,3 +595,11 @@ on_message_dropped(Msg, Context, Res, TestPid) -> ErrCtx = #{msg => Msg, ctx => Context, res => Res}, ct:pal("this hook should not be called.\n ~p", [ErrCtx]), exit(TestPid, {hookpoint_called, ErrCtx}). + +is_standalone() -> + try + emqx_conf:module_info(), + false + catch + error:undef -> true + end. diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index c2f1e7eb3..594c4f30e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -27,6 +27,7 @@ open_db/2, close_db/1, + which_dbs/0, update_db_config/2, add_generation/1, list_generations_with_lifetimes/1, @@ -264,13 +265,19 @@ open_db(DB, Opts = #{backend := Backend}) -> error({no_such_backend, Backend}); Module -> persistent_term:put(?persistent_term(DB), Module), + emqx_ds_sup:register_db(DB, Backend), ?module(DB):open_db(DB, Opts) end. -spec close_db(db()) -> ok. close_db(DB) -> + emqx_ds_sup:unregister_db(DB), ?module(DB):close_db(DB). +-spec which_dbs() -> [{db(), _Backend :: atom()}]. +which_dbs() -> + emqx_ds_sup:which_dbs(). + -spec add_generation(db()) -> ok. add_generation(DB) -> ?module(DB):add_generation(DB). diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index 3fcbec3b9..56e98eee3 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -39,7 +39,8 @@ %% Type declarations %%================================================================================ --define(via(DB, Shard), {via, gproc, {n, l, {?MODULE, DB, Shard}}}). +-define(name(DB, SHARD), {n, l, {?MODULE, DB, SHARD}}). +-define(via(DB, SHARD), {via, gproc, ?name(DB, SHARD)}). -define(flush, flush). -define(cbm(DB), {?MODULE, DB}). @@ -66,7 +67,7 @@ -spec ls() -> [{emqx_ds:db(), _Shard}]. 
ls() -> - MS = {{n, l, {?MODULE, '$1', '$2'}}, [], ['$1', '$2']}, + MS = {{?name('$1', '$2'), '_', '_'}, [], [{{'$1', '$2'}}]}, gproc:select({local, names}, [MS]). -spec start_link(module(), _CallbackOptions, emqx_ds:db(), _ShardId) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 994368df0..83c7c2d53 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -176,11 +176,14 @@ prometheus_collect(NodeOrAggr) -> prometheus_per_db(NodeOrAggr) -> lists:foldl( - fun(DB, Acc) -> - prometheus_per_db(NodeOrAggr, DB, Acc) + fun + ({DB, Backend}, Acc) when Backend =:= builtin_local; Backend =:= builtin_raft -> + prometheus_per_db(NodeOrAggr, DB, Acc); + ({_, _}, Acc) -> + Acc end, #{}, - emqx_ds_builtin_raft_db_sup:which_dbs() + emqx_ds:which_dbs() ). %% This function returns the data in the following format: @@ -235,18 +238,15 @@ prometheus_per_db(NodeOrAggr, DB, Acc0) -> %% If `NodeOrAggr' = `node' then node name is appended to the list of %% labels. prometheus_per_shard(NodeOrAggr) -> + prometheus_buffer_metrics(NodeOrAggr). + +prometheus_buffer_metrics(NodeOrAggr) -> lists:foldl( - fun(DB, Acc0) -> - lists:foldl( - fun(Shard, Acc) -> - prometheus_per_shard(NodeOrAggr, DB, Shard, Acc) - end, - Acc0, - emqx_ds_replication_layer_meta:shards(DB) - ) + fun({DB, Shard}, Acc) -> + prometheus_per_shard(NodeOrAggr, DB, Shard, Acc) end, #{}, - emqx_ds_builtin_raft_db_sup:which_dbs() + emqx_ds_buffer:ls() ). prometheus_per_shard(NodeOrAggr, DB, Shard, Acc0) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_sup.erl index c4bd0e873..41631d6d6 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_sup.erl @@ -19,6 +19,7 @@ %% API: -export([start_link/0, attach_backend/2]). 
+-export([register_db/2, unregister_db/1, which_dbs/0]). %% behaviour callbacks: -export([init/1]). @@ -28,6 +29,7 @@ %%================================================================================ -define(SUP, ?MODULE). +-define(TAB, ?MODULE). %%================================================================================ %% API functions @@ -58,11 +60,23 @@ attach_backend(Backend, Start) -> {error, Err} end. +register_db(DB, Backend) -> + ets:insert(?TAB, {DB, Backend}), + ok. + +unregister_db(DB) -> + ets:delete(?TAB, DB), + ok. + +which_dbs() -> + ets:tab2list(?TAB). + %%================================================================================ %% behaviour callbacks %%================================================================================ init(top) -> + _ = ets:new(?TAB, [public, set, named_table]), Children = [], SupFlags = #{ strategy => one_for_one, From 3851fc189fc2600b2e5efa1a9a46214cc849a9c4 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 17 Jun 2024 10:21:28 +0200 Subject: [PATCH 18/26] fix(ds): Avoid reverse dependencies from storage to repl. layer --- .../emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 3 --- apps/emqx_durable_storage/src/emqx_ds.erl | 7 +------ apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl | 2 +- apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl | 2 +- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 1c7e0c1c2..0a1173e70 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -73,7 +73,6 @@ delete_stream/0, iterator/0, delete_iterator/0, - message_id/0, batch/0 ]). @@ -133,8 +132,6 @@ ?enc := emqx_ds_storage_layer:delete_iterator() }. --type message_id() :: emqx_ds:message_id(). 
- %% TODO: this type is obsolete and is kept only for compatibility with %% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6) -type batch() :: #{ diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 594c4f30e..7f6996bd7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -16,9 +16,7 @@ %% @doc Main interface module for `emqx_durable_storage' application. %% -%% It takes care of forwarding calls to the underlying DBMS. Currently -%% only the embedded `emqx_ds_replication_layer' storage is supported, -%% so all the calls are simply passed through. +%% It takes care of forwarding calls to the underlying DBMS. -module(emqx_ds). %% Management API: @@ -63,7 +61,6 @@ iterator/0, delete_iterator/0, iterator_id/0, - message_id/0, message_key/0, message_store_opts/0, next_result/1, next_result/0, @@ -178,8 +175,6 @@ -type create_db_opts() :: generic_db_opts(). --type message_id() :: emqx_ds_replication_layer:message_id(). - %% An opaque term identifying a generation. Each implementation will possibly add %% information to this term to match its inner structure (e.g.: by embedding the shard id, %% in the case of `emqx_ds_replication_layer'). diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 83c7c2d53..d48cd0e34 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -96,7 +96,7 @@ child_spec() -> init_for_db(DB) -> emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). --spec shard_metric_id(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> shard_metrics_id(). +-spec shard_metric_id(emqx_ds:db(), binary()) -> shard_metrics_id(). shard_metric_id(DB, ShardId) -> iolist_to_binary([atom_to_list(DB), $/, ShardId]). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 331c9806b..818d0bcb7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -106,7 +106,7 @@ {emqx_ds_storage_reference, emqx_ds_storage_reference:options()} | {emqx_ds_storage_bitfield_lts, emqx_ds_storage_bitfield_lts:options()}. --type shard_id() :: {emqx_ds:db(), emqx_ds_replication_layer:shard_id()}. +-type shard_id() :: {emqx_ds:db(), binary()}. -type cf_refs() :: [{string(), rocksdb:cf_handle()}]. From b2f7815a7f3eb5303113df22d895bee7e823dd50 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 18 Jun 2024 00:12:31 +0200 Subject: [PATCH 19/26] test(ds): Don't start raft explicitly --- .../emqx_persistent_session_ds_SUITE.erl | 3 ++- apps/emqx/rebar.config | 1 - .../test/emqx_persistent_messages_SUITE.erl | 2 +- .../test/emqx_persistent_session_SUITE.erl | 3 ++- .../test/emqx_dashboard_monitor_SUITE.erl | 7 +++++ apps/emqx_ds_backends/rebar.config | 5 ---- apps/emqx_ds_backends/rebar.config.script | 26 +++++++++++++++++++ .../src/emqx_ds_backends.app.src | 11 -------- .../src/emqx_ds_backends.app.src.script | 26 +++++++++++++++++++ .../test/emqx_ds_backends_SUITE.erl | 5 +--- .../test/emqx_mgmt_api_clients_SUITE.erl | 1 + 11 files changed, 66 insertions(+), 24 deletions(-) delete mode 100644 apps/emqx_ds_backends/rebar.config create mode 100644 apps/emqx_ds_backends/rebar.config.script delete mode 100644 apps/emqx_ds_backends/src/emqx_ds_backends.app.src create mode 100644 apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script diff --git a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl index ae4aab097..63c89c595 100644 --- a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl +++ 
b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl @@ -60,7 +60,8 @@ init_per_testcase(t_session_gc = TestCase, Config) -> "\n heartbeat_interval = 500ms " "\n session_gc_interval = 1s " "\n session_gc_batch_size = 2 " - "\n }" + "\n }\n" + "durable_storage.messages.backend = builtin_local\n" }, Cluster = cluster(Opts), ClusterOpts = #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}, diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 6ff0de648..df9f69f87 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -24,7 +24,6 @@ {deps, [ {emqx_utils, {path, "../emqx_utils"}}, {emqx_durable_storage, {path, "../emqx_durable_storage"}}, - {emqx_ds_builtin_local, {path, "../emqx_ds_builtin_local"}}, {emqx_ds_backends, {path, "../emqx_ds_backends"}}, {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index f58b21fb7..d17c08bce 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -470,7 +470,7 @@ t_metrics_not_dropped(_Config) -> t_replication_options(_Config) -> ?assertMatch( #{ - backend := builtin, + backend := builtin_raft, replication_options := #{ wal_max_size_bytes := 16000000, wal_max_batch_size := 1024, diff --git a/apps/emqx/test/emqx_persistent_session_SUITE.erl b/apps/emqx/test/emqx_persistent_session_SUITE.erl index e9d09b980..54a8e7d51 100644 --- a/apps/emqx/test/emqx_persistent_session_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_session_SUITE.erl @@ -81,7 +81,8 @@ init_per_group(persistence_enabled, Config) -> " heartbeat_interval = 100ms\n" " renew_streams_interval = 100ms\n" " session_gc_interval = 2s\n" - "}"}, + "}\n" + "durable_storage.messages.backend = builtin_local"}, {persistence, ds} | Config ]; diff --git 
a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl index c82367faf..2760576a3 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl @@ -56,11 +56,18 @@ %% CT boilerplate %%-------------------------------------------------------------------- +-if(?EMQX_RELEASE_EDITION == ee). all() -> [ {group, common}, {group, persistent_sessions} ]. +-else. +all() -> + [ + {group, common} + ]. +-endif. groups() -> AllTCs = emqx_common_test_helpers:all(?MODULE), diff --git a/apps/emqx_ds_backends/rebar.config b/apps/emqx_ds_backends/rebar.config deleted file mode 100644 index 7af4ea8e3..000000000 --- a/apps/emqx_ds_backends/rebar.config +++ /dev/null @@ -1,5 +0,0 @@ -%% -*- mode:erlang -*- -{deps, [ - {emqx_utils, {path, "../emqx_utils"}}, - {emqx_durable_storage, {path, "../emqx_durable_storage"}} -]}. diff --git a/apps/emqx_ds_backends/rebar.config.script b/apps/emqx_ds_backends/rebar.config.script new file mode 100644 index 000000000..b9e2bafb9 --- /dev/null +++ b/apps/emqx_ds_backends/rebar.config.script @@ -0,0 +1,26 @@ +%% -*- mode:erlang -*- +Profile = case os:getenv("PROFILE") of + "emqx-enterprise" ++ _ -> + ee; + false -> + io:format(user, "WARN: environment variable PROFILE is not set, using 'emqx-enterprise'~n", []), + ee; + _ -> + ce + end, +CEDeps = + [ + {emqx_utils, {path, "../emqx_utils"}}, + {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {emqx_ds_builtin_local, {path, "../emqx_ds_builtin_local"}} + ], +EEDeps = + [ + {emqx_ds_builtin_raft, {path, "../emqx_ds_builtin_raft"}} + ], +case Profile of + ee -> + {deps, CEDeps ++ EEDeps}; + ce -> + {deps, CEDeps} +end. 
diff --git a/apps/emqx_ds_backends/src/emqx_ds_backends.app.src b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src deleted file mode 100644 index 5215124e4..000000000 --- a/apps/emqx_ds_backends/src/emqx_ds_backends.app.src +++ /dev/null @@ -1,11 +0,0 @@ -%% -*- mode: erlang -*- -{application, emqx_ds_backends, [ - {description, "A placeholder application that depends on all available DS backends"}, - % strict semver, bump manually! - {vsn, "0.1.0"}, - {modules, []}, - {registered, []}, - {applications, [kernel, stdlib, emqx_durable_storage, emqx_ds_builtin_local]}, - {optional_applications, [emqx_ds_builtin_raft]}, - {env, []} -]}. diff --git a/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script new file mode 100644 index 000000000..b3950edbc --- /dev/null +++ b/apps/emqx_ds_backends/src/emqx_ds_backends.app.src.script @@ -0,0 +1,26 @@ +%% -*- mode: erlang -*- +Profile = case os:getenv("PROFILE") of + "emqx-enterprise" ++ _ -> + ee; + false -> + io:format(user, "WARN: environment variable PROFILE is not set, using 'emqx-enterprise'~n", []), + ee; + _ -> + ce + end, + +{application, emqx_ds_backends, [ + {description, "A placeholder application that depends on all available DS backends"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, emqx_durable_storage, emqx_ds_builtin_local | + case Profile of + ee -> + [emqx_ds_builtin_raft]; + ce -> + [] + end]}, + {env, []} +]}. diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index ff5495a2d..6b27c307d 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -558,12 +558,9 @@ all() -> %% `optional_applications` application spec flag. 
Once we migrate to %% OTP27, this won't be needed, as application controller will %% automatically load raft backend when available: --define(MAYBE_RAFT, [emqx_ds_builtin_raft]). -else. all() -> [{group, builtin_local}]. - --define(MAYBE_RAFT, []). -endif. groups() -> @@ -602,7 +599,7 @@ end_per_suite(_Config) -> init_per_testcase(TC, Config) -> Apps = emqx_cth_suite:start( - [emqx_durable_storage, emqx_ds_backends | ?MAYBE_RAFT], + [emqx_durable_storage, emqx_ds_backends], #{work_dir => emqx_cth_suite:work_dir(TC, Config)} ), ct:pal("Apps: ~p", [Apps]), diff --git a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl index 2c71e9822..37b769655 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl @@ -52,6 +52,7 @@ persistent_session_testcases() -> t_persistent_sessions_subscriptions1, t_list_clients_v2 ]. + client_msgs_testcases() -> [ t_inflight_messages, From 1d3b1868fb76d744ee15dee1b9977a30ea6ec84c Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 18 Jun 2024 01:52:02 +0200 Subject: [PATCH 20/26] test(ds): Use close_db API --- .../test/emqx_ds_storage_bitfield_lts_SUITE.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index 54033ae78..bd0f382b2 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -185,8 +185,7 @@ t_new_generation_inherit_trie(_Config) -> %% learned trie. ok = emqx_ds_storage_layer:add_generation(?SHARD, _Since = 1_000), %% Restart the shard, to verify that LTS is persisted. 
- ok = application:stop(emqx_durable_storage), - ok = application:start(emqx_durable_storage), + ok = emqx_ds:close_db(?FUNCTION_NAME), ok = emqx_ds:open_db(?FUNCTION_NAME, ?DEFAULT_CONFIG), %% Store a batch of messages with the same set of topics. TS2 = 1_500, From 8aa27488b6b97257789830c4ec05fc0236bc02d0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 18 Jun 2024 05:31:44 +0200 Subject: [PATCH 21/26] test: Disable certain DS-related suites in CE --- .../emqx_persistent_session_ds_SUITE.erl | 18 +++-- .../test/emqx_persistent_messages_SUITE.erl | 16 ++--- .../test/emqx_dashboard_monitor_SUITE.erl | 72 +++++++++---------- .../test/emqx_ds_backends_SUITE.erl | 33 ++++----- apps/emqx_ds_builtin_raft/rebar.config | 3 +- .../test/emqx_ds_test_helpers.erl | 11 +++ apps/emqx_management/test/emqx_mgmt_SUITE.erl | 37 +++++----- 7 files changed, 98 insertions(+), 92 deletions(-) diff --git a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl index 63c89c595..920e2528f 100644 --- a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl +++ b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl @@ -25,11 +25,16 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - TCApps = emqx_cth_suite:start( - app_specs(), - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - [{tc_apps, TCApps} | Config]. + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + TCApps = emqx_cth_suite:start( + app_specs(), + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{tc_apps, TCApps} | Config]; + Yes -> + Yes + end. 
end_per_suite(Config) -> TCApps = ?config(tc_apps, Config), @@ -60,8 +65,7 @@ init_per_testcase(t_session_gc = TestCase, Config) -> "\n heartbeat_interval = 500ms " "\n session_gc_interval = 1s " "\n session_gc_batch_size = 2 " - "\n }\n" - "durable_storage.messages.backend = builtin_local\n" + "\n }" }, Cluster = cluster(Opts), ClusterOpts = #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}, diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index d17c08bce..d9c0a61f3 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -33,11 +33,11 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - case is_standalone() of - true -> - {skip, standalone_not_supported}; + case emqx_ds_test_helpers:skip_if_norepl() of false -> - Config + Config; + Yes -> + Yes end. end_per_suite(_Config) -> @@ -595,11 +595,3 @@ on_message_dropped(Msg, Context, Res, TestPid) -> ErrCtx = #{msg => Msg, ctx => Context, res => Res}, ct:pal("this hook should not be called.\n ~p", [ErrCtx]), exit(TestPid, {hookpoint_called, ErrCtx}). - -is_standalone() -> - try - emqx_conf:module_info(), - false - catch - error:undef -> true - end. diff --git a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl index 2760576a3..8f18aa685 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl @@ -56,18 +56,11 @@ %% CT boilerplate %%-------------------------------------------------------------------- --if(?EMQX_RELEASE_EDITION == ee). all() -> [ {group, common}, {group, persistent_sessions} ]. --else. -all() -> - [ - {group, common} - ]. --endif. groups() -> AllTCs = emqx_common_test_helpers:all(?MODULE), @@ -89,37 +82,42 @@ end_per_suite(_Config) -> ok. 
init_per_group(persistent_sessions = Group, Config) -> - AppSpecsFn = fun(Enable) -> - Port = - case Enable of - true -> "18083"; - false -> "0" + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + AppSpecsFn = fun(Enable) -> + Port = + case Enable of + true -> "18083"; + false -> "0" + end, + [ + emqx_conf, + {emqx, "durable_sessions {enable = true}"}, + {emqx_retainer, ?BASE_RETAINER_CONF}, + emqx_management, + emqx_mgmt_api_test_util:emqx_dashboard( + lists:concat([ + "dashboard.listeners.http { bind = " ++ Port ++ " }\n", + "dashboard.sample_interval = 1s\n", + "dashboard.listeners.http.enable = " ++ atom_to_list(Enable) + ]) + ) + ] end, - [ - emqx_conf, - {emqx, "durable_sessions {enable = true}"}, - {emqx_retainer, ?BASE_RETAINER_CONF}, - emqx_management, - emqx_mgmt_api_test_util:emqx_dashboard( - lists:concat([ - "dashboard.listeners.http { bind = " ++ Port ++ " }\n", - "dashboard.sample_interval = 1s\n", - "dashboard.listeners.http.enable = " ++ atom_to_list(Enable) - ]) - ) - ] - end, - NodeSpecs = [ - {dashboard_monitor1, #{apps => AppSpecsFn(true)}}, - {dashboard_monitor2, #{apps => AppSpecsFn(false)}} - ], - Nodes = - [N1 | _] = emqx_cth_cluster:start( - NodeSpecs, - #{work_dir => emqx_cth_suite:work_dir(Group, Config)} - ), - ?ON(N1, {ok, _} = emqx_common_test_http:create_default_app()), - [{cluster, Nodes} | Config]; + NodeSpecs = [ + {dashboard_monitor1, #{apps => AppSpecsFn(true)}}, + {dashboard_monitor2, #{apps => AppSpecsFn(false)}} + ], + Nodes = + [N1 | _] = emqx_cth_cluster:start( + NodeSpecs, + #{work_dir => emqx_cth_suite:work_dir(Group, Config)} + ), + ?ON(N1, {ok, _} = emqx_common_test_http:create_default_app()), + [{cluster, Nodes} | Config]; + Yes -> + Yes + end; init_per_group(common = Group, Config) -> Apps = emqx_cth_suite:start( [ diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index 6b27c307d..c6d2db224 100644 --- 
a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -550,19 +550,9 @@ delete(DB, It0, Selector, BatchSize, Acc) -> %% CT callbacks --if(?EMQX_RELEASE_EDITION == ee). all() -> [{group, builtin_local}, {group, builtin_raft}]. -%% kernel-10 OTP application (OTP 27) introduces -%% `optional_applications` application spec flag. Once we migrate to -%% OTP27, this won't be needed, as application controller will -%% automatically load raft backend when available: --else. -all() -> - [{group, builtin_local}]. --endif. - groups() -> TCs = emqx_common_test_helpers:all(?MODULE), [ @@ -578,15 +568,20 @@ init_per_group(builtin_local, Config) -> }, [{ds_conf, Conf} | Config]; init_per_group(builtin_raft, Config) -> - Conf = #{ - backend => builtin_raft, - storage => {emqx_ds_storage_reference, #{}}, - n_shards => ?N_SHARDS, - n_sites => 1, - replication_factor => 3, - replication_options => #{} - }, - [{ds_conf, Conf} | Config]. + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + Conf = #{ + backend => builtin_raft, + storage => {emqx_ds_storage_reference, #{}}, + n_shards => ?N_SHARDS, + n_sites => 1, + replication_factor => 3, + replication_options => #{} + }, + [{ds_conf, Conf} | Config]; + Yes -> + Yes + end. end_per_group(_Group, Config) -> Config. diff --git a/apps/emqx_ds_builtin_raft/rebar.config b/apps/emqx_ds_builtin_raft/rebar.config index d70aa75e0..2d2671571 100644 --- a/apps/emqx_ds_builtin_raft/rebar.config +++ b/apps/emqx_ds_builtin_raft/rebar.config @@ -1,5 +1,6 @@ %% -*- mode:erlang -*- {deps, [ - {emqx_durable_storage, {path, "../emqx_durable_storage"}} + {emqx_durable_storage, {path, "../emqx_durable_storage"}}, + {ra, "2.7.3"} ]}. 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 7130041ec..ba9589e97 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -26,6 +26,17 @@ emqx_ds_test_helpers:on(NODE, fun() -> BODY end) ). +skip_if_norepl() -> + try emqx_release:edition() of + ee -> + false; + _ -> + {skip, no_ds_replication} + catch + error:undef -> + {skip, standalone_not_supported} + end. + -spec on([node()] | node(), fun(() -> A)) -> A | [A]. on(Node, Fun) when is_atom(Node) -> [Ret] = on([Node], Fun), diff --git a/apps/emqx_management/test/emqx_mgmt_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_SUITE.erl index e5de64b5a..0cde87465 100644 --- a/apps/emqx_management/test/emqx_mgmt_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_SUITE.erl @@ -56,22 +56,27 @@ init_per_group(persistence_disabled, Config) -> | Config ]; init_per_group(persistence_enabled, Config) -> - Apps = emqx_cth_suite:start( - [ - {emqx, - "durable_sessions {\n" - " enable = true\n" - " heartbeat_interval = 100ms\n" - " renew_streams_interval = 100ms\n" - "}"}, - emqx_management - ], - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - [ - {apps, Apps} - | Config - ]; + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + Apps = emqx_cth_suite:start( + [ + {emqx, + "durable_sessions {\n" + " enable = true\n" + " heartbeat_interval = 100ms\n" + " renew_streams_interval = 100ms\n" + "}"}, + emqx_management + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [ + {apps, Apps} + | Config + ]; + Yes -> + Yes + end; init_per_group(cm_registry_enabled, Config) -> [{emqx_config, "broker.enable_session_registry = true"} | Config]; init_per_group(cm_registry_disabled, Config) -> From d7d878fd43ca9330070ce58e3919c0b76936f534 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:48:19 +0200 Subject: 
[PATCH 22/26] test(ds): Fix emqx standalone test profile selection --- .../test/emqx_persistent_messages_SUITE.erl | 21 +++++++++++----- apps/emqx_ds_backends/rebar.config.script | 4 +-- .../test/emqx_mgmt_api_ds_SUITE.erl | 25 +++++++++++-------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index d9c0a61f3..f225ba43d 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -32,13 +32,22 @@ all() -> emqx_common_test_helpers:all(?MODULE). +%% Needed for standalone mode: +-ifndef(EMQX_RELEASE_EDITION). +-define(EMQX_RELEASE_EDITION, ce). +-endif. + +-if(?EMQX_RELEASE_EDITION == ee). + init_per_suite(Config) -> - case emqx_ds_test_helpers:skip_if_norepl() of - false -> - Config; - Yes -> - Yes - end. + Config. + +-else. + +init_per_suite(Config) -> + {skip, no_replication}. + +-endif. end_per_suite(_Config) -> ok. diff --git a/apps/emqx_ds_backends/rebar.config.script b/apps/emqx_ds_backends/rebar.config.script index b9e2bafb9..6caf605f6 100644 --- a/apps/emqx_ds_backends/rebar.config.script +++ b/apps/emqx_ds_backends/rebar.config.script @@ -20,7 +20,7 @@ EEDeps = ], case Profile of ee -> - {deps, CEDeps ++ EEDeps}; + [{deps, CEDeps ++ EEDeps}]; ce -> - {deps, CEDeps} + [{deps, CEDeps}] end. diff --git a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl index 881ce8e3f..f707ed1ac 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl @@ -27,16 +27,21 @@ all() -> emqx_common_test_helpers:all(?MODULE). 
init_per_suite(Config) -> - Apps = emqx_cth_suite:start( - [ - {emqx, "durable_sessions.enable = true"}, - emqx_management, - {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"} - ], - #{work_dir => emqx_cth_suite:work_dir(Config)} - ), - {ok, _} = emqx_common_test_http:create_default_app(), - [{suite_apps, Apps} | Config]. + case emqx_ds_test_helpers:skip_if_norepl() of + false -> + Apps = emqx_cth_suite:start( + [ + {emqx, "durable_sessions.enable = true"}, + emqx_management, + {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"} + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + {ok, _} = emqx_common_test_http:create_default_app(), + [{suite_apps, Apps} | Config]; + Yes -> + Yes + end. end_per_suite(Config) -> ok = emqx_cth_suite:stop(?config(suite_apps, Config)). From 3d69ec496aca16da4e3a679934ffe2aa81885639 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:23:08 +0200 Subject: [PATCH 23/26] fix(schema): Transform config of `singleton' discovery_strategy --- apps/emqx_conf/src/emqx_conf_schema.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 5c5dd0d50..8481a8f79 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1457,6 +1457,8 @@ cluster_options(k8s, Conf) -> {suffix, conf_get("cluster.k8s.suffix", Conf, "")} ]; cluster_options(manual, _Conf) -> + []; +cluster_options(singleton, _Conf) -> []. 
to_atom(Atom) when is_atom(Atom) -> From 9a58d713785da3fa8b6b7a748c90c3391e43c2fa Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:49:49 +0200 Subject: [PATCH 24/26] fix(ds): Move DS backend supervision trees to their own apps --- .../src/emqx_ds_builtin_local.app.src | 2 +- .../src/emqx_ds_builtin_local_app.erl | 3 +-- .../src/emqx_ds_builtin_local_sup.erl | 16 ++++-------- .../test/emqx_ds_builtin_local_SUITE.erl | 4 +-- .../src/emqx_ds_builtin_raft_app.erl | 2 +- .../src/emqx_ds_builtin_raft_sup.erl | 20 ++++++--------- apps/emqx_durable_storage/src/emqx_ds_sup.erl | 25 ++----------------- changes/ce/breaking-13248.en.md | 2 +- 8 files changed, 20 insertions(+), 54 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src index e7531ae45..e8bcc1b48 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.app.src @@ -5,7 +5,7 @@ {vsn, "0.1.0"}, {modules, []}, {registered, []}, - {applications, [kernel, stdlib, rocksdb, emqx_durable_storage, emqx_utils]}, + {applications, [kernel, stdlib, gproc, mria, rocksdb, emqx_durable_storage, emqx_utils]}, {mod, {emqx_ds_builtin_local_app, []}}, {env, []} ]}. diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl index 1b64405d6..b09ef3fe4 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_app.erl @@ -27,8 +27,7 @@ start(_StartType, _StartArgs) -> emqx_ds:register_backend(builtin_local, emqx_ds_builtin_local), - %% TODO: fixme - {ok, self()}. + emqx_ds_builtin_local_sup:start_top(). 
%%================================================================================ %% Internal exports diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl index 5994588ec..f6a9b1757 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local_sup.erl @@ -43,10 +43,13 @@ %% API functions %%================================================================================ +-spec start_top() -> {ok, pid()}. +start_top() -> + supervisor:start_link({local, ?top}, ?MODULE, ?top). + -spec start_db(emqx_ds:db(), emqx_ds_builtin_local:db_opts()) -> supervisor:startchild_ret(). start_db(DB, Opts) -> - ensure_top(), ChildSpec = #{ id => DB, start => {?databases, start_db, [DB, Opts]}, @@ -81,7 +84,6 @@ stop_db(DB) -> %% Chidren are attached dynamically to this one. init(?top) -> %% Children: - MetricsWorker = emqx_ds_builtin_metrics:child_spec(), MetadataServer = #{ id => metadata_server, start => {emqx_ds_builtin_local_meta, start_link, []}, @@ -103,7 +105,7 @@ init(?top) -> period => 1, auto_shutdown => never }, - {ok, {SupFlags, [MetricsWorker, MetadataServer, DBsSup]}}; + {ok, {SupFlags, [MetadataServer, DBsSup]}}; init(?databases) -> %% Children are added dynamically: SupFlags = #{ @@ -117,17 +119,9 @@ init(?databases) -> %% Internal exports %%================================================================================ --spec start_top() -> {ok, pid()}. -start_top() -> - supervisor:start_link({local, ?top}, ?MODULE, ?top). - start_databases_sup() -> supervisor:start_link({local, ?databases}, ?MODULE, ?databases). %%================================================================================ %% Internal functions %%================================================================================ - -ensure_top() -> - {ok, _} = emqx_ds_sup:attach_backend(builtin_local, {?MODULE, start_top, []}), - ok. 
diff --git a/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl index 67db21b4b..d8593ce40 100644 --- a/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl +++ b/apps/emqx_ds_builtin_local/test/emqx_ds_builtin_local_SUITE.erl @@ -335,12 +335,12 @@ end_per_suite(Config) -> ok. init_per_testcase(_TC, Config) -> - application:ensure_all_started(emqx_durable_storage), + application:ensure_all_started(emqx_ds_builtin_local), Config. end_per_testcase(_TC, _Config) -> snabbkaffe:stop(), - ok = application:stop(emqx_durable_storage), + ok = application:stop(emqx_ds_builtin_local), mria:stop(), _ = mnesia:delete_schema([node()]), ok. diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl index 65e640ed5..2b1cae64d 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl @@ -8,4 +8,4 @@ start(_Type, _Args) -> emqx_ds:register_backend(builtin_raft, emqx_ds_replication_layer), - {ok, self()}. + emqx_ds_builtin_raft_sup:start_top(). diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl index a88e7fc2e..70c9bbe16 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_sup.erl @@ -10,14 +10,14 @@ -behaviour(supervisor). %% API: --export([start_db/2, stop_db/1]). +-export([start_top/0, start_db/2, stop_db/1]). -export([set_gvar/3, get_gvar/3, clean_gvars/1]). %% behavior callbacks: -export([init/1]). %% internal exports: --export([start_top/0, start_databases_sup/0]). +-export([start_databases_sup/0]). -export_type([]). @@ -38,10 +38,13 @@ %% API functions %%================================================================================ +-spec start_top() -> {ok, pid()}. 
+start_top() -> + supervisor:start_link({local, ?top}, ?MODULE, ?top). + -spec start_db(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> supervisor:startchild_ret(). start_db(DB, Opts) -> - ensure_top(), ChildSpec = #{ id => DB, start => {emqx_ds_builtin_raft_db_sup, start_db, [DB, Opts]}, @@ -96,7 +99,6 @@ clean_gvars(DB) -> %% Chidren are attached dynamically to this one. init(?top) -> %% Children: - MetricsWorker = emqx_ds_builtin_metrics:child_spec(), MetadataServer = #{ id => metadata_server, start => {emqx_ds_replication_layer_meta, start_link, []}, @@ -119,7 +121,7 @@ init(?top) -> period => 1, auto_shutdown => never }, - {ok, {SupFlags, [MetricsWorker, MetadataServer, DBsSup]}}; + {ok, {SupFlags, [MetadataServer, DBsSup]}}; init(?databases) -> %% Children are added dynamically: SupFlags = #{ @@ -133,17 +135,9 @@ init(?databases) -> %% Internal exports %%================================================================================ --spec start_top() -> {ok, pid()}. -start_top() -> - supervisor:start_link({local, ?top}, ?MODULE, ?top). - start_databases_sup() -> supervisor:start_link({local, ?databases}, ?MODULE, ?databases). %%================================================================================ %% Internal functions %%================================================================================ - -ensure_top() -> - {ok, _} = emqx_ds_sup:attach_backend(builtin_raft, {?MODULE, start_top, []}), - ok. diff --git a/apps/emqx_durable_storage/src/emqx_ds_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_sup.erl index 41631d6d6..0a8d3c2ba 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_sup.erl @@ -18,7 +18,7 @@ -behaviour(supervisor). %% API: --export([start_link/0, attach_backend/2]). +-export([start_link/0]). -export([register_db/2, unregister_db/1, which_dbs/0]). %% behaviour callbacks: @@ -39,27 +39,6 @@ start_link() -> supervisor:start_link({local, ?SUP}, ?MODULE, top). 
-%% @doc Attach a child backend-specific supervisor to the top -%% application supervisor, if not yet present --spec attach_backend(_BackendId, {module(), atom(), list()}) -> - {ok, pid()} | {error, _}. -attach_backend(Backend, Start) -> - Spec = #{ - id => Backend, - start => Start, - significant => false, - shutdown => infinity, - type => supervisor - }, - case supervisor:start_child(?SUP, Spec) of - {ok, Pid} -> - {ok, Pid}; - {error, {already_started, Pid}} -> - {ok, Pid}; - {error, Err} -> - {error, Err} - end. - register_db(DB, Backend) -> ets:insert(?TAB, {DB, Backend}), ok. @@ -77,7 +56,7 @@ which_dbs() -> init(top) -> _ = ets:new(?TAB, [public, set, named_table]), - Children = [], + Children = [emqx_ds_builtin_metrics:child_spec()], SupFlags = #{ strategy => one_for_one, intensity => 10, diff --git a/changes/ce/breaking-13248.en.md b/changes/ce/breaking-13248.en.md index 731196e0b..a359cc960 100644 --- a/changes/ce/breaking-13248.en.md +++ b/changes/ce/breaking-13248.en.md @@ -4,7 +4,7 @@ It can't be used in a multi-node cluster. This backend is available in both open source and enterprise editions. - `builtin_raft`: A durable storage backend that uses Raft algorithm for replication. - This backend is available enterprise edition. + This backend is available only in the enterprise edition. The following Prometheus metrics have been renamed: From c0472a06f129fcb04cdcddc61825a2cb369bb8cf Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 21 Jun 2024 21:40:04 +0200 Subject: [PATCH 25/26] test(ds): Set initial conditions in repl. 
suite explicitly --- apps/emqx/src/emqx_ds_schema.erl | 2 +- .../test/emqx_ds_replication_SUITE.erl | 95 +++++++++++-------- .../test/emqx_ds_shared_sub_SUITE.erl | 2 +- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/apps/emqx/src/emqx_ds_schema.erl b/apps/emqx/src/emqx_ds_schema.erl index 1c932c822..1cda81d1d 100644 --- a/apps/emqx/src/emqx_ds_schema.erl +++ b/apps/emqx/src/emqx_ds_schema.erl @@ -263,7 +263,7 @@ common_builtin_fields() -> sc( pos_integer(), #{ - default => 12, + default => 16, importance => ?IMPORTANCE_MEDIUM, desc => ?DESC(builtin_n_shards) } diff --git a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl index 3bb2ba4c4..abe154807 100644 --- a/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_ds_builtin_raft/test/emqx_ds_replication_SUITE.erl @@ -57,12 +57,12 @@ appspec(emqx_durable_storage) -> }}. t_metadata(init, Config) -> - emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) }), - Config; + [{apps, Apps} | Config]; t_metadata('end', Config) -> - emqx_cth_suite:stop([emqx_ds_builtin_raft]), + emqx_cth_suite:stop(?config(apps, Config)), Config. t_metadata(_Config) -> @@ -203,18 +203,23 @@ t_rebalance(Config) -> ?check_trace( #{timetrap => 30_000}, begin + Sites = [S1, S2 | _] = [ds_repl_meta(N, this_site) || N <- Nodes], %% 1. Initialize DB on the first node. 
Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), - ?assertEqual(ok, ?ON(N1, emqx_ds:open_db(?DB, Opts))), - ?assertMatch(Shards when length(Shards) == 16, shards_online(N1, ?DB)), - - %% 1.1 Open DB on the rest of the nodes: [ ?assertEqual(ok, ?ON(Node, emqx_ds:open_db(?DB, Opts))) || Node <- Nodes ], - Sites = [S1, S2 | _] = [ds_repl_meta(N, this_site) || N <- Nodes], + %% 1.1 Kick all sites except S1 from the replica set as + %% the initial condition: + ?assertMatch( + {ok, [_]}, + ?ON(N1, emqx_ds_replication_layer_meta:assign_db_sites(?DB, [S1])) + ), + ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), + ?retry(500, 10, ?assertMatch(Shards when length(Shards) == 16, shards_online(N1, ?DB))), + ct:pal("Sites: ~p~n", [Sites]), Sequence = [ @@ -319,16 +324,15 @@ t_join_leave_errors('end', Config) -> t_join_leave_errors(Config) -> %% This testcase verifies that logical errors arising during handling of %% join/leave operations are reported correctly. - [N1, N2] = ?config(nodes, Config), Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), - ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?DB, Opts])), - ?assertEqual(ok, erpc:call(N2, emqx_ds, open_db, [?DB, Opts])), + ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?FUNCTION_NAME, Opts])), + ?assertEqual(ok, erpc:call(N2, emqx_ds, open_db, [?FUNCTION_NAME, Opts])), [S1, S2] = [ds_repl_meta(N, this_site) || N <- [N1, N2]], - ?assertEqual([S1], ds_repl_meta(N1, db_sites, [?DB])), + ?assertEqual(lists:sort([S1, S2]), lists:sort(ds_repl_meta(N1, db_sites, [?FUNCTION_NAME]))), %% Attempts to join a nonexistent DB / site. ?assertEqual( @@ -337,33 +341,40 @@ t_join_leave_errors(Config) -> ), ?assertEqual( {error, {nonexistent_sites, [<<"NO-MANS-SITE">>]}}, - ds_repl_meta(N1, join_db_site, [?DB, <<"NO-MANS-SITE">>]) + ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, <<"NO-MANS-SITE">>]) ), %% NOTE: Leaving a non-existent site is not an error. 
?assertEqual( {ok, unchanged}, - ds_repl_meta(N1, leave_db_site, [?DB, <<"NO-MANS-SITE">>]) + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, <<"NO-MANS-SITE">>]) ), %% Should be no-op. - ?assertEqual({ok, unchanged}, ds_repl_meta(N1, join_db_site, [?DB, S1])), - ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)), + ?assertEqual({ok, unchanged}, ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, S1])), + ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)), - %% Impossible to leave the last site. + %% Leave S2: + ?assertEqual( + {ok, [S1]}, + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, S2]) + ), + %% Impossible to leave the last site: ?assertEqual( {error, {too_few_sites, []}}, - ds_repl_meta(N1, leave_db_site, [?DB, S1]) + ds_repl_meta(N1, leave_db_site, [?FUNCTION_NAME, S1]) ), %% "Move" the DB to the other node. - ?assertMatch({ok, _}, ds_repl_meta(N1, join_db_site, [?DB, S2])), - ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])), - ?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?DB)), - ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), + ?assertMatch({ok, _}, ds_repl_meta(N1, join_db_site, [?FUNCTION_NAME, S2])), + ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?FUNCTION_NAME, S1])), + ?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)), + ?retry( + 1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)) + ), %% Should be no-op. - ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])), - ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)). + ?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?FUNCTION_NAME, S1])), + ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?FUNCTION_NAME)). 
t_rebalance_chaotic_converges(init, Config) -> Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft], @@ -395,23 +406,24 @@ t_rebalance_chaotic_converges(Config) -> ?check_trace( #{}, begin + Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], + ct:pal("Sites: ~p~n", [Sites]), + %% Initialize DB on first two nodes. Opts = opts(#{n_shards => 16, n_sites => 2, replication_factor => 3}), + %% Open DB: ?assertEqual( - [{ok, ok}, {ok, ok}], - erpc:multicall([N1, N2], emqx_ds, open_db, [?DB, Opts]) + [{ok, ok}, {ok, ok}, {ok, ok}], + erpc:multicall([N1, N2, N3], emqx_ds, open_db, [?DB, Opts]) ), - %% Open DB on the last node. - ?assertEqual( - ok, - erpc:call(N3, emqx_ds, open_db, [?DB, Opts]) + %% Kick N3 from the replica set as the initial condition: + ?assertMatch( + {ok, [_, _]}, + ?ON(N1, emqx_ds_replication_layer_meta:assign_db_sites(?DB, [S1, S2])) ), - - %% Find out which sites there are. - Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], - ct:pal("Sites: ~p~n", [Sites]), + ?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))), Sequence = [ {N1, join_db_site, S3}, @@ -600,12 +612,12 @@ t_drop_generation(Config) -> ). t_error_mapping_replication_layer(init, Config) -> - emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) }), - Config; + [{apps, Apps} | Config]; t_error_mapping_replication_layer('end', Config) -> - emqx_cth_suite:stop([emqx_ds_builtin_raft]), + emqx_cth_suite:stop(?config(apps, Config)), Config. t_error_mapping_replication_layer(_Config) -> @@ -701,6 +713,15 @@ t_error_mapping_replication_layer(_Config) -> %% This testcase verifies the behavior of `store_batch' operation %% when the underlying code experiences recoverable or unrecoverable %% problems. 
+t_store_batch_fail(init, Config) -> + Apps = emqx_cth_suite:start([emqx_ds_builtin_raft], #{ + work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config) + }), + [{apps, Apps} | Config]; +t_store_batch_fail('end', Config) -> + emqx_cth_suite:stop(?config(apps, Config)), + Config. + t_store_batch_fail(_Config) -> ?check_trace( #{timetrap => 15_000}, diff --git a/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl b/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl index bca8eb0eb..f18114918 100644 --- a/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl +++ b/apps/emqx_ds_shared_sub/test/emqx_ds_shared_sub_SUITE.erl @@ -26,7 +26,7 @@ init_per_suite(Config) -> }, <<"durable_storage">> => #{ <<"messages">> => #{ - <<"backend">> => <<"builtin">> + <<"backend">> => <<"builtin_raft">> } } } From d349f84f04ef197ebf91bc3122c82bec0848be3b Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:37:14 +0200 Subject: [PATCH 26/26] fix(ds): Apply remarks --- .../test/emqx_ds_backends_SUITE.erl | 21 ++++++++++++------- .../src/emqx_ds_builtin_local.erl | 12 ++++++++++- .../src/emqx_ds_buffer.erl | 1 - 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index c6d2db224..ab1e0feb0 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -135,16 +135,23 @@ t_05_update_iterator(Config) -> t_06_smoke_add_generation(Config) -> DB = ?FUNCTION_NAME, + BeginTime = os:system_time(millisecond), + ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), - ?assertMatch( - [{_, _}], - maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) + [{Gen1, #{created_at := Created1, since := Since1, until := undefined}}] = maps:to_list( + emqx_ds:list_generations_with_lifetimes(DB) ), + ?assertMatch(ok, emqx_ds:add_generation(DB)), - 
?assertMatch( - [{_, _}, {_, _}], - maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)) - ). + [ + {Gen1, #{created_at := Created1, since := Since1, until := Until1}}, + {Gen2, #{created_at := Created2, since := Since2, until := undefined}} + ] = maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)), + %% Check units of the return values (+/- 10s from test begin time): + ?give_or_take(BeginTime, 10_000, Created1), + ?give_or_take(BeginTime, 10_000, Created2), + ?give_or_take(BeginTime, 10_000, Since2), + ?give_or_take(BeginTime, 10_000, Until1). t_07_smoke_update_config(Config) -> DB = ?FUNCTION_NAME, diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index d7e5972ab..5fe6eb559 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -155,7 +155,12 @@ list_generations_with_lifetimes(DB) -> lists:foldl( fun(Shard, Acc) -> maps:fold( - fun(GenId, Data, Acc1) -> + fun(GenId, Data0, Acc1) -> + Data = maps:update_with( + until, + fun timeus_to_timestamp/1, + maps:update_with(since, fun timeus_to_timestamp/1, Data0) + ), Acc1#{{Shard, GenId} => Data} end, Acc, @@ -370,3 +375,8 @@ current_timestamp(ShardId) -> timestamp_to_timeus(TimestampMs) -> TimestampMs * 1000. + +timeus_to_timestamp(undefined) -> + undefined; +timeus_to_timestamp(TimestampUs) -> + TimestampUs div 1000. diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index 56e98eee3..e93bb33be 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -287,7 +287,6 @@ do_flush( lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), erlang:garbage_collect(), S#s{ - callback_state = CallbackS, n = 0, n_bytes = 0, queue = queue:new(),