fix(dsstore): persist inherited LTS trie
Before this commit, the inherited trie was actually only kept in the in-memory cache. Also simplify the storage backend behaviour around inheriting the previous generation's legacy.
This commit is contained in:
parent
3143475769
commit
68f6556856
|
@ -20,6 +20,7 @@
|
||||||
-export([
|
-export([
|
||||||
trie_create/1, trie_create/0,
|
trie_create/1, trie_create/0,
|
||||||
destroy/1,
|
destroy/1,
|
||||||
|
trie_dump/2,
|
||||||
trie_restore/2,
|
trie_restore/2,
|
||||||
trie_update/2,
|
trie_update/2,
|
||||||
trie_copy_learned_paths/2,
|
trie_copy_learned_paths/2,
|
||||||
|
@ -76,6 +77,8 @@
|
||||||
static_key_size => pos_integer()
|
static_key_size => pos_integer()
|
||||||
}.
|
}.
|
||||||
|
|
||||||
|
-type dump() :: [{_Key, _Val}].
|
||||||
|
|
||||||
-record(trie, {
|
-record(trie, {
|
||||||
persist :: persist_callback(),
|
persist :: persist_callback(),
|
||||||
static_key_size :: pos_integer(),
|
static_key_size :: pos_integer(),
|
||||||
|
@ -125,12 +128,12 @@ destroy(#trie{trie = Trie, stats = Stats}) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
%% @doc Restore trie from a dump
|
%% @doc Restore trie from a dump
|
||||||
-spec trie_restore(options(), [{_Key, _Val}]) -> trie().
|
-spec trie_restore(options(), dump()) -> trie().
|
||||||
trie_restore(Options, Dump) ->
|
trie_restore(Options, Dump) ->
|
||||||
trie_update(trie_create(Options), Dump).
|
trie_update(trie_create(Options), Dump).
|
||||||
|
|
||||||
%% @doc Update a trie with a dump of operations (used for replication)
|
%% @doc Update a trie with a dump of operations (used for replication)
|
||||||
-spec trie_update(trie(), [{_Key, _Val}]) -> trie().
|
-spec trie_update(trie(), dump()) -> trie().
|
||||||
trie_update(Trie, Dump) ->
|
trie_update(Trie, Dump) ->
|
||||||
lists:foreach(
|
lists:foreach(
|
||||||
fun({{StateFrom, Token}, StateTo}) ->
|
fun({{StateFrom, Token}, StateTo}) ->
|
||||||
|
@ -140,14 +143,23 @@ trie_update(Trie, Dump) ->
|
||||||
),
|
),
|
||||||
Trie.
|
Trie.
|
||||||
|
|
||||||
|
-spec trie_dump(trie(), _Filter :: all | wildcard) -> dump().
|
||||||
|
trie_dump(Trie, Filter) ->
|
||||||
|
case Filter of
|
||||||
|
all ->
|
||||||
|
Fun = fun(_) -> true end;
|
||||||
|
wildcard ->
|
||||||
|
Fun = fun contains_wildcard/1
|
||||||
|
end,
|
||||||
|
lists:append([P || P <- paths(Trie), Fun(P)]).
|
||||||
|
|
||||||
-spec trie_copy_learned_paths(trie(), trie()) -> trie().
|
-spec trie_copy_learned_paths(trie(), trie()) -> trie().
|
||||||
trie_copy_learned_paths(OldTrie, NewTrie) ->
|
trie_copy_learned_paths(OldTrie, NewTrie) ->
|
||||||
WildcardPaths = [P || P <- paths(OldTrie), contains_wildcard(P)],
|
|
||||||
lists:foreach(
|
lists:foreach(
|
||||||
fun({{StateFrom, Token}, StateTo}) ->
|
fun({{StateFrom, Token}, StateTo}) ->
|
||||||
trie_insert(NewTrie, StateFrom, Token, StateTo)
|
trie_insert(NewTrie, StateFrom, Token, StateTo)
|
||||||
end,
|
end,
|
||||||
lists:flatten(WildcardPaths)
|
trie_dump(OldTrie, wildcard)
|
||||||
),
|
),
|
||||||
NewTrie.
|
NewTrie.
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
%% behavior callbacks:
|
%% behavior callbacks:
|
||||||
-export([
|
-export([
|
||||||
create/4,
|
create/5,
|
||||||
open/5,
|
open/5,
|
||||||
drop/5,
|
drop/5,
|
||||||
prepare_batch/4,
|
prepare_batch/4,
|
||||||
|
@ -37,7 +37,6 @@
|
||||||
update_iterator/4,
|
update_iterator/4,
|
||||||
next/6,
|
next/6,
|
||||||
delete_next/6,
|
delete_next/6,
|
||||||
post_creation_actions/1,
|
|
||||||
|
|
||||||
handle_event/4
|
handle_event/4
|
||||||
]).
|
]).
|
||||||
|
@ -179,10 +178,11 @@
|
||||||
emqx_ds_storage_layer:shard_id(),
|
emqx_ds_storage_layer:shard_id(),
|
||||||
rocksdb:db_handle(),
|
rocksdb:db_handle(),
|
||||||
emqx_ds_storage_layer:gen_id(),
|
emqx_ds_storage_layer:gen_id(),
|
||||||
options()
|
options(),
|
||||||
|
_PrevGeneration :: s() | undefined
|
||||||
) ->
|
) ->
|
||||||
{schema(), emqx_ds_storage_layer:cf_refs()}.
|
{schema(), emqx_ds_storage_layer:cf_refs()}.
|
||||||
create(_ShardId, DBHandle, GenId, Options) ->
|
create(_ShardId, DBHandle, GenId, Options, SPrev) ->
|
||||||
%% Get options:
|
%% Get options:
|
||||||
BitsPerTopicLevel = maps:get(bits_per_wildcard_level, Options, 64),
|
BitsPerTopicLevel = maps:get(bits_per_wildcard_level, Options, 64),
|
||||||
TopicIndexBytes = maps:get(topic_index_bytes, Options, 4),
|
TopicIndexBytes = maps:get(topic_index_bytes, Options, 4),
|
||||||
|
@ -193,6 +193,14 @@ create(_ShardId, DBHandle, GenId, Options) ->
|
||||||
TrieCFName = trie_cf(GenId),
|
TrieCFName = trie_cf(GenId),
|
||||||
{ok, DataCFHandle} = rocksdb:create_column_family(DBHandle, DataCFName, []),
|
{ok, DataCFHandle} = rocksdb:create_column_family(DBHandle, DataCFName, []),
|
||||||
{ok, TrieCFHandle} = rocksdb:create_column_family(DBHandle, TrieCFName, []),
|
{ok, TrieCFHandle} = rocksdb:create_column_family(DBHandle, TrieCFName, []),
|
||||||
|
case SPrev of
|
||||||
|
#s{trie = TriePrev} ->
|
||||||
|
ok = copy_previous_trie(DBHandle, TrieCFHandle, TriePrev),
|
||||||
|
?tp(bitfield_lts_inherited_trie, #{}),
|
||||||
|
ok;
|
||||||
|
undefined ->
|
||||||
|
ok
|
||||||
|
end,
|
||||||
%% Create schema:
|
%% Create schema:
|
||||||
Schema = #{
|
Schema = #{
|
||||||
bits_per_wildcard_level => BitsPerTopicLevel,
|
bits_per_wildcard_level => BitsPerTopicLevel,
|
||||||
|
@ -241,20 +249,6 @@ open(_Shard, DBHandle, GenId, CFRefs, Schema) ->
|
||||||
gvars = ets:new(?MODULE, [public, set, {read_concurrency, true}])
|
gvars = ets:new(?MODULE, [public, set, {read_concurrency, true}])
|
||||||
}.
|
}.
|
||||||
|
|
||||||
-spec post_creation_actions(emqx_ds_storage_layer:post_creation_context()) ->
|
|
||||||
s().
|
|
||||||
post_creation_actions(
|
|
||||||
#{
|
|
||||||
new_gen_runtime_data := NewGenData,
|
|
||||||
old_gen_runtime_data := OldGenData
|
|
||||||
}
|
|
||||||
) ->
|
|
||||||
#s{trie = OldTrie} = OldGenData,
|
|
||||||
#s{trie = NewTrie0} = NewGenData,
|
|
||||||
NewTrie = copy_previous_trie(OldTrie, NewTrie0),
|
|
||||||
?tp(bitfield_lts_inherited_trie, #{}),
|
|
||||||
NewGenData#s{trie = NewTrie}.
|
|
||||||
|
|
||||||
-spec drop(
|
-spec drop(
|
||||||
emqx_ds_storage_layer:shard_id(),
|
emqx_ds_storage_layer:shard_id(),
|
||||||
rocksdb:db_handle(),
|
rocksdb:db_handle(),
|
||||||
|
@ -905,9 +899,19 @@ restore_trie(TopicIndexBytes, DB, CF) ->
|
||||||
rocksdb:iterator_close(IT)
|
rocksdb:iterator_close(IT)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec copy_previous_trie(emqx_ds_lts:trie(), emqx_ds_lts:trie()) -> emqx_ds_lts:trie().
|
-spec copy_previous_trie(rocksdb:db_handle(), rocksdb:cf_handle(), emqx_ds_lts:trie()) ->
|
||||||
copy_previous_trie(OldTrie, NewTrie) ->
|
ok.
|
||||||
emqx_ds_lts:trie_copy_learned_paths(OldTrie, NewTrie).
|
copy_previous_trie(DB, TrieCF, TriePrev) ->
|
||||||
|
{ok, Batch} = rocksdb:batch(),
|
||||||
|
lists:foreach(
|
||||||
|
fun({Key, Val}) ->
|
||||||
|
ok = rocksdb:batch_put(Batch, TrieCF, term_to_binary(Key), term_to_binary(Val))
|
||||||
|
end,
|
||||||
|
emqx_ds_lts:trie_dump(TriePrev, wildcard)
|
||||||
|
),
|
||||||
|
Result = rocksdb:write_batch(DB, Batch, []),
|
||||||
|
rocksdb:release_batch(Batch),
|
||||||
|
Result.
|
||||||
|
|
||||||
read_persisted_trie(IT, {ok, KeyB, ValB}) ->
|
read_persisted_trie(IT, {ok, KeyB, ValB}) ->
|
||||||
[
|
[
|
||||||
|
|
|
@ -69,7 +69,6 @@
|
||||||
shard_id/0,
|
shard_id/0,
|
||||||
options/0,
|
options/0,
|
||||||
prototype/0,
|
prototype/0,
|
||||||
post_creation_context/0,
|
|
||||||
cooked_batch/0
|
cooked_batch/0
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
@ -194,25 +193,19 @@
|
||||||
|
|
||||||
-type options() :: map().
|
-type options() :: map().
|
||||||
|
|
||||||
-type post_creation_context() ::
|
|
||||||
#{
|
|
||||||
shard_id := emqx_ds_storage_layer:shard_id(),
|
|
||||||
db := rocksdb:db_handle(),
|
|
||||||
new_gen_id := emqx_ds_storage_layer:gen_id(),
|
|
||||||
old_gen_id := emqx_ds_storage_layer:gen_id(),
|
|
||||||
new_cf_refs := cf_refs(),
|
|
||||||
old_cf_refs := cf_refs(),
|
|
||||||
new_gen_runtime_data := _NewData,
|
|
||||||
old_gen_runtime_data := _OldData
|
|
||||||
}.
|
|
||||||
|
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
%% Generation callbacks
|
%% Generation callbacks
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
|
|
||||||
%% Create the new schema given generation id and the options.
|
%% Create the new schema given generation id and the options.
|
||||||
%% Create rocksdb column families.
|
%% Create rocksdb column families.
|
||||||
-callback create(shard_id(), rocksdb:db_handle(), gen_id(), Options :: map()) ->
|
-callback create(
|
||||||
|
shard_id(),
|
||||||
|
rocksdb:db_handle(),
|
||||||
|
gen_id(),
|
||||||
|
Options :: map(),
|
||||||
|
PrevRuntimeData :: term()
|
||||||
|
) ->
|
||||||
{_Schema, cf_refs()}.
|
{_Schema, cf_refs()}.
|
||||||
|
|
||||||
%% Open the existing schema
|
%% Open the existing schema
|
||||||
|
@ -258,9 +251,7 @@
|
||||||
|
|
||||||
-callback handle_event(shard_id(), _Data, emqx_ds:time(), CustomEvent | tick) -> [CustomEvent].
|
-callback handle_event(shard_id(), _Data, emqx_ds:time(), CustomEvent | tick) -> [CustomEvent].
|
||||||
|
|
||||||
-callback post_creation_actions(post_creation_context()) -> _Data.
|
-optional_callbacks([handle_event/4]).
|
||||||
|
|
||||||
-optional_callbacks([post_creation_actions/1, handle_event/4]).
|
|
||||||
|
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
%% API for the replication layer
|
%% API for the replication layer
|
||||||
|
@ -686,42 +677,14 @@ open_shard(ShardId, DB, CFRefs, ShardSchema) ->
|
||||||
server_state() | {error, overlaps_existing_generations}.
|
server_state() | {error, overlaps_existing_generations}.
|
||||||
handle_add_generation(S0, Since) ->
|
handle_add_generation(S0, Since) ->
|
||||||
#s{shard_id = ShardId, db = DB, schema = Schema0, shard = Shard0, cf_refs = CFRefs0} = S0,
|
#s{shard_id = ShardId, db = DB, schema = Schema0, shard = Shard0, cf_refs = CFRefs0} = S0,
|
||||||
|
|
||||||
#{current_generation := OldGenId, prototype := {CurrentMod, _ModConf}} = Schema0,
|
|
||||||
OldKey = ?GEN_KEY(OldGenId),
|
|
||||||
#{OldKey := OldGenSchema} = Schema0,
|
|
||||||
#{cf_refs := OldCFRefs} = OldGenSchema,
|
|
||||||
#{OldKey := #{module := OldMod, data := OldGenData}} = Shard0,
|
|
||||||
|
|
||||||
Schema1 = update_last_until(Schema0, Since),
|
Schema1 = update_last_until(Schema0, Since),
|
||||||
Shard1 = update_last_until(Shard0, Since),
|
Shard1 = update_last_until(Shard0, Since),
|
||||||
|
|
||||||
case Schema1 of
|
case Schema1 of
|
||||||
_Updated = #{} ->
|
_Updated = #{} ->
|
||||||
{GenId, Schema, NewCFRefs} = new_generation(ShardId, DB, Schema1, Since),
|
{GenId, Schema, NewCFRefs} = new_generation(ShardId, DB, Schema1, Shard0, Since),
|
||||||
CFRefs = NewCFRefs ++ CFRefs0,
|
CFRefs = NewCFRefs ++ CFRefs0,
|
||||||
Key = ?GEN_KEY(GenId),
|
Key = ?GEN_KEY(GenId),
|
||||||
Generation0 =
|
Generation = open_generation(ShardId, DB, CFRefs, GenId, maps:get(Key, Schema)),
|
||||||
#{data := NewGenData0} =
|
|
||||||
open_generation(ShardId, DB, CFRefs, GenId, maps:get(Key, Schema)),
|
|
||||||
%% When the new generation's module is the same as the last one, we might want to
|
|
||||||
%% perform actions like inheriting some of the previous (meta)data.
|
|
||||||
NewGenData =
|
|
||||||
run_post_creation_actions(
|
|
||||||
#{
|
|
||||||
shard_id => ShardId,
|
|
||||||
db => DB,
|
|
||||||
new_gen_id => GenId,
|
|
||||||
old_gen_id => OldGenId,
|
|
||||||
new_cf_refs => NewCFRefs,
|
|
||||||
old_cf_refs => OldCFRefs,
|
|
||||||
new_gen_runtime_data => NewGenData0,
|
|
||||||
old_gen_runtime_data => OldGenData,
|
|
||||||
new_module => CurrentMod,
|
|
||||||
old_module => OldMod
|
|
||||||
}
|
|
||||||
),
|
|
||||||
Generation = Generation0#{data := NewGenData},
|
|
||||||
Shard = Shard1#{current_generation := GenId, Key => Generation},
|
Shard = Shard1#{current_generation := GenId, Key => Generation},
|
||||||
S0#s{
|
S0#s{
|
||||||
cf_refs = CFRefs,
|
cf_refs = CFRefs,
|
||||||
|
@ -834,9 +797,28 @@ create_new_shard_schema(ShardId, DB, CFRefs, Prototype) ->
|
||||||
-spec new_generation(shard_id(), rocksdb:db_handle(), shard_schema(), emqx_ds:time()) ->
|
-spec new_generation(shard_id(), rocksdb:db_handle(), shard_schema(), emqx_ds:time()) ->
|
||||||
{gen_id(), shard_schema(), cf_refs()}.
|
{gen_id(), shard_schema(), cf_refs()}.
|
||||||
new_generation(ShardId, DB, Schema0, Since) ->
|
new_generation(ShardId, DB, Schema0, Since) ->
|
||||||
|
new_generation(ShardId, DB, Schema0, undefined, Since).
|
||||||
|
|
||||||
|
-spec new_generation(
|
||||||
|
shard_id(),
|
||||||
|
rocksdb:db_handle(),
|
||||||
|
shard_schema(),
|
||||||
|
shard() | undefined,
|
||||||
|
emqx_ds:time()
|
||||||
|
) ->
|
||||||
|
{gen_id(), shard_schema(), cf_refs()}.
|
||||||
|
new_generation(ShardId, DB, Schema0, Shard0, Since) ->
|
||||||
#{current_generation := PrevGenId, prototype := {Mod, ModConf}} = Schema0,
|
#{current_generation := PrevGenId, prototype := {Mod, ModConf}} = Schema0,
|
||||||
|
case Shard0 of
|
||||||
|
#{?GEN_KEY(PrevGenId) := #{module := Mod} = PrevGen} ->
|
||||||
|
%% When the new generation's module is the same as the last one, we might want
|
||||||
|
%% to perform actions like inheriting some of the previous (meta)data.
|
||||||
|
PrevRuntimeData = maps:get(data, PrevGen);
|
||||||
|
_ ->
|
||||||
|
PrevRuntimeData = undefined
|
||||||
|
end,
|
||||||
GenId = next_generation_id(PrevGenId),
|
GenId = next_generation_id(PrevGenId),
|
||||||
{GenData, NewCFRefs} = Mod:create(ShardId, DB, GenId, ModConf),
|
{GenData, NewCFRefs} = Mod:create(ShardId, DB, GenId, ModConf, PrevRuntimeData),
|
||||||
GenSchema = #{
|
GenSchema = #{
|
||||||
module => Mod,
|
module => Mod,
|
||||||
data => GenData,
|
data => GenData,
|
||||||
|
@ -918,23 +900,6 @@ update_last_until(Schema = #{current_generation := GenId}, Until) ->
|
||||||
{error, overlaps_existing_generations}
|
{error, overlaps_existing_generations}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
run_post_creation_actions(
|
|
||||||
#{
|
|
||||||
new_module := Mod,
|
|
||||||
old_module := Mod,
|
|
||||||
new_gen_runtime_data := NewGenData
|
|
||||||
} = Context
|
|
||||||
) ->
|
|
||||||
case erlang:function_exported(Mod, post_creation_actions, 1) of
|
|
||||||
true ->
|
|
||||||
Mod:post_creation_actions(Context);
|
|
||||||
false ->
|
|
||||||
NewGenData
|
|
||||||
end;
|
|
||||||
run_post_creation_actions(#{new_gen_runtime_data := NewGenData}) ->
|
|
||||||
%% Different implementation modules
|
|
||||||
NewGenData.
|
|
||||||
|
|
||||||
handle_take_snapshot(#s{db = DB, shard_id = ShardId}) ->
|
handle_take_snapshot(#s{db = DB, shard_id = ShardId}) ->
|
||||||
Name = integer_to_list(erlang:system_time(millisecond)),
|
Name = integer_to_list(erlang:system_time(millisecond)),
|
||||||
Dir = checkpoint_dir(ShardId, Name),
|
Dir = checkpoint_dir(ShardId, Name),
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
%% behavior callbacks:
|
%% behavior callbacks:
|
||||||
-export([
|
-export([
|
||||||
create/4,
|
create/5,
|
||||||
open/5,
|
open/5,
|
||||||
drop/5,
|
drop/5,
|
||||||
prepare_batch/4,
|
prepare_batch/4,
|
||||||
|
@ -88,7 +88,7 @@
|
||||||
%% behavior callbacks
|
%% behavior callbacks
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
|
|
||||||
create(_ShardId, DBHandle, GenId, _Options) ->
|
create(_ShardId, DBHandle, GenId, _Options, _SPrev) ->
|
||||||
CFName = data_cf(GenId),
|
CFName = data_cf(GenId),
|
||||||
{ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, []),
|
{ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, []),
|
||||||
Schema = #schema{},
|
Schema = #schema{},
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
{application, emqx_durable_storage, [
|
{application, emqx_durable_storage, [
|
||||||
{description, "Message persistence and subscription replays for EMQX"},
|
{description, "Message persistence and subscription replays for EMQX"},
|
||||||
% strict semver, bump manually!
|
% strict semver, bump manually!
|
||||||
{vsn, "0.2.0"},
|
{vsn, "0.2.1"},
|
||||||
{modules, []},
|
{modules, []},
|
||||||
{registered, []},
|
{registered, []},
|
||||||
{applications, [kernel, stdlib, rocksdb, gproc, mria, ra, emqx_utils]},
|
{applications, [kernel, stdlib, rocksdb, gproc, mria, ra, emqx_utils]},
|
||||||
|
|
Loading…
Reference in New Issue