feat(dsraft): support atomic batches + preconditions
This commit is contained in:
parent
11951f8f6c
commit
5356d678cc
|
@ -83,6 +83,7 @@
|
||||||
ra_state/0
|
ra_state/0
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx_durable_storage/include/emqx_ds.hrl").
|
||||||
-include_lib("emqx_utils/include/emqx_message.hrl").
|
-include_lib("emqx_utils/include/emqx_message.hrl").
|
||||||
-include_lib("snabbkaffe/include/trace.hrl").
|
-include_lib("snabbkaffe/include/trace.hrl").
|
||||||
-include("emqx_ds_replication_layer.hrl").
|
-include("emqx_ds_replication_layer.hrl").
|
||||||
|
@ -135,11 +136,12 @@
|
||||||
?enc := emqx_ds_storage_layer:delete_iterator()
|
?enc := emqx_ds_storage_layer:delete_iterator()
|
||||||
}.
|
}.
|
||||||
|
|
||||||
%% TODO: this type is obsolete and is kept only for compatibility with
|
%% Write batch.
|
||||||
%% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6)
|
%% Instances of this type currently form the majority of the Raft log.
|
||||||
-type batch() :: #{
|
-type batch() :: #{
|
||||||
?tag := ?BATCH,
|
?tag := ?BATCH,
|
||||||
?batch_messages := [emqx_types:message()]
|
?batch_operations := [emqx_ds:operation()],
|
||||||
|
?batch_preconditions => [emqx_ds:precondition()]
|
||||||
}.
|
}.
|
||||||
|
|
||||||
-type generation_rank() :: {shard_id(), term()}.
|
-type generation_rank() :: {shard_id(), term()}.
|
||||||
|
@ -240,16 +242,45 @@ drop_db(DB) ->
|
||||||
_ = emqx_ds_proto_v4:drop_db(list_nodes(), DB),
|
_ = emqx_ds_proto_v4:drop_db(list_nodes(), DB),
|
||||||
emqx_ds_replication_layer_meta:drop_db(DB).
|
emqx_ds_replication_layer_meta:drop_db(DB).
|
||||||
|
|
||||||
-spec store_batch(emqx_ds:db(), [emqx_types:message(), ...], emqx_ds:message_store_opts()) ->
|
-spec store_batch(emqx_ds:db(), emqx_ds:batch(), emqx_ds:message_store_opts()) ->
|
||||||
emqx_ds:store_batch_result().
|
emqx_ds:store_batch_result().
|
||||||
store_batch(DB, Messages, Opts) ->
|
store_batch(DB, Batch = #dsbatch{preconditions = [_ | _]}, Opts) ->
|
||||||
|
%% NOTE: Atomic batch is implied, will not check with DB config.
|
||||||
|
store_batch_atomic(DB, Batch, Opts);
|
||||||
|
store_batch(DB, Batch, Opts) ->
|
||||||
|
case emqx_ds_replication_layer_meta:db_config(DB) of
|
||||||
|
#{atomic_batches := true} ->
|
||||||
|
store_batch_atomic(DB, Batch, Opts);
|
||||||
|
#{} ->
|
||||||
|
store_batch_buffered(DB, Batch, Opts)
|
||||||
|
end.
|
||||||
|
|
||||||
|
store_batch_buffered(DB, #dsbatch{operations = Operations}, Opts) ->
|
||||||
|
store_batch_buffered(DB, Operations, Opts);
|
||||||
|
store_batch_buffered(DB, Batch, Opts) ->
|
||||||
try
|
try
|
||||||
emqx_ds_buffer:store_batch(DB, Messages, Opts)
|
emqx_ds_buffer:store_batch(DB, Batch, Opts)
|
||||||
catch
|
catch
|
||||||
error:{Reason, _Call} when Reason == timeout; Reason == noproc ->
|
error:{Reason, _Call} when Reason == timeout; Reason == noproc ->
|
||||||
{error, recoverable, Reason}
|
{error, recoverable, Reason}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
store_batch_atomic(DB, Batch, _Opts) ->
|
||||||
|
Shards = shards_of_batch(DB, Batch),
|
||||||
|
case Shards of
|
||||||
|
[Shard] ->
|
||||||
|
case ra_store_batch(DB, Shard, Batch) of
|
||||||
|
{timeout, ServerId} ->
|
||||||
|
{error, recoverable, {timeout, ServerId}};
|
||||||
|
Result ->
|
||||||
|
Result
|
||||||
|
end;
|
||||||
|
[] ->
|
||||||
|
ok;
|
||||||
|
[_ | _] ->
|
||||||
|
{error, unrecoverable, nonatomic_batch_spans_multiple_storages}
|
||||||
|
end.
|
||||||
|
|
||||||
-spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) ->
|
-spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) ->
|
||||||
[{emqx_ds:stream_rank(), stream()}].
|
[{emqx_ds:stream_rank(), stream()}].
|
||||||
get_streams(DB, TopicFilter, StartTime) ->
|
get_streams(DB, TopicFilter, StartTime) ->
|
||||||
|
@ -418,6 +449,23 @@ shard_of_key(DB, Key) ->
|
||||||
Hash = erlang:phash2(Key, N),
|
Hash = erlang:phash2(Key, N),
|
||||||
integer_to_binary(Hash).
|
integer_to_binary(Hash).
|
||||||
|
|
||||||
|
shards_of_batch(DB, #dsbatch{operations = Operations, preconditions = Preconditions}) ->
|
||||||
|
shards_of_batch(DB, Preconditions, shards_of_batch(DB, Operations, []));
|
||||||
|
shards_of_batch(DB, Operations) ->
|
||||||
|
shards_of_batch(DB, Operations, []).
|
||||||
|
|
||||||
|
shards_of_batch(DB, [Operation | Rest], Acc) ->
|
||||||
|
case shard_of_operation(DB, Operation, clientid, #{}) of
|
||||||
|
Shard when Shard =:= hd(Acc) ->
|
||||||
|
shards_of_batch(DB, Rest, Acc);
|
||||||
|
Shard when Acc =:= [] ->
|
||||||
|
shards_of_batch(DB, Rest, [Shard]);
|
||||||
|
ShardAnother ->
|
||||||
|
[ShardAnother | Acc]
|
||||||
|
end;
|
||||||
|
shards_of_batch(_DB, [], Acc) ->
|
||||||
|
Acc.
|
||||||
|
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
%% Internal exports (RPC targets)
|
%% Internal exports (RPC targets)
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
|
@ -639,13 +687,22 @@ list_nodes() ->
|
||||||
end
|
end
|
||||||
).
|
).
|
||||||
|
|
||||||
-spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), [emqx_types:message()]) ->
|
-spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:batch()) ->
|
||||||
ok | {timeout, _} | {error, recoverable | unrecoverable, _Err}.
|
ok | {timeout, _} | emqx_ds:error(_).
|
||||||
ra_store_batch(DB, Shard, Messages) ->
|
ra_store_batch(DB, Shard, Batch) ->
|
||||||
Command = #{
|
case Batch of
|
||||||
?tag => ?BATCH,
|
#dsbatch{operations = Operations, preconditions = Preconditions} ->
|
||||||
?batch_messages => Messages
|
Command = #{
|
||||||
},
|
?tag => ?BATCH,
|
||||||
|
?batch_operations => Operations,
|
||||||
|
?batch_preconditions => Preconditions
|
||||||
|
};
|
||||||
|
Operations ->
|
||||||
|
Command = #{
|
||||||
|
?tag => ?BATCH,
|
||||||
|
?batch_operations => Operations
|
||||||
|
}
|
||||||
|
end,
|
||||||
Servers = emqx_ds_replication_layer_shard:servers(DB, Shard, leader_preferred),
|
Servers = emqx_ds_replication_layer_shard:servers(DB, Shard, leader_preferred),
|
||||||
case emqx_ds_replication_layer_shard:process_command(Servers, Command, ?RA_TIMEOUT) of
|
case emqx_ds_replication_layer_shard:process_command(Servers, Command, ?RA_TIMEOUT) of
|
||||||
{ok, Result, _Leader} ->
|
{ok, Result, _Leader} ->
|
||||||
|
@ -782,6 +839,7 @@ ra_drop_shard(DB, Shard) ->
|
||||||
|
|
||||||
-define(pd_ra_idx_need_release, '$emqx_ds_raft_idx_need_release').
|
-define(pd_ra_idx_need_release, '$emqx_ds_raft_idx_need_release').
|
||||||
-define(pd_ra_bytes_need_release, '$emqx_ds_raft_bytes_need_release').
|
-define(pd_ra_bytes_need_release, '$emqx_ds_raft_bytes_need_release').
|
||||||
|
-define(pd_ra_force_monotonic, '$emqx_ds_raft_force_monotonic').
|
||||||
|
|
||||||
-spec init(_Args :: map()) -> ra_state().
|
-spec init(_Args :: map()) -> ra_state().
|
||||||
init(#{db := DB, shard := Shard}) ->
|
init(#{db := DB, shard := Shard}) ->
|
||||||
|
@ -791,18 +849,30 @@ init(#{db := DB, shard := Shard}) ->
|
||||||
{ra_state(), _Reply, _Effects}.
|
{ra_state(), _Reply, _Effects}.
|
||||||
apply(
|
apply(
|
||||||
RaftMeta,
|
RaftMeta,
|
||||||
#{
|
Command = #{
|
||||||
?tag := ?BATCH,
|
?tag := ?BATCH,
|
||||||
?batch_messages := MessagesIn
|
?batch_operations := OperationsIn
|
||||||
},
|
},
|
||||||
#{db_shard := DBShard = {DB, Shard}, latest := Latest0} = State0
|
#{db_shard := DBShard = {DB, Shard}, latest := Latest0} = State0
|
||||||
) ->
|
) ->
|
||||||
?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => MessagesIn, latest => Latest0}),
|
?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => OperationsIn, latest => Latest0}),
|
||||||
{Stats, Latest, Messages} = assign_timestamps(Latest0, MessagesIn),
|
Preconditions = maps:get(?batch_preconditions, Command, []),
|
||||||
Result = emqx_ds_storage_layer:store_batch(DBShard, Messages, #{durable => false}),
|
{Stats, Latest, Operations} = assign_timestamps(DB, Latest0, OperationsIn),
|
||||||
State = State0#{latest := Latest},
|
%% FIXME
|
||||||
set_ts(DBShard, Latest),
|
case emqx_ds_precondition:verify(emqx_ds_storage_layer, DBShard, Preconditions) of
|
||||||
Effects = try_release_log(Stats, RaftMeta, State),
|
ok ->
|
||||||
|
Result = emqx_ds_storage_layer:store_batch(DBShard, Operations, #{durable => false}),
|
||||||
|
State = State0#{latest := Latest},
|
||||||
|
set_ts(DBShard, Latest),
|
||||||
|
Effects = try_release_log(Stats, RaftMeta, State);
|
||||||
|
PreconditionFailed = {precondition_failed, _} ->
|
||||||
|
Result = {error, unrecoverable, PreconditionFailed},
|
||||||
|
State = State0,
|
||||||
|
Effects = [];
|
||||||
|
Result ->
|
||||||
|
State = State0,
|
||||||
|
Effects = []
|
||||||
|
end,
|
||||||
Effects =/= [] andalso ?tp(ds_ra_effects, #{effects => Effects, meta => RaftMeta}),
|
Effects =/= [] andalso ?tp(ds_ra_effects, #{effects => Effects, meta => RaftMeta}),
|
||||||
{State, Result, Effects};
|
{State, Result, Effects};
|
||||||
apply(
|
apply(
|
||||||
|
@ -877,6 +947,21 @@ apply(
|
||||||
Effects = handle_custom_event(DBShard, Latest, CustomEvent),
|
Effects = handle_custom_event(DBShard, Latest, CustomEvent),
|
||||||
{State#{latest => Latest}, ok, Effects}.
|
{State#{latest => Latest}, ok, Effects}.
|
||||||
|
|
||||||
|
assign_timestamps(DB, Latest, Messages) ->
|
||||||
|
ForceMonotonic = force_monotonic_timestamps(DB),
|
||||||
|
assign_timestamps(ForceMonotonic, Latest, Messages, [], 0, 0).
|
||||||
|
|
||||||
|
force_monotonic_timestamps(DB) ->
|
||||||
|
case erlang:get(?pd_ra_force_monotonic) of
|
||||||
|
undefined ->
|
||||||
|
DBConfig = emqx_ds_replication_layer_meta:db_config(DB),
|
||||||
|
Flag = maps:get(force_monotonic_timestamps, DBConfig),
|
||||||
|
erlang:put(?pd_ra_force_monotonic, Flag);
|
||||||
|
Flag ->
|
||||||
|
ok
|
||||||
|
end,
|
||||||
|
Flag.
|
||||||
|
|
||||||
try_release_log({_N, BatchSize}, RaftMeta = #{index := CurrentIdx}, State) ->
|
try_release_log({_N, BatchSize}, RaftMeta = #{index := CurrentIdx}, State) ->
|
||||||
%% NOTE
|
%% NOTE
|
||||||
%% Because cursor release means storage flush (see
|
%% Because cursor release means storage flush (see
|
||||||
|
@ -939,10 +1024,7 @@ tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) ->
|
||||||
?tp(emqx_ds_replication_layer_tick, #{db => DB, shard => Shard, timestamp => Timestamp}),
|
?tp(emqx_ds_replication_layer_tick, #{db => DB, shard => Shard, timestamp => Timestamp}),
|
||||||
handle_custom_event(DBShard, Timestamp, tick).
|
handle_custom_event(DBShard, Timestamp, tick).
|
||||||
|
|
||||||
assign_timestamps(Latest, Messages) ->
|
assign_timestamps(true, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) ->
|
||||||
assign_timestamps(Latest, Messages, [], 0, 0).
|
|
||||||
|
|
||||||
assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) ->
|
|
||||||
case emqx_message:timestamp(Message0, microsecond) of
|
case emqx_message:timestamp(Message0, microsecond) of
|
||||||
TimestampUs when TimestampUs > Latest0 ->
|
TimestampUs when TimestampUs > Latest0 ->
|
||||||
Latest = TimestampUs,
|
Latest = TimestampUs,
|
||||||
|
@ -951,8 +1033,17 @@ assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) ->
|
||||||
Latest = Latest0 + 1,
|
Latest = Latest0 + 1,
|
||||||
Message = assign_timestamp(Latest, Message0)
|
Message = assign_timestamp(Latest, Message0)
|
||||||
end,
|
end,
|
||||||
assign_timestamps(Latest, Rest, [Message | Acc], N + 1, Sz + approx_message_size(Message0));
|
MSize = approx_message_size(Message0),
|
||||||
assign_timestamps(Latest, [], Acc, N, Size) ->
|
assign_timestamps(true, Latest, Rest, [Message | Acc], N + 1, Sz + MSize);
|
||||||
|
assign_timestamps(false, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) ->
|
||||||
|
Timestamp = emqx_message:timestamp(Message0),
|
||||||
|
Latest = max(Latest0, Timestamp),
|
||||||
|
Message = assign_timestamp(Timestamp, Message0),
|
||||||
|
MSize = approx_message_size(Message0),
|
||||||
|
assign_timestamps(false, Latest, Rest, [Message | Acc], N + 1, Sz + MSize);
|
||||||
|
assign_timestamps(ForceMonotonic, Latest, [Operation | Rest], Acc, N, Sz) ->
|
||||||
|
assign_timestamps(ForceMonotonic, Latest, Rest, [Operation | Acc], N + 1, Sz);
|
||||||
|
assign_timestamps(_ForceMonotonic, Latest, [], Acc, N, Size) ->
|
||||||
{{N, Size}, Latest, lists:reverse(Acc)}.
|
{{N, Size}, Latest, lists:reverse(Acc)}.
|
||||||
|
|
||||||
assign_timestamp(TimestampUs, Message) ->
|
assign_timestamp(TimestampUs, Message) ->
|
||||||
|
|
|
@ -19,7 +19,8 @@
|
||||||
-define(enc, 3).
|
-define(enc, 3).
|
||||||
|
|
||||||
%% ?BATCH
|
%% ?BATCH
|
||||||
-define(batch_messages, 2).
|
-define(batch_operations, 2).
|
||||||
|
-define(batch_preconditions, 4).
|
||||||
-define(timestamp, 3).
|
-define(timestamp, 3).
|
||||||
|
|
||||||
%% add_generation / update_config
|
%% add_generation / update_config
|
||||||
|
|
|
@ -56,6 +56,7 @@
|
||||||
topic/0,
|
topic/0,
|
||||||
batch/0,
|
batch/0,
|
||||||
operation/0,
|
operation/0,
|
||||||
|
deletion/0,
|
||||||
precondition/0,
|
precondition/0,
|
||||||
stream/0,
|
stream/0,
|
||||||
delete_stream/0,
|
delete_stream/0,
|
||||||
|
@ -110,7 +111,9 @@
|
||||||
message()
|
message()
|
||||||
%% Delete a message.
|
%% Delete a message.
|
||||||
%% Does nothing if the message does not exist.
|
%% Does nothing if the message does not exist.
|
||||||
| {delete, message_matcher('_')}.
|
| deletion().
|
||||||
|
|
||||||
|
-type deletion() :: {delete, message_matcher('_')}.
|
||||||
|
|
||||||
%% Precondition.
|
%% Precondition.
|
||||||
%% Fails whole batch if the storage already has the matching message (`if_exists'),
|
%% Fails whole batch if the storage already has the matching message (`if_exists'),
|
||||||
|
|
|
@ -37,6 +37,9 @@
|
||||||
next/4,
|
next/4,
|
||||||
delete_next/5,
|
delete_next/5,
|
||||||
|
|
||||||
|
%% Preconditions
|
||||||
|
lookup_message/2,
|
||||||
|
|
||||||
%% Generations
|
%% Generations
|
||||||
update_config/3,
|
update_config/3,
|
||||||
add_generation/2,
|
add_generation/2,
|
||||||
|
@ -74,6 +77,7 @@
|
||||||
batch_store_opts/0
|
batch_store_opts/0
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
-include("emqx_ds.hrl").
|
||||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
-define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}).
|
-define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}).
|
||||||
|
@ -115,6 +119,11 @@
|
||||||
|
|
||||||
-type gen_id() :: 0..16#ffff.
|
-type gen_id() :: 0..16#ffff.
|
||||||
|
|
||||||
|
-type batch() :: [
|
||||||
|
{emqx_ds:time(), emqx_types:message()}
|
||||||
|
| emqx_ds:deletion()
|
||||||
|
].
|
||||||
|
|
||||||
%% Options affecting how batches should be stored.
|
%% Options affecting how batches should be stored.
|
||||||
%% See also: `emqx_ds:message_store_opts()'.
|
%% See also: `emqx_ds:message_store_opts()'.
|
||||||
-type batch_store_opts() ::
|
-type batch_store_opts() ::
|
||||||
|
@ -294,6 +303,10 @@
|
||||||
| {ok, end_of_stream}
|
| {ok, end_of_stream}
|
||||||
| emqx_ds:error(_).
|
| emqx_ds:error(_).
|
||||||
|
|
||||||
|
%% Lookup a single message, for preconditions to work.
|
||||||
|
-callback lookup_message(shard_id(), generation_data(), emqx_ds_precondition:matcher()) ->
|
||||||
|
emqx_types:message() | not_found | emqx_ds:error(_).
|
||||||
|
|
||||||
-callback handle_event(shard_id(), generation_data(), emqx_ds:time(), CustomEvent | tick) ->
|
-callback handle_event(shard_id(), generation_data(), emqx_ds:time(), CustomEvent | tick) ->
|
||||||
[CustomEvent].
|
[CustomEvent].
|
||||||
|
|
||||||
|
@ -317,14 +330,10 @@ drop_shard(Shard) ->
|
||||||
|
|
||||||
%% @doc This is a convenience wrapper that combines `prepare' and
|
%% @doc This is a convenience wrapper that combines `prepare' and
|
||||||
%% `commit' operations.
|
%% `commit' operations.
|
||||||
-spec store_batch(
|
-spec store_batch(shard_id(), batch(), batch_store_opts()) ->
|
||||||
shard_id(),
|
|
||||||
[{emqx_ds:time(), emqx_types:message()}],
|
|
||||||
batch_store_opts()
|
|
||||||
) ->
|
|
||||||
emqx_ds:store_batch_result().
|
emqx_ds:store_batch_result().
|
||||||
store_batch(Shard, Messages, Options) ->
|
store_batch(Shard, Batch, Options) ->
|
||||||
case prepare_batch(Shard, Messages, #{}) of
|
case prepare_batch(Shard, Batch, #{}) of
|
||||||
{ok, CookedBatch} ->
|
{ok, CookedBatch} ->
|
||||||
commit_batch(Shard, CookedBatch, Options);
|
commit_batch(Shard, CookedBatch, Options);
|
||||||
ignore ->
|
ignore ->
|
||||||
|
@ -342,23 +351,21 @@ store_batch(Shard, Messages, Options) ->
|
||||||
%%
|
%%
|
||||||
%% The underlying storage layout MAY use timestamp as a unique message
|
%% The underlying storage layout MAY use timestamp as a unique message
|
||||||
%% ID.
|
%% ID.
|
||||||
-spec prepare_batch(
|
-spec prepare_batch(shard_id(), batch(), batch_prepare_opts()) ->
|
||||||
shard_id(),
|
{ok, cooked_batch()} | ignore | emqx_ds:error(_).
|
||||||
[{emqx_ds:time(), emqx_types:message()}],
|
prepare_batch(Shard, Batch, Options) ->
|
||||||
batch_prepare_opts()
|
|
||||||
) -> {ok, cooked_batch()} | ignore | emqx_ds:error(_).
|
|
||||||
prepare_batch(Shard, Messages = [{Time, _} | _], Options) ->
|
|
||||||
%% NOTE
|
%% NOTE
|
||||||
%% We assume that batches do not span generations. Callers should enforce this.
|
%% We assume that batches do not span generations. Callers should enforce this.
|
||||||
?tp(emqx_ds_storage_layer_prepare_batch, #{
|
?tp(emqx_ds_storage_layer_prepare_batch, #{
|
||||||
shard => Shard, messages => Messages, options => Options
|
shard => Shard, batch => Batch, options => Options
|
||||||
}),
|
}),
|
||||||
%% FIXME: always store messages in the current generation
|
%% FIXME: always store messages in the current generation
|
||||||
case generation_at(Shard, Time) of
|
Time = batch_starts_at(Batch),
|
||||||
|
case is_integer(Time) andalso generation_at(Shard, Time) of
|
||||||
{GenId, #{module := Mod, data := GenData}} ->
|
{GenId, #{module := Mod, data := GenData}} ->
|
||||||
T0 = erlang:monotonic_time(microsecond),
|
T0 = erlang:monotonic_time(microsecond),
|
||||||
Result =
|
Result =
|
||||||
case Mod:prepare_batch(Shard, GenData, Messages, Options) of
|
case Mod:prepare_batch(Shard, GenData, Batch, Options) of
|
||||||
{ok, CookedBatch} ->
|
{ok, CookedBatch} ->
|
||||||
{ok, #{?tag => ?COOKED_BATCH, ?generation => GenId, ?enc => CookedBatch}};
|
{ok, #{?tag => ?COOKED_BATCH, ?generation => GenId, ?enc => CookedBatch}};
|
||||||
Error = {error, _, _} ->
|
Error = {error, _, _} ->
|
||||||
|
@ -368,11 +375,21 @@ prepare_batch(Shard, Messages = [{Time, _} | _], Options) ->
|
||||||
%% TODO store->prepare
|
%% TODO store->prepare
|
||||||
emqx_ds_builtin_metrics:observe_store_batch_time(Shard, T1 - T0),
|
emqx_ds_builtin_metrics:observe_store_batch_time(Shard, T1 - T0),
|
||||||
Result;
|
Result;
|
||||||
|
false ->
|
||||||
|
%% No write operations in this batch.
|
||||||
|
ignore;
|
||||||
not_found ->
|
not_found ->
|
||||||
|
%% Generation is likely already GCed.
|
||||||
ignore
|
ignore
|
||||||
end;
|
end.
|
||||||
prepare_batch(_Shard, [], _Options) ->
|
|
||||||
ignore.
|
-spec batch_starts_at(batch()) -> emqx_ds:time() | undefined.
|
||||||
|
batch_starts_at([{Time, _Message} | _]) when is_integer(Time) ->
|
||||||
|
Time;
|
||||||
|
batch_starts_at([{delete, #message_matcher{timestamp = Time}} | _]) ->
|
||||||
|
Time;
|
||||||
|
batch_starts_at([]) ->
|
||||||
|
undefined.
|
||||||
|
|
||||||
%% @doc Commit cooked batch to the storage.
|
%% @doc Commit cooked batch to the storage.
|
||||||
%%
|
%%
|
||||||
|
@ -559,6 +576,16 @@ update_config(ShardId, Since, Options) ->
|
||||||
add_generation(ShardId, Since) ->
|
add_generation(ShardId, Since) ->
|
||||||
gen_server:call(?REF(ShardId), #call_add_generation{since = Since}, infinity).
|
gen_server:call(?REF(ShardId), #call_add_generation{since = Since}, infinity).
|
||||||
|
|
||||||
|
-spec lookup_message(shard_id(), emqx_ds_precondition:matcher()) ->
|
||||||
|
emqx_types:message() | not_found | emqx_ds:error(_).
|
||||||
|
lookup_message(ShardId, Matcher = #message_matcher{timestamp = Time}) ->
|
||||||
|
case generation_at(ShardId, Time) of
|
||||||
|
{_GenId, #{module := Mod, data := GenData}} ->
|
||||||
|
Mod:lookup_message(ShardId, GenData, Matcher);
|
||||||
|
not_found ->
|
||||||
|
not_found
|
||||||
|
end.
|
||||||
|
|
||||||
-spec list_generations_with_lifetimes(shard_id()) ->
|
-spec list_generations_with_lifetimes(shard_id()) ->
|
||||||
#{
|
#{
|
||||||
gen_id() => #{
|
gen_id() => #{
|
||||||
|
|
|
@ -21,6 +21,8 @@
|
||||||
%% used for testing.
|
%% used for testing.
|
||||||
-module(emqx_ds_storage_reference).
|
-module(emqx_ds_storage_reference).
|
||||||
|
|
||||||
|
-include("emqx_ds.hrl").
|
||||||
|
|
||||||
-behaviour(emqx_ds_storage_layer).
|
-behaviour(emqx_ds_storage_layer).
|
||||||
|
|
||||||
%% API:
|
%% API:
|
||||||
|
@ -39,7 +41,8 @@
|
||||||
make_delete_iterator/5,
|
make_delete_iterator/5,
|
||||||
update_iterator/4,
|
update_iterator/4,
|
||||||
next/6,
|
next/6,
|
||||||
delete_next/7
|
delete_next/7,
|
||||||
|
lookup_message/3
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% internal exports:
|
%% internal exports:
|
||||||
|
@ -49,6 +52,8 @@
|
||||||
|
|
||||||
-include_lib("emqx_utils/include/emqx_message.hrl").
|
-include_lib("emqx_utils/include/emqx_message.hrl").
|
||||||
|
|
||||||
|
-define(DB_KEY(TIMESTAMP), <<TIMESTAMP:64>>).
|
||||||
|
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
%% Type declarations
|
%% Type declarations
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
|
@ -102,23 +107,22 @@ drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{cf = CFHandle}) ->
|
||||||
ok = rocksdb:drop_column_family(DBHandle, CFHandle),
|
ok = rocksdb:drop_column_family(DBHandle, CFHandle),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
prepare_batch(_ShardId, _Data, Messages, _Options) ->
|
prepare_batch(_ShardId, _Data, Batch, _Options) ->
|
||||||
{ok, Messages}.
|
{ok, Batch}.
|
||||||
|
|
||||||
commit_batch(_ShardId, #s{db = DB, cf = CF}, Messages, Options) ->
|
commit_batch(_ShardId, S = #s{db = DB}, Batch, Options) ->
|
||||||
{ok, Batch} = rocksdb:batch(),
|
{ok, BatchHandle} = rocksdb:batch(),
|
||||||
lists:foreach(
|
lists:foreach(fun(Op) -> process_batch_operation(S, Op, BatchHandle) end, Batch),
|
||||||
fun({TS, Msg}) ->
|
Res = rocksdb:write_batch(DB, BatchHandle, write_batch_opts(Options)),
|
||||||
Key = <<TS:64>>,
|
rocksdb:release_batch(BatchHandle),
|
||||||
Val = term_to_binary(Msg),
|
|
||||||
rocksdb:batch_put(Batch, CF, Key, Val)
|
|
||||||
end,
|
|
||||||
Messages
|
|
||||||
),
|
|
||||||
Res = rocksdb:write_batch(DB, Batch, write_batch_opts(Options)),
|
|
||||||
rocksdb:release_batch(Batch),
|
|
||||||
Res.
|
Res.
|
||||||
|
|
||||||
|
process_batch_operation(S, {TS, Msg = #message{}}, BatchHandle) ->
|
||||||
|
Val = encode_message(Msg),
|
||||||
|
rocksdb:batch_put(BatchHandle, S#s.cf, ?DB_KEY(TS), Val);
|
||||||
|
process_batch_operation(S, {delete, #message_matcher{timestamp = TS}}, BatchHandle) ->
|
||||||
|
rocksdb:batch_delete(BatchHandle, S#s.cf, ?DB_KEY(TS)).
|
||||||
|
|
||||||
get_streams(_Shard, _Data, _TopicFilter, _StartTime) ->
|
get_streams(_Shard, _Data, _TopicFilter, _StartTime) ->
|
||||||
[#stream{}].
|
[#stream{}].
|
||||||
|
|
||||||
|
@ -205,6 +209,16 @@ delete_next(_Shard, #s{db = DB, cf = CF}, It0, Selector, BatchSize, _Now, IsCurr
|
||||||
{ok, It, NumDeleted, NumIterated}
|
{ok, It, NumDeleted, NumIterated}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
lookup_message(_ShardId, #s{db = DB, cf = CF}, #message_matcher{timestamp = TS}) ->
|
||||||
|
case rocksdb:get(DB, CF, ?DB_KEY(TS), _ReadOpts = []) of
|
||||||
|
{ok, Val} ->
|
||||||
|
decode_message(Val);
|
||||||
|
not_found ->
|
||||||
|
not_found;
|
||||||
|
{error, Reason} ->
|
||||||
|
{error, unrecoverable, Reason}
|
||||||
|
end.
|
||||||
|
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
%% Internal functions
|
%% Internal functions
|
||||||
%%================================================================================
|
%%================================================================================
|
||||||
|
@ -214,7 +228,7 @@ do_next(_, _, _, _, 0, Key, Acc) ->
|
||||||
do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) ->
|
do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) ->
|
||||||
case rocksdb:iterator_move(IT, Action) of
|
case rocksdb:iterator_move(IT, Action) of
|
||||||
{ok, Key = <<TS:64>>, Blob} ->
|
{ok, Key = <<TS:64>>, Blob} ->
|
||||||
Msg = #message{topic = Topic} = binary_to_term(Blob),
|
Msg = #message{topic = Topic} = decode_message(Blob),
|
||||||
TopicWords = emqx_topic:words(Topic),
|
TopicWords = emqx_topic:words(Topic),
|
||||||
case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of
|
case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of
|
||||||
true ->
|
true ->
|
||||||
|
@ -234,7 +248,7 @@ do_delete_next(
|
||||||
) ->
|
) ->
|
||||||
case rocksdb:iterator_move(IT, Action) of
|
case rocksdb:iterator_move(IT, Action) of
|
||||||
{ok, Key, Blob} ->
|
{ok, Key, Blob} ->
|
||||||
Msg = #message{topic = Topic, timestamp = TS} = binary_to_term(Blob),
|
Msg = #message{topic = Topic, timestamp = TS} = decode_message(Blob),
|
||||||
TopicWords = emqx_topic:words(Topic),
|
TopicWords = emqx_topic:words(Topic),
|
||||||
case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of
|
case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of
|
||||||
true ->
|
true ->
|
||||||
|
@ -285,6 +299,12 @@ do_delete_next(
|
||||||
{Key0, {AccDel, AccIter}}
|
{Key0, {AccDel, AccIter}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
encode_message(Msg) ->
|
||||||
|
term_to_binary(Msg).
|
||||||
|
|
||||||
|
decode_message(Val) ->
|
||||||
|
binary_to_term(Val).
|
||||||
|
|
||||||
%% @doc Generate a column family ID for the MQTT messages
|
%% @doc Generate a column family ID for the MQTT messages
|
||||||
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
|
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
|
||||||
data_cf(GenId) ->
|
data_cf(GenId) ->
|
||||||
|
|
Loading…
Reference in New Issue