feat(dsrepl): enable WAL-less batch writes
This commit is contained in:
parent
2705226eb5
commit
0c0757b8c2
|
@ -70,6 +70,7 @@ prepare(Index, State) ->
|
||||||
ok | {ok, _BytesWritten :: non_neg_integer()} | {error, ra_snapshot:file_err()}.
|
ok | {ok, _BytesWritten :: non_neg_integer()} | {error, ra_snapshot:file_err()}.
|
||||||
write(Dir, Meta, MachineState) ->
|
write(Dir, Meta, MachineState) ->
|
||||||
?tp(dsrepl_snapshot_write, #{meta => Meta, state => MachineState}),
|
?tp(dsrepl_snapshot_write, #{meta => Meta, state => MachineState}),
|
||||||
|
ok = emqx_ds_storage_layer:flush(shard_id(MachineState)),
|
||||||
ra_log_snapshot:write(Dir, Meta, MachineState).
|
ra_log_snapshot:write(Dir, Meta, MachineState).
|
||||||
|
|
||||||
%% Reading a snapshot.
|
%% Reading a snapshot.
|
||||||
|
@ -229,7 +230,7 @@ complete_accept(WS = #ws{started_at = StartedAt, writer = SnapWriter}) ->
|
||||||
write_machine_snapshot(WS).
|
write_machine_snapshot(WS).
|
||||||
|
|
||||||
write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) ->
|
write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) ->
|
||||||
write(Dir, Meta, MachineState).
|
ra_log_snapshot:write(Dir, Meta, MachineState).
|
||||||
|
|
||||||
%% Restoring machine state from a snapshot.
|
%% Restoring machine state from a snapshot.
|
||||||
%% This is equivalent to restoring from a log snapshot.
|
%% This is equivalent to restoring from a log snapshot.
|
||||||
|
|
|
@ -326,7 +326,7 @@ commit_batch(
|
||||||
end,
|
end,
|
||||||
Payloads
|
Payloads
|
||||||
),
|
),
|
||||||
Result = rocksdb:write_batch(DB, Batch, []),
|
Result = rocksdb:write_batch(DB, Batch, [{disable_wal, true}]),
|
||||||
rocksdb:release_batch(Batch),
|
rocksdb:release_batch(Batch),
|
||||||
ets:insert(Gvars, {?IDLE_DETECT, false, MaxTs}),
|
ets:insert(Gvars, {?IDLE_DETECT, false, MaxTs}),
|
||||||
%% NOTE
|
%% NOTE
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
drop_generation/2,
|
drop_generation/2,
|
||||||
|
|
||||||
%% Snapshotting
|
%% Snapshotting
|
||||||
|
flush/1,
|
||||||
take_snapshot/1,
|
take_snapshot/1,
|
||||||
accept_snapshot/1,
|
accept_snapshot/1,
|
||||||
|
|
||||||
|
@ -279,6 +280,7 @@
|
||||||
-record(call_update_config, {options :: emqx_ds:create_db_opts(), since :: emqx_ds:time()}).
|
-record(call_update_config, {options :: emqx_ds:create_db_opts(), since :: emqx_ds:time()}).
|
||||||
-record(call_list_generations_with_lifetimes, {}).
|
-record(call_list_generations_with_lifetimes, {}).
|
||||||
-record(call_drop_generation, {gen_id :: gen_id()}).
|
-record(call_drop_generation, {gen_id :: gen_id()}).
|
||||||
|
-record(call_flush, {}).
|
||||||
-record(call_take_snapshot, {}).
|
-record(call_take_snapshot, {}).
|
||||||
|
|
||||||
-spec drop_shard(shard_id()) -> ok.
|
-spec drop_shard(shard_id()) -> ok.
|
||||||
|
@ -539,6 +541,10 @@ shard_info(ShardId, status) ->
|
||||||
error:badarg -> down
|
error:badarg -> down
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
-spec flush(shard_id()) -> ok | {error, _}.
|
||||||
|
flush(ShardId) ->
|
||||||
|
gen_server:call(?REF(ShardId), #call_flush{}, infinity).
|
||||||
|
|
||||||
-spec take_snapshot(shard_id()) -> {ok, emqx_ds_storage_snapshot:reader()} | {error, _Reason}.
|
-spec take_snapshot(shard_id()) -> {ok, emqx_ds_storage_snapshot:reader()} | {error, _Reason}.
|
||||||
take_snapshot(ShardId) ->
|
take_snapshot(ShardId) ->
|
||||||
case gen_server:call(?REF(ShardId), #call_take_snapshot{}, infinity) of
|
case gen_server:call(?REF(ShardId), #call_take_snapshot{}, infinity) of
|
||||||
|
@ -566,6 +572,7 @@ start_link(Shard = {_, _}, Options) ->
|
||||||
shard_id :: shard_id(),
|
shard_id :: shard_id(),
|
||||||
db :: rocksdb:db_handle(),
|
db :: rocksdb:db_handle(),
|
||||||
cf_refs :: cf_refs(),
|
cf_refs :: cf_refs(),
|
||||||
|
cf_need_flush :: gen_id(),
|
||||||
schema :: shard_schema(),
|
schema :: shard_schema(),
|
||||||
shard :: shard()
|
shard :: shard()
|
||||||
}).
|
}).
|
||||||
|
@ -591,10 +598,12 @@ init({ShardId, Options}) ->
|
||||||
{Scm, CFRefs0}
|
{Scm, CFRefs0}
|
||||||
end,
|
end,
|
||||||
Shard = open_shard(ShardId, DB, CFRefs, Schema),
|
Shard = open_shard(ShardId, DB, CFRefs, Schema),
|
||||||
|
CurrentGenId = maps:get(current_generation, Schema),
|
||||||
S = #s{
|
S = #s{
|
||||||
shard_id = ShardId,
|
shard_id = ShardId,
|
||||||
db = DB,
|
db = DB,
|
||||||
cf_refs = CFRefs,
|
cf_refs = CFRefs,
|
||||||
|
cf_need_flush = CurrentGenId,
|
||||||
schema = Schema,
|
schema = Schema,
|
||||||
shard = Shard
|
shard = Shard
|
||||||
},
|
},
|
||||||
|
@ -635,6 +644,9 @@ handle_call(#call_list_generations_with_lifetimes{}, _From, S) ->
|
||||||
handle_call(#call_drop_generation{gen_id = GenId}, _From, S0) ->
|
handle_call(#call_drop_generation{gen_id = GenId}, _From, S0) ->
|
||||||
{Reply, S} = handle_drop_generation(S0, GenId),
|
{Reply, S} = handle_drop_generation(S0, GenId),
|
||||||
{reply, Reply, S};
|
{reply, Reply, S};
|
||||||
|
handle_call(#call_flush{}, _From, S0) ->
|
||||||
|
{Reply, S} = handle_flush(S0),
|
||||||
|
{reply, Reply, S};
|
||||||
handle_call(#call_take_snapshot{}, _From, S) ->
|
handle_call(#call_take_snapshot{}, _From, S) ->
|
||||||
Snapshot = handle_take_snapshot(S),
|
Snapshot = handle_take_snapshot(S),
|
||||||
{reply, Snapshot, S};
|
{reply, Snapshot, S};
|
||||||
|
@ -866,6 +878,10 @@ rocksdb_open(Shard, Options) ->
|
||||||
DBOptions = [
|
DBOptions = [
|
||||||
{create_if_missing, true},
|
{create_if_missing, true},
|
||||||
{create_missing_column_families, true},
|
{create_missing_column_families, true},
|
||||||
|
%% NOTE
|
||||||
|
%% With WAL-less writes, it's important to have CFs flushed atomically.
|
||||||
|
%% For example, bitfield-lts backend needs data + trie CFs to be consistent.
|
||||||
|
{atomic_flush, true},
|
||||||
{enable_write_thread_adaptive_yield, false}
|
{enable_write_thread_adaptive_yield, false}
|
||||||
| maps:get(db_options, Options, [])
|
| maps:get(db_options, Options, [])
|
||||||
],
|
],
|
||||||
|
@ -921,6 +937,30 @@ update_last_until(Schema = #{current_generation := GenId}, Until) ->
|
||||||
{error, overlaps_existing_generations}
|
{error, overlaps_existing_generations}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
handle_flush(S = #s{db = DB, cf_need_flush = NeedFlushGenId, schema = Schema}) ->
|
||||||
|
%% NOTE
|
||||||
|
%% There could have been few generations added since the last time `flush/1` was
|
||||||
|
%% called. Strictly speaking, we don't need to flush them all at once as part of
|
||||||
|
%% a single atomic flush, but the error handling is a bit easier this way.
|
||||||
|
CurrentGenId = maps:get(current_generation, Schema),
|
||||||
|
GenIds = lists:seq(NeedFlushGenId, CurrentGenId),
|
||||||
|
CFHandles = lists:flatmap(
|
||||||
|
fun(GenId) ->
|
||||||
|
#{?GEN_KEY(GenId) := #{cf_refs := CFRefs}} = Schema,
|
||||||
|
{_, CFHandles} = lists:unzip(CFRefs),
|
||||||
|
CFHandles
|
||||||
|
end,
|
||||||
|
GenIds
|
||||||
|
),
|
||||||
|
case rocksdb:flush(DB, CFHandles, [{wait, true}]) of
|
||||||
|
ok ->
|
||||||
|
%% Current generation will always need a flush.
|
||||||
|
?tp(ds_storage_flush_complete, #{gens => GenIds, cfs => CFHandles}),
|
||||||
|
{ok, S#s{cf_need_flush = CurrentGenId}};
|
||||||
|
{error, _} = Error ->
|
||||||
|
{Error, S}
|
||||||
|
end.
|
||||||
|
|
||||||
handle_take_snapshot(#s{db = DB, shard_id = ShardId}) ->
|
handle_take_snapshot(#s{db = DB, shard_id = ShardId}) ->
|
||||||
Name = integer_to_list(erlang:system_time(millisecond)),
|
Name = integer_to_list(erlang:system_time(millisecond)),
|
||||||
Dir = checkpoint_dir(ShardId, Name),
|
Dir = checkpoint_dir(ShardId, Name),
|
||||||
|
|
|
@ -115,7 +115,7 @@ commit_batch(_ShardId, #s{db = DB, cf = CF}, Messages) ->
|
||||||
end,
|
end,
|
||||||
Messages
|
Messages
|
||||||
),
|
),
|
||||||
Res = rocksdb:write_batch(DB, Batch, _WriteOptions = []),
|
Res = rocksdb:write_batch(DB, Batch, _WriteOptions = [{disable_wal, true}]),
|
||||||
rocksdb:release_batch(Batch),
|
rocksdb:release_batch(Batch),
|
||||||
Res.
|
Res.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue