Merge pull request #12762 from thalesmg/ds-fix-sync-egress-reply-m-20240321

fix(ds): add caller to pending replies before flushing and reply failures to sync callers
This commit is contained in:
Thales Macedo Garitezi 2024-03-21 16:42:42 -03:00 committed by GitHub
commit 23ad37f566
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 19 additions and 14 deletions

View File

@ -161,12 +161,13 @@ do_flush(
) ->
case emqx_ds_replication_layer:ra_store_batch(DB, Shard, lists:reverse(Messages)) of
ok ->
lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies),
true = erlang:garbage_collect(),
?tp(
emqx_ds_replication_layer_egress_flush,
#{db => DB, shard => Shard, batch => Messages}
);
),
lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies),
true = erlang:garbage_collect(),
ok;
Error ->
true = erlang:garbage_collect(),
?tp(
@ -175,7 +176,11 @@ do_flush(
#{db => DB, shard => Shard, reason => Error}
),
Cooldown = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN),
ok = timer:sleep(Cooldown)
ok = timer:sleep(Cooldown),
%% Since we drop the entire batch here, we at least reply to callers with an
%% error so they don't hang indefinitely in the `gen_server' call with
%% `infinity' timeout.
lists:foreach(fun(From) -> gen_server:reply(From, {error, Error}) end, Replies)
end,
S#s{
n = 0,
@ -193,14 +198,6 @@ do_enqueue(From, Sync, MsgOrBatch, S0 = #s{n = N, batch = Batch, pending_replies
Msg ->
S0#s{n = N + 1, batch = [Msg | Batch]}
end,
S2 =
case N >= NMax of
true ->
_ = erlang:cancel_timer(S0#s.tref),
do_flush(S1);
false ->
S1
end,
%% TODO: later we may want to delay the reply until the message is
%% replicated, but it requires changes to the PUBACK/PUBREC flow to
%% allow for async replies. For now, we ack when the message is
@ -208,12 +205,20 @@ do_enqueue(From, Sync, MsgOrBatch, S0 = #s{n = N, batch = Batch, pending_replies
%%
%% Otherwise, the client would freeze for at least flush interval,
%% or until the buffer is filled.
S =
S2 =
case Sync of
true ->
S2#s{pending_replies = [From | Replies]};
S1#s{pending_replies = [From | Replies]};
false ->
gen_server:reply(From, ok),
S1
end,
S =
case N >= NMax of
true ->
_ = erlang:cancel_timer(S2#s.tref),
do_flush(S2);
false ->
S2
end,
%% TODO: add a backpressure mechanism for the server to avoid