feat(ft-gc): treat all transfers as incomplete

Since the concept of _complete transfers_ is being split out into
the _export_ concept, we lose knowledge of completeness in the GC.
Instead of asking exporters for transfer statuses, we just treat all
transfers as incomplete when GCing.
This commit is contained in:
Andrew Mayorov 2023-03-20 15:15:43 +03:00 committed by Ilya Averyanov
parent 0d39546080
commit 4f2600b9f1
4 changed files with 121 additions and 99 deletions

View File

@ -137,20 +137,10 @@ pread(Node, Segment, St) ->
%%
%% Decides whether to request garbage collection for the transfer held in the state.
%% * First argument `ok`: passes the storage, the transfer and a list of nodes to
%%   `emqx_ft_storage_fs_gc:collect/3` and returns whatever that call returns.
%% * First argument `{error, _}`: does nothing and returns `ok`.
maybe_garbage_collect(ok, St = #st{storage = Storage, transfer = Transfer}) ->
% NOTE(review): the two `Nodes = ...` lines below look like the before/after sides of
% a diff hunk. Read literally, the second one is a match assertion against the first
% and crashes with `badmatch` if the two node lists differ -- confirm which is intended.
Nodes = get_coverage_nodes(St),
Nodes = emqx_ft_assembly:nodes(St#st.assembly),
emqx_ft_storage_fs_gc:collect(Storage, Transfer, Nodes);
maybe_garbage_collect({error, _}, _St) ->
ok.
%% Returns the sorted, duplicate-free list of nodes found in the coverage of the
%% assembly stored in the state. Each coverage entry is expected to be a
%% `{Node, Segment}` pair; any other shape crashes with `function_clause`.
get_coverage_nodes(St) ->
    Coverage = emqx_ft_assembly:coverage(St#st.assembly),
    NodeOf = fun({Node, _Segment}) -> Node end,
    ordsets:to_list(ordsets:from_list(lists:map(NodeOf, Coverage))).
%% Extracts the `size` entry from the info map of a segment fragment.
%% Only maps whose `fragment` key holds `{segment, Info}` are accepted.
segsize(#{fragment := {segment, Info}}) ->
    Size = maps:get(size, Info),
    Size.

View File

@ -22,6 +22,7 @@
-export([status/1]).
-export([filemeta/1]).
-export([nodes/1]).
-export([coverage/1]).
-export([properties/1]).
@ -108,6 +109,24 @@ filemeta(Asm) ->
%% Accessor: returns the `coverage` field of the assembly record.
coverage(Asm = #asm{}) ->
    Asm#asm.coverage.
%% Returns the sorted, duplicate-free list of nodes referenced by the assembly.
%% Nodes are gathered from two places:
%%  * the values of the `meta` orddict, each a `{Node, Fragment}` pair;
%%  * the entries of the `segs` graph, whose label carries a `{Node, Fragment}`
%%    pair as its third element.
-spec nodes(t()) -> [node()].
nodes(#asm{meta = Meta, segs = Segs}) ->
    Remember = fun(Node, Seen) -> ordsets:add_element(Node, Seen) end,
    MetaNodes = orddict:fold(
        fun(_Key, {Node, _Fragment}, Seen) -> Remember(Node, Seen) end,
        ordsets:new(),
        Meta
    ),
    SegmentNodes = emqx_wdgraph:fold(
        fun(_Offset, {_End, _, {Node, _Fragment}}, Seen) -> Remember(Node, Seen) end,
        ordsets:new(),
        Segs
    ),
    ordsets:to_list(ordsets:union(MetaNodes, SegmentNodes)).
%% Accessor: returns the `properties` field of the assembly record.
properties(Asm = #asm{}) ->
    Asm#asm.properties.

View File

@ -80,11 +80,15 @@ handle_call(Call, From, St) ->
?SLOG(error, #{msg => "unexpected_call", call => Call, from => From}),
{noreply, St}.
% TODO
% handle_cast({collect, Transfer, [Node | Rest]}, St) ->
% ok = do_collect_transfer(Transfer, Node, St),
% ok = collect(self(), Transfer, Rest),
% {noreply, St};
handle_cast({collect, Transfer, [Node | Rest]}, St) ->
ok = do_collect_transfer(Transfer, Node, St),
case Rest of
[_ | _] ->
gen_server:cast(self(), {collect, Transfer, Rest});
[] ->
ok
end,
{noreply, St};
handle_cast(reset, St) ->
{noreply, reset_timer(St)};
handle_cast(Cast, St) ->
@ -95,10 +99,13 @@ handle_info({timeout, TRef, collect}, St = #st{next_gc_timer = TRef}) ->
StNext = do_collect_garbage(St),
{noreply, start_timer(StNext#st{next_gc_timer = undefined})}.
% do_collect_transfer(Transfer, Node, St = #st{storage = Storage}) when Node == node() ->
% Stats = try_collect_transfer(Storage, Transfer, complete, init_gcstats()),
% ok = maybe_report(Stats, St),
% ok.
%% Collects a single transfer on behalf of `Node`.
%% When `Node` is the local node, the transfer is collected as `complete` starting
%% from fresh GC stats, and the stats are handed to `maybe_report/2`.
%% For any other node nothing is done yet. Always returns `ok`.
do_collect_transfer(Transfer, Node, #st{storage = Storage} = St) ->
    case Node =:= node() of
        true ->
            Stats = try_collect_transfer(Storage, Transfer, complete, init_gcstats()),
            ok = maybe_report(Stats, St);
        false ->
            % TODO
            ok
    end.
maybe_collect_garbage(_CalledAt, St = #st{last_gc = undefined}) ->
do_collect_garbage(St);
@ -149,21 +156,13 @@ collect_garbage(Storage, Transfers, Stats) ->
)
).
try_collect_transfer(Storage, Transfer, #{status := complete}, Stats) ->
% File transfer is complete.
% We should be good to delete fragments and temporary files with their respective
% directories altogether.
% TODO: file expiration
{_, Stats1} = collect_fragments(Storage, Transfer, Stats),
{_, Stats2} = collect_tempfiles(Storage, Transfer, Stats1),
Stats2;
try_collect_transfer(Storage, Transfer, #{status := incomplete}, Stats) ->
% File transfer is still incomplete.
try_collect_transfer(Storage, Transfer, TransferInfo = #{}, Stats) ->
% File transfer might still be incomplete.
% Any outdated fragments and temporary files should be collectable. As a kind of
% heuristic we delete the transfer directory itself only if it is also outdated
% _and was empty at the start of GC_, as a precaution against races between
% writers and GCs.
TTL = get_segments_ttl(Storage, Transfer),
TTL = get_segments_ttl(Storage, TransferInfo),
Cutoff = erlang:system_time(second) - TTL,
{FragCleaned, Stats1} = collect_outdated_fragments(Storage, Transfer, Cutoff, Stats),
{TempCleaned, Stats2} = collect_outdated_tempfiles(Storage, Transfer, Cutoff, Stats1),
@ -173,7 +172,14 @@ try_collect_transfer(Storage, Transfer, #{status := incomplete}, Stats) ->
collect_transfer_directory(Storage, Transfer, Stats2);
false ->
Stats2
end.
end;
try_collect_transfer(Storage, Transfer, complete, Stats) ->
% File transfer is complete.
% We should be good to delete fragments and temporary files with their respective
% directories altogether.
{_, Stats1} = collect_fragments(Storage, Transfer, Stats),
{_, Stats2} = collect_tempfiles(Storage, Transfer, Stats1),
Stats2.
collect_fragments(Storage, Transfer, Stats) ->
Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, fragment),

View File

@ -45,9 +45,11 @@ init_per_testcase(TC, Config) ->
})
end
),
ok = snabbkaffe:start_trace(),
Config.
%% Per-testcase teardown: stops snabbkaffe first, then the `emqx_ft` application.
%% Both steps are asserted to return `ok`; the function itself returns `ok`.
end_per_testcase(_TC, _Config) ->
    ok = snabbkaffe:stop(),
    ok = application:stop(emqx_ft).
@ -126,20 +128,29 @@ t_gc_complete_transfers(_Config) ->
emqx_ft_storage_fs_gc:collect(Storage)
),
% 2. Complete just the first transfer
?assertEqual(
ok,
complete_transfer(Storage, T1, S1)
{ok, {ok, Event}} = ?wait_async_action(
?assertEqual(ok, complete_transfer(Storage, T1, S1)),
#{?snk_kind := garbage_collection},
1000
),
?assertMatch(
#gcstats{
#{
stats := #gcstats{
files = Files,
directories = 2,
space = Space,
errors = #{} = Es
}
} when Files == ?NSEGS(S1, SS1) andalso Space > S1 andalso map_size(Es) == 0,
emqx_ft_storage_fs_gc:collect(Storage)
Event
),
% 3. Complete rest of transfers
{ok, Sub} = snabbkaffe_collector:subscribe(
?match_event(#{?snk_kind := garbage_collection}),
2,
1000,
0
),
?assertEqual(
[ok, ok],
emqx_misc:pmap(
@ -147,18 +158,19 @@ t_gc_complete_transfers(_Config) ->
[{T2, S2}, {T3, S3}]
)
),
?assertMatch(
#gcstats{
files = Files,
directories = 4,
space = Space,
errors = #{} = Es
} when
Files == (?NSEGS(S2, SS2) + ?NSEGS(S3, SS3)) andalso
Space > (S2 + S3) andalso
map_size(Es) == 0,
emqx_ft_storage_fs_gc:collect(Storage)
).
{ok, Events} = snabbkaffe_collector:receive_events(Sub),
CFiles = lists:sum([Stats#gcstats.files || #{stats := Stats} <- Events]),
CDirectories = lists:sum([Stats#gcstats.directories || #{stats := Stats} <- Events]),
CSpace = lists:sum([Stats#gcstats.space || #{stats := Stats} <- Events]),
CErrors = lists:foldl(
fun maps:merge/2,
#{},
[Stats#gcstats.errors || #{stats := Stats} <- Events]
),
?assertEqual(?NSEGS(S2, SS2) + ?NSEGS(S3, SS3), CFiles),
?assertEqual(2 + 2, CDirectories),
?assertMatch(Space when Space > S2 + S3, CSpace),
?assertMatch(Errors when map_size(Errors) == 0, CErrors).
t_gc_incomplete_transfers(_Config) ->
ok = emqx_config:put([file_transfer, storage, gc, minimum_segments_ttl], 0),
@ -188,8 +200,6 @@ t_gc_incomplete_transfers(_Config) ->
],
% 1. Start transfers, send all the segments but don't trigger completion.
_ = emqx_misc:pmap(fun(Transfer) -> start_transfer(Storage, Transfer) end, Transfers),
?check_trace(
begin
% 2. Enable periodic GC every 0.5 seconds.
ok = emqx_config:put([file_transfer, storage, gc, interval], 500),
ok = emqx_ft_storage_fs_gc:reset(Storage),
@ -231,9 +241,6 @@ t_gc_incomplete_transfers(_Config) ->
} when Files == (?NSEGS(S3, SS3) + ?NSEGS(S4, SS4)) andalso Space > S3 + S4,
5000,
0
)
end,
[]
).
t_gc_handling_errors(_Config) ->