fix: avoid uploading blocks too frequently and splitting large buffers
Fixes https://github.com/emqx/emqx/pull/13069#discussion_r1613706747
This commit is contained in:
parent
af99829a21
commit
616f14ae53
|
@ -144,6 +144,16 @@ fields(aggregation) ->
|
||||||
default => 1_000_000,
|
default => 1_000_000,
|
||||||
desc => ?DESC("aggregation_max_records")
|
desc => ?DESC("aggregation_max_records")
|
||||||
}
|
}
|
||||||
|
)},
|
||||||
|
{min_block_size,
|
||||||
|
mk(
|
||||||
|
emqx_schema:bytesize(),
|
||||||
|
#{
|
||||||
|
default => <<"10mb">>,
|
||||||
|
importance => ?IMPORTANCE_HIDDEN,
|
||||||
|
required => true,
|
||||||
|
validator => fun block_size_validator/1
|
||||||
|
}
|
||||||
)}
|
)}
|
||||||
];
|
];
|
||||||
fields(common_action_parameters) ->
|
fields(common_action_parameters) ->
|
||||||
|
@ -156,7 +166,7 @@ fields(common_action_parameters) ->
|
||||||
importance => ?IMPORTANCE_HIDDEN,
|
importance => ?IMPORTANCE_HIDDEN,
|
||||||
desc => ?DESC("max_block_size"),
|
desc => ?DESC("max_block_size"),
|
||||||
required => true,
|
required => true,
|
||||||
validator => fun max_block_size_validator/1
|
validator => fun block_size_validator/1
|
||||||
}
|
}
|
||||||
)}
|
)}
|
||||||
];
|
];
|
||||||
|
@ -293,7 +303,7 @@ scunion(Field, Schemas, {value, Value}) ->
|
||||||
throw(#{field_name => Field, expected => maps:keys(Schemas)})
|
throw(#{field_name => Field, expected => maps:keys(Schemas)})
|
||||||
end.
|
end.
|
||||||
|
|
||||||
max_block_size_validator(SizeLimit) ->
|
block_size_validator(SizeLimit) ->
|
||||||
case SizeLimit =< 4_000 * 1024 * 1024 of
|
case SizeLimit =< 4_000 * 1024 * 1024 of
|
||||||
true -> ok;
|
true -> ok;
|
||||||
false -> {error, "must be less than 4000 MiB"}
|
false -> {error, "must be less than 4000 MiB"}
|
||||||
|
|
|
@ -55,10 +55,6 @@
|
||||||
%% `emqx_template' API
|
%% `emqx_template' API
|
||||||
-export([lookup/2]).
|
-export([lookup/2]).
|
||||||
|
|
||||||
-ifdef(TEST).
|
|
||||||
-export([take_chunk/2]).
|
|
||||||
-endif.
|
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
%% Type declarations
|
%% Type declarations
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
|
@ -131,6 +127,7 @@
|
||||||
action := binary(),
|
action := binary(),
|
||||||
blob := emqx_template:t(),
|
blob := emqx_template:t(),
|
||||||
container := string(),
|
container := string(),
|
||||||
|
min_block_size := pos_integer(),
|
||||||
max_block_size := pos_integer(),
|
max_block_size := pos_integer(),
|
||||||
pool := connector_resource_id()
|
pool := connector_resource_id()
|
||||||
}
|
}
|
||||||
|
@ -144,6 +141,8 @@
|
||||||
buffer_size := non_neg_integer(),
|
buffer_size := non_neg_integer(),
|
||||||
container := container(),
|
container := container(),
|
||||||
max_block_size := pos_integer(),
|
max_block_size := pos_integer(),
|
||||||
|
min_block_size := pos_integer(),
|
||||||
|
next_block := queue:queue(iolist()),
|
||||||
num_blocks := non_neg_integer(),
|
num_blocks := non_neg_integer(),
|
||||||
pool := pool_name(),
|
pool := pool_name(),
|
||||||
started := boolean()
|
started := boolean()
|
||||||
|
@ -348,6 +347,7 @@ init_transfer_state(Buffer, Opts) ->
|
||||||
blob := BlobTemplate,
|
blob := BlobTemplate,
|
||||||
container := Container,
|
container := Container,
|
||||||
max_block_size := MaxBlockSize,
|
max_block_size := MaxBlockSize,
|
||||||
|
min_block_size := MinBlockSize,
|
||||||
pool := Pool
|
pool := Pool
|
||||||
}
|
}
|
||||||
} = Opts,
|
} = Opts,
|
||||||
|
@ -358,6 +358,8 @@ init_transfer_state(Buffer, Opts) ->
|
||||||
buffer_size => 0,
|
buffer_size => 0,
|
||||||
container => Container,
|
container => Container,
|
||||||
max_block_size => MaxBlockSize,
|
max_block_size => MaxBlockSize,
|
||||||
|
min_block_size => MinBlockSize,
|
||||||
|
next_block => queue:new(),
|
||||||
num_blocks => 0,
|
num_blocks => 0,
|
||||||
pool => Pool,
|
pool => Pool,
|
||||||
started => false
|
started => false
|
||||||
|
@ -371,12 +373,25 @@ mk_blob_name_key(Buffer, ActionName, BlobTemplate) ->
|
||||||
process_append(IOData, TransferState0) ->
|
process_append(IOData, TransferState0) ->
|
||||||
#{
|
#{
|
||||||
buffer := Buffer,
|
buffer := Buffer,
|
||||||
buffer_size := BufferSize0
|
buffer_size := BufferSize0,
|
||||||
|
min_block_size := MinBlockSize,
|
||||||
|
next_block := NextBlock0
|
||||||
} = TransferState0,
|
} = TransferState0,
|
||||||
TransferState0#{
|
Size = iolist_size(IOData),
|
||||||
buffer := [Buffer, IOData],
|
case Size + BufferSize0 >= MinBlockSize of
|
||||||
buffer_size := BufferSize0 + iolist_size(IOData)
|
true ->
|
||||||
}.
|
%% Block is ready to be written.
|
||||||
|
TransferState0#{
|
||||||
|
buffer := [],
|
||||||
|
buffer_size := 0,
|
||||||
|
next_block := queue:in([Buffer, IOData], NextBlock0)
|
||||||
|
};
|
||||||
|
false ->
|
||||||
|
TransferState0#{
|
||||||
|
buffer := [Buffer, IOData],
|
||||||
|
buffer_size := BufferSize0 + Size
|
||||||
|
}
|
||||||
|
end.
|
||||||
|
|
||||||
-spec process_write(transfer_state()) ->
|
-spec process_write(transfer_state()) ->
|
||||||
{ok, transfer_state()} | {error, term()}.
|
{ok, transfer_state()} | {error, term()}.
|
||||||
|
@ -396,25 +411,19 @@ process_write(TransferState0 = #{started := false}) ->
|
||||||
{error, Reason} ->
|
{error, Reason} ->
|
||||||
{error, Reason}
|
{error, Reason}
|
||||||
end;
|
end;
|
||||||
process_write(TransferState = #{started := true, buffer_size := 0}) ->
|
|
||||||
{ok, TransferState};
|
|
||||||
process_write(TransferState0 = #{started := true}) ->
|
process_write(TransferState0 = #{started := true}) ->
|
||||||
#{
|
#{
|
||||||
buffer := Buffer,
|
next_block := NextBlock0
|
||||||
buffer_size := BufferSize,
|
|
||||||
max_block_size := MaxBlockSize
|
|
||||||
} = TransferState0,
|
} = TransferState0,
|
||||||
case BufferSize > MaxBlockSize of
|
case queue:out(NextBlock0) of
|
||||||
true ->
|
{{value, Block}, NextBlock} ->
|
||||||
{IOData, NewBuffer} = take_chunk(Buffer, MaxBlockSize),
|
|
||||||
?tp(azure_blob_storage_will_write_chunk, #{}),
|
?tp(azure_blob_storage_will_write_chunk, #{}),
|
||||||
do_process_write(IOData, NewBuffer, TransferState0);
|
do_process_write(Block, TransferState0#{next_block := NextBlock});
|
||||||
false ->
|
{empty, _} ->
|
||||||
NewBuffer = [],
|
{ok, TransferState0}
|
||||||
do_process_write(Buffer, NewBuffer, TransferState0)
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
do_process_write(IOData, NewBuffer, TransferState0 = #{started := true}) ->
|
do_process_write(IOData, TransferState0 = #{started := true}) ->
|
||||||
#{
|
#{
|
||||||
blob := Blob,
|
blob := Blob,
|
||||||
container := Container,
|
container := Container,
|
||||||
|
@ -423,12 +432,7 @@ do_process_write(IOData, NewBuffer, TransferState0 = #{started := true}) ->
|
||||||
} = TransferState0,
|
} = TransferState0,
|
||||||
case append_data(Pool, Container, Blob, block_id(NumBlocks), IOData) of
|
case append_data(Pool, Container, Blob, block_id(NumBlocks), IOData) of
|
||||||
{ok, _} ->
|
{ok, _} ->
|
||||||
BufferSize = iolist_size(NewBuffer),
|
TransferState = TransferState0#{num_blocks := NumBlocks + 1},
|
||||||
TransferState = TransferState0#{
|
|
||||||
buffer := NewBuffer,
|
|
||||||
buffer_size := BufferSize,
|
|
||||||
num_blocks := NumBlocks + 1
|
|
||||||
},
|
|
||||||
process_write(TransferState);
|
process_write(TransferState);
|
||||||
{error, Reason} ->
|
{error, Reason} ->
|
||||||
{error, Reason}
|
{error, Reason}
|
||||||
|
@ -439,10 +443,21 @@ do_process_write(IOData, NewBuffer, TransferState0 = #{started := true}) ->
|
||||||
process_complete(TransferState) ->
|
process_complete(TransferState) ->
|
||||||
#{
|
#{
|
||||||
blob := Blob,
|
blob := Blob,
|
||||||
|
buffer := Buffer,
|
||||||
|
buffer_size := BufferSize,
|
||||||
container := Container,
|
container := Container,
|
||||||
num_blocks := NumBlocks,
|
num_blocks := NumBlocks0,
|
||||||
pool := Pool
|
pool := Pool
|
||||||
} = TransferState,
|
} = TransferState,
|
||||||
|
%% Flush any left-over data
|
||||||
|
NumBlocks =
|
||||||
|
case BufferSize > 0 of
|
||||||
|
true ->
|
||||||
|
{ok, #{num_blocks := NumBlocks1}} = do_process_write(Buffer, TransferState),
|
||||||
|
NumBlocks1;
|
||||||
|
false ->
|
||||||
|
NumBlocks0
|
||||||
|
end,
|
||||||
BlockRefs = [{block_id(N), latest} || N <- lists:seq(0, NumBlocks - 1)],
|
BlockRefs = [{block_id(N), latest} || N <- lists:seq(0, NumBlocks - 1)],
|
||||||
case put_block_list(Pool, Container, Blob, BlockRefs) of
|
case put_block_list(Pool, Container, Blob, BlockRefs) of
|
||||||
{ok, _} ->
|
{ok, _} ->
|
||||||
|
@ -511,6 +526,7 @@ install_action(#{parameters := #{mode := aggregated}} = ActionConfig, ConnState)
|
||||||
aggregation := #{
|
aggregation := #{
|
||||||
container := ContainerOpts,
|
container := ContainerOpts,
|
||||||
max_records := MaxRecords,
|
max_records := MaxRecords,
|
||||||
|
min_block_size := MinBlockSize,
|
||||||
time_interval := TimeInterval
|
time_interval := TimeInterval
|
||||||
},
|
},
|
||||||
container := ContainerName,
|
container := ContainerName,
|
||||||
|
@ -531,6 +547,7 @@ install_action(#{parameters := #{mode := aggregated}} = ActionConfig, ConnState)
|
||||||
blob => Blob,
|
blob => Blob,
|
||||||
container => ContainerName,
|
container => ContainerName,
|
||||||
max_block_size => MaxBlockSize,
|
max_block_size => MaxBlockSize,
|
||||||
|
min_block_size => MinBlockSize,
|
||||||
pool => Pool
|
pool => Pool
|
||||||
},
|
},
|
||||||
DeliveryOpts = #{
|
DeliveryOpts = #{
|
||||||
|
@ -707,49 +724,6 @@ check_aggreg_upload_errors(AggregId) ->
|
||||||
check_container_accessible(Pool, Container) ->
|
check_container_accessible(Pool, Container) ->
|
||||||
list_blobs(Pool, Container).
|
list_blobs(Pool, Container).
|
||||||
|
|
||||||
-spec take_chunk(transfer_buffer(), pos_integer()) -> {_IOList :: iolist(), transfer_buffer()}.
|
|
||||||
take_chunk(Buffer, MaxBlockSize) ->
|
|
||||||
RemainingBytes = MaxBlockSize,
|
|
||||||
take_chunk(Buffer, RemainingBytes, _IOListAcc = []).
|
|
||||||
|
|
||||||
-spec take_chunk(transfer_buffer(), non_neg_integer(), IOList) ->
|
|
||||||
{IOList, transfer_buffer()}
|
|
||||||
when
|
|
||||||
IOList :: iolist().
|
|
||||||
take_chunk(RemainingBuffer = [], _RemainingBytes, IOListAcc) ->
|
|
||||||
{IOListAcc, RemainingBuffer};
|
|
||||||
take_chunk(RemainingBuffer, RemainingBytes, IOListAcc) when RemainingBytes =< 0 ->
|
|
||||||
{IOListAcc, [RemainingBuffer]};
|
|
||||||
take_chunk([Data0], RemainingBytes, IOListAcc) ->
|
|
||||||
case do_take_chunk([Data0], RemainingBytes) of
|
|
||||||
{done, Data} ->
|
|
||||||
{[IOListAcc, Data], _RemainingBuffer = []};
|
|
||||||
{more, {Data, Rest}} ->
|
|
||||||
{[IOListAcc, Data], [Rest]}
|
|
||||||
end;
|
|
||||||
take_chunk([Data0, Rest0], RemainingBytes0, IOListAcc) ->
|
|
||||||
case do_take_chunk(Data0, RemainingBytes0) of
|
|
||||||
{done, Data} ->
|
|
||||||
RemainingBytes = RemainingBytes0 - iolist_size(Data),
|
|
||||||
take_chunk([Rest0], RemainingBytes, [IOListAcc, Data]);
|
|
||||||
{more, {Data, Rest}} ->
|
|
||||||
{[IOListAcc, Data], [Rest, Rest0]}
|
|
||||||
end.
|
|
||||||
|
|
||||||
do_take_chunk(Data, ChunkSize) ->
|
|
||||||
case iolist_size(Data) =< ChunkSize of
|
|
||||||
true ->
|
|
||||||
{done, Data};
|
|
||||||
false ->
|
|
||||||
BinData0 = iolist_to_binary(Data),
|
|
||||||
{more, split_binary(BinData0, ChunkSize)}
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% ensure_transfer_buffer_shape([_, _] = Buffer) ->
|
|
||||||
%% Buffer;
|
|
||||||
%% ensure_transfer_buffer_shape(Data) ->
|
|
||||||
%% [Data].
|
|
||||||
|
|
||||||
block_id(N) ->
|
block_id(N) ->
|
||||||
NumDigits = 32,
|
NumDigits = 32,
|
||||||
list_to_binary(string:pad(integer_to_list(N), NumDigits, leading, $0)).
|
list_to_binary(string:pad(integer_to_list(N), NumDigits, leading, $0)).
|
||||||
|
|
|
@ -615,7 +615,9 @@ t_aggreg_pending_upload_restart(Config) ->
|
||||||
#{
|
#{
|
||||||
<<"parameters">> =>
|
<<"parameters">> =>
|
||||||
#{
|
#{
|
||||||
<<"max_block_size">> => <<"1024B">>
|
<<"aggregation">> => #{
|
||||||
|
<<"min_block_size">> => <<"1024B">>
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,99 +0,0 @@
|
||||||
%%--------------------------------------------------------------------
|
|
||||||
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
|
|
||||||
%%--------------------------------------------------------------------
|
|
||||||
|
|
||||||
-module(emqx_bridge_azure_blob_storage_tests).
|
|
||||||
|
|
||||||
-include_lib("proper/include/proper.hrl").
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
%% Helper fns
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
take_chunk(Buffer, BlockSize) ->
|
|
||||||
emqx_bridge_azure_blob_storage_connector:take_chunk(Buffer, BlockSize).
|
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
%% Generators
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
iodata_gen() ->
|
|
||||||
frequency([
|
|
||||||
{10, []},
|
|
||||||
{10, binary()},
|
|
||||||
{10, list(oneof([binary(), byte()]))},
|
|
||||||
{1, ?SIZED(Size, resize(Size div 3, list(iodata_gen())))}
|
|
||||||
]).
|
|
||||||
|
|
||||||
buffer_gen() ->
|
|
||||||
?LET(
|
|
||||||
DataParts,
|
|
||||||
list(iodata_gen()),
|
|
||||||
lists:foldl(
|
|
||||||
fun(DataPart, Buffer) ->
|
|
||||||
[Buffer, DataPart]
|
|
||||||
end,
|
|
||||||
_InitialBuffer = [],
|
|
||||||
DataParts
|
|
||||||
)
|
|
||||||
).
|
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
%% Properties
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
%% Verifies that we can take several chunks from the buffer, and that data is not lost nor
|
|
||||||
%% created.
|
|
||||||
take_chunk_preserves_data_prop() ->
|
|
||||||
?FORALL(
|
|
||||||
{Buffer, BlockSize, Steps},
|
|
||||||
{buffer_gen(), non_neg_integer(), pos_integer()},
|
|
||||||
begin
|
|
||||||
{Chunks, FinalBuffer} =
|
|
||||||
lists:mapfoldl(
|
|
||||||
fun(_, Acc) ->
|
|
||||||
take_chunk(Acc, BlockSize)
|
|
||||||
end,
|
|
||||||
Buffer,
|
|
||||||
lists:seq(1, Steps)
|
|
||||||
),
|
|
||||||
%% Original buffer is preserved
|
|
||||||
BufferBin = iolist_to_binary(Buffer),
|
|
||||||
ConcatenatedBin = iolist_to_binary([Chunks, FinalBuffer]),
|
|
||||||
?WHENFAIL(
|
|
||||||
ct:pal(
|
|
||||||
"block size: ~b\nsteps: ~b\noriginal buffer:\n ~p\nchunks + final buffer:\n ~p",
|
|
||||||
[BlockSize, Steps, Buffer, {Chunks, FinalBuffer}]
|
|
||||||
),
|
|
||||||
BufferBin =:= ConcatenatedBin
|
|
||||||
)
|
|
||||||
end
|
|
||||||
).
|
|
||||||
|
|
||||||
%% Verifies that the produced chunk has at most the requested size.
|
|
||||||
take_chunk_size_prop() ->
|
|
||||||
?FORALL(
|
|
||||||
{Buffer, BlockSize},
|
|
||||||
{buffer_gen(), non_neg_integer()},
|
|
||||||
begin
|
|
||||||
{Chunk, FinalBuffer} = take_chunk(Buffer, BlockSize),
|
|
||||||
?WHENFAIL(
|
|
||||||
ct:pal(
|
|
||||||
"block size: ~b\n\noriginal buffer:\n ~p\nchunk + final buffer:\n ~p"
|
|
||||||
"\nchunk size: ~b",
|
|
||||||
[BlockSize, Buffer, {Chunk, FinalBuffer}, iolist_size(Chunk)]
|
|
||||||
),
|
|
||||||
iolist_size(Chunk) =< BlockSize
|
|
||||||
)
|
|
||||||
end
|
|
||||||
).
|
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
%% Tests
|
|
||||||
%%------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
take_chunk_test_() ->
|
|
||||||
Props = [take_chunk_preserves_data_prop(), take_chunk_size_prop()],
|
|
||||||
Opts = [{numtests, 1000}, {to_file, user}, {max_size, 100}],
|
|
||||||
{timeout, 300, [?_assert(proper:quickcheck(Prop, Opts)) || Prop <- Props]}.
|
|
Loading…
Reference in New Issue