feat(ds): Add a benchmarking tool for storage efficiency analysis
parent afeb2ab8aa
commit 23dafbb03b

@@ -0,0 +1,223 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc This script can be loaded into a running EMQX EE node. It will
%% create a number of DS databases with different options and fill
%% them with data of a given size.
%%
%% Then it will measure the size of the database directories and create
%% a "storage (in)efficiency" report.
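%%
%% Usage sketch (hypothetical session; the exact compile/load steps may
%% vary): compile and load this module on the node, then call, e.g.:
%%
%%   storage_efficiency:run(#{batches => 10, size => 100}).
%%
%% `run/0' uses the defaults listed in `run/1' below.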
-module(storage_efficiency).

-include_lib("emqx_utils/include/emqx_message.hrl").

%% API:
-export([run/0, run/1]).

%%================================================================================
%% API functions
%%================================================================================

run() ->
    run(#{}).

run(Custom) ->
    RunConf = maps:merge(
        #{
            %% Sleep between batches:
            sleep => 1_000,
            %% Don't run the test, only plot the data:
            dry_run => false,
            %% Payload size multiplier:
            size => 10,
            %% Number of batches:
            batches => 100,
            %% Add a generation every N batches:
            add_generation => 10
        },
        Custom
    ),
    lists:foreach(
        fun(DBConf) ->
            run(DBConf, RunConf)
        end,
        configs()
    ).

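%% Build the gnuplot script used to plot a given .dat file. For a
%% hypothetical file "benchmark-bitfield.dat" the rendered script would
%% look roughly like:
%%
%%   set terminal qt
%%   set title "benchmark-bitfield"
%%   set key autotitle columnheader
%%   plot for [n=2:*] "benchmark-bitfield.dat" using 1:n with linespoints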
%% erlfmt-ignore
gnuplot_script(Filename) ->
    "set terminal qt\n"
    %% "set logscale y 10\n"
    "set title \"" ++ filename:basename(Filename, ".dat") ++ "\"\n"
    "set key autotitle columnheader\n"
    "plot for [n=2:*] \"" ++ Filename ++ "\" using 1:n with linespoints".

%%================================================================================
%% Internal functions
%%================================================================================

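%% Databases to benchmark: each entry is a DB name (also used as the
%% base name of the resulting .dat file) paired with its configuration.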
configs() ->
    [
        {'benchmark-skipstream-asn1',
            db_conf({emqx_ds_storage_skipstream_lts, #{serialization_schema => asn1}})},
        {'benchmark-skipstream-v1',
            db_conf({emqx_ds_storage_skipstream_lts, #{serialization_schema => v1}})},
        {'benchmark-bitfield', db_conf({emqx_ds_storage_bitfield_lts, #{}})}
    ].

db_conf(Storage) ->
    #{
        backend => builtin_local,
        %% n_sites => 1,
        n_shards => 1,
        %% replication_factor => 1,
        %% replication_options => #{},
        storage => Storage
    }.

-record(s, {
    %% Total "ideal" size of the stored messages (MQTT wire encoding):
    data_size = 0,
    %% Total size of the message payloads alone:
    payload_size = 0,
    %% Number of messages stored so far:
    n_messages = 0,
    %% Column name => #{n_messages => value}:
    datapoints = #{},
    %% List of n_messages values, latest first:
    x_axis = []
}).

run({DB, Config}, RunConf) ->
    #{
        batches := NBatches,
        size := PSMultiplier,
        add_generation := AddGeneration,
        sleep := Sleep,
        dry_run := DryRun
    } = RunConf,
    {ok, _} = application:ensure_all_started(emqx_ds_backends),
    Dir = dir(DB),
    Filename = atom_to_list(DB) ++ ".dat",
    DryRun orelse
        begin
            io:format(user, "Running benchmark for ~p in ~p~n", [DB, Dir]),
            %% Ensure safe directory:
            {match, _} = re:run(Dir, filename:join("data", DB)),
            %% Ensure clean state:
            ok = emqx_ds:open_db(DB, Config),
            ok = emqx_ds:drop_db(DB),
            ok = file:del_dir_r(Dir),
            %% Open a fresh DB:
            ok = emqx_ds:open_db(DB, Config),
            S = lists:foldl(
                fun(Batch, Acc0) ->
                    Size = PSMultiplier * Batch,
                    io:format(user, "Storing batch with payload size ~p~n", [Size]),
                    Acc1 = store_batch(DB, Size, Acc0),
                    %% Sleep so all data is hopefully flushed:
                    timer:sleep(Sleep),
                    %% Add a new generation every `AddGeneration' batches:
                    (Batch rem AddGeneration) =:= 0 andalso
                        emqx_ds:add_generation(DB),
                    collect_datapoint(DB, Acc1)
                end,
                collect_datapoint(DB, #s{}),
                lists:seq(1, NBatches)
            ),
            {ok, FD} = file:open(Filename, [write]),
            io:put_chars(FD, print(S)),
            file:close(FD)
        end,
    os:cmd("echo '" ++ gnuplot_script(Filename) ++ "' | gnuplot --persist -"),
    ok.

collect_datapoint(
    DB, S0 = #s{n_messages = N, data_size = DS, payload_size = PS, datapoints = DP0, x_axis = X}
) ->
    NewData = [{"$_n", N}, {"$data", DS}, {"$payloads", PS} | dirsize(DB)],
    DP = lists:foldl(
        fun({Key, Val}, Acc) ->
            maps:update_with(
                Key,
                fun(M) -> M#{N => Val} end,
                #{N => Val},
                Acc
            )
        end,
        DP0,
        NewData
    ),
    S0#s{
        datapoints = DP,
        x_axis = [N | X]
    }.

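%% Render the collected datapoints as a whitespace-separated table that
%% gnuplot reads via `autotitle columnheader'. Schematically (the exact
%% columns depend on the directories reported by `du'):
%%
%%   $_n $data $payloads . ./<subdir> ...
%%   <n_messages> <wire size> <payload size> <dir sizes> ...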
print(#s{x_axis = XX, datapoints = DP}) ->
    Cols = lists:sort(maps:keys(DP)),
    Lines = [
        %% Print header:
        Cols
        %% Scan through rows:
        | [
            %% Scan through columns:
            [integer_to_binary(maps:get(X, maps:get(Col, DP), 0)) || Col <- Cols]
         || X <- lists:reverse(XX)
        ]
    ],
    lists:join(
        "\n",
        [lists:join(" ", Line) || Line <- Lines]
    ).

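%% Per-subdirectory on-disk size of the DB directory, in bytes, parsed
%% from GNU `du' output. E.g. a (hypothetical) output line "4096\t./foo"
%% is turned into {"./foo", 4096}.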
dirsize(DB) ->
    RawOutput = os:cmd("cd " ++ dir(DB) ++ "; du -b --max-depth 1 ."),
    [
        begin
            [Sz, Dir] = string:lexemes(L, "\t"),
            {Dir, list_to_integer(Sz)}
        end
     || L <- string:lexemes(RawOutput, "\n")
    ].

dir(DB) ->
    filename:join(emqx_ds_storage_layer:base_dir(), DB).

store_batch(DB, PayloadSize, S0 = #s{n_messages = N, data_size = DS, payload_size = PS}) ->
    From = rand:bytes(16),
    BatchSize = 50,
    Batch = [
        #message{
            id = emqx_guid:gen(),
            timestamp = emqx_message:timestamp_now(),
            payload = rand:bytes(PayloadSize),
            from = From,
            topic = emqx_topic:join([
                <<"blah">>,
                <<"blah">>,
                '',
                <<"blah">>,
                From,
                <<"bazzzzzzzzzzzzzzzzzzzzzzz">>,
                integer_to_binary(I)
            ])
        }
     || I <- lists:seq(1, BatchSize)
    ],
    ok = emqx_ds:store_batch(DB, Batch, #{sync => true}),
    S0#s{
        n_messages = N + length(Batch),
        data_size = DS + lists:sum(lists:map(fun msg_size/1, Batch)),
        payload_size = PS + length(Batch) * PayloadSize
    }.

%% We consider MQTT wire encoding to be "close to the ideal".
msg_size(Msg = #message{}) ->
    iolist_size(emqx_frame:serialize(emqx_message:to_packet(undefined, Msg))).