perf: add broker_pool_size, generic_pool_size and channel_cleanup_batch_size config options

Tuning these options can improve performance if cluster interconnect network latency is high.

Fixes: EMQX-10661
This commit is contained in:
Serge Tupchii 2023-08-03 17:49:47 +03:00
parent ed28c12a66
commit 466fe7e009
9 changed files with 72 additions and 5 deletions

View File

@ -2,7 +2,7 @@
{application, emqx, [
{id, "emqx"},
{description, "EMQX Core"},
{vsn, "5.1.4"},
{vsn, "5.1.5"},
{modules, []},
{registered, []},
{applications, [

View File

@ -31,7 +31,7 @@ start_link() ->
init([]) ->
%% Broker pool
PoolSize = emqx_vm:schedulers() * 2,
PoolSize = emqx:get_config([node, broker_pool_size], emqx_vm:schedulers() * 2),
BrokerPool = emqx_pool_sup:spec([
broker_pool,
hash,

View File

@ -685,7 +685,8 @@ handle_cast(Msg, State) ->
handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #{chan_pmon := PMon}) ->
?tp(emqx_cm_process_down, #{stale_pid => Pid, reason => _Reason}),
ChanPids = [Pid | emqx_utils:drain_down(?BATCH_SIZE)],
BatchSize = emqx:get_config([node, channel_cleanup_batch_size], ?BATCH_SIZE),
ChanPids = [Pid | emqx_utils:drain_down(BatchSize)],
{Items, PMon1} = emqx_pmon:erase_all(ChanPids, PMon),
lists:foreach(fun mark_channel_disconnected/1, ChanPids),
ok = emqx_pool:async_submit(fun lists:foreach/2, [fun ?MODULE:clean_down/1, Items]),

View File

@ -31,7 +31,9 @@ init([]) ->
%% always start emqx_config_handler first to load the emqx.conf to emqx_config
[
child_spec(emqx_config_handler, worker),
child_spec(emqx_pool_sup, supervisor),
child_spec(emqx_pool_sup, supervisor, [
emqx:get_config([node, generic_pool_size], emqx_vm:schedulers())
]),
child_spec(emqx_hooks, worker),
child_spec(emqx_stats, worker),
child_spec(emqx_metrics, worker),

View File

@ -24,6 +24,7 @@
-export([
start_link/0,
start_link/1,
start_link/3,
start_link/4
]).
@ -51,6 +52,9 @@ spec(ChildId, Args) ->
start_link() ->
start_link(?POOL, random, {?POOL, start_link, []}).
start_link(PoolSize) ->
start_link(?POOL, random, PoolSize, {?POOL, start_link, []}).
-spec start_link(atom() | tuple(), atom(), mfargs()) ->
{ok, pid()} | {error, term()}.
start_link(Pool, Type, MFA) ->

View File

@ -1,6 +1,6 @@
{application, emqx_conf, [
{description, "EMQX configuration management"},
{vsn, "0.1.24"},
{vsn, "0.1.25"},
{registered, []},
{mod, {emqx_conf_app, []}},
{applications, [kernel, stdlib, emqx_ctl]},

View File

@ -672,6 +672,35 @@ fields("node") ->
mapping => "emqx_machine.custom_shard_transports",
default => #{}
}
)},
{"broker_pool_size",
sc(
pos_integer(),
#{
importance => ?IMPORTANCE_HIDDEN,
default => emqx_vm:schedulers() * 2,
'readOnly' => true,
desc => ?DESC(node_broker_pool_size)
}
)},
{"generic_pool_size",
sc(
pos_integer(),
#{
importance => ?IMPORTANCE_HIDDEN,
default => emqx_vm:schedulers(),
'readOnly' => true,
desc => ?DESC(node_generic_pool_size)
}
)},
{"channel_cleanup_batch_size",
sc(
pos_integer(),
#{
importance => ?IMPORTANCE_HIDDEN,
default => 100_000,
desc => ?DESC(node_channel_cleanup_batch_size)
}
)}
];
fields("cluster_call") ->

View File

@ -0,0 +1,3 @@
Add `node.broker_pool_size`, `node.generic_pool_size`, `node.channel_cleanup_batch_size` options to EMQX configuration.
Tuning these options can significantly improve performance if cluster interconnect network latency is high.

View File

@ -776,4 +776,32 @@ the default is to use the value set in <code>db.default_shard_transport</code>."
db_shard_transports.label:
"""Shard Transports"""
node_broker_pool_size.desc:
"""The number of workers in emqx_broker pool. Increasing this value may improve performance
by enhancing parallelism, especially when EMQX cluster interconnect network latency is high.
Defaults to the number of Erlang schedulers (CPU cores) * 2.
"""
node_broker_pool_size.label:
"""Node Broker Pool Size"""
node_generic_pool_size.desc:
"""The number of workers in emqx_pool. Increasing this value may improve performance
by enhancing parallelism, especially when EMQX cluster interconnect network latency is high.
Defaults to the number of Erlang schedulers (CPU cores).
"""
node_generic_pool_size.label:
"""Node Generic Pool Size"""
node_channel_cleanup_batch_size.desc:
"""The size of the channel cleanup batch. If EMQX cluster interconnect network latency is high,
reducing this value together with increasing node.generic_pool_size may improve performance
during an abrupt disconnect of a large number of clients.
Defaults to 100000.
"""
node_channel_cleanup_batch_size.label:
"""Node Channel Cleanup Batch Size"""
}