Merge branch 'release-50' into file-transfer
* release-50: chore(rebalance): fix app metadata chore(rebalance): move apps from lib-ee, add READMEs docs: refine zh tr docs: delete APL header from ee file docs: delete zh changelog chore(rebalance): review fixes chore(rebalance): rebase and review fixes feat(rebalance): port apps from 4.x
This commit is contained in:
commit
8d9b785bd7
1
Makefile
1
Makefile
|
@ -179,6 +179,7 @@ clean-all:
|
|||
@rm -f rebar.lock
|
||||
@rm -rf deps
|
||||
@rm -rf _build
|
||||
@rm -f emqx_dialyzer_*_plt
|
||||
|
||||
.PHONY: deps-all
|
||||
deps-all: $(REBAR) $(PROFILES:%=deps-%)
|
||||
|
|
|
@ -14,26 +14,19 @@
|
|||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% This file contains common macros for testing.
|
||||
%% It must not be used anywhere except in test suites.
|
||||
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-define(assertWaitEvent(Code, EventMatch, Timeout),
|
||||
?check_trace(
|
||||
?assertMatch(
|
||||
{_, {ok, EventMatch}},
|
||||
?wait_async_action(
|
||||
Code,
|
||||
EventMatch,
|
||||
Timeout
|
||||
),
|
||||
fun(Trace) ->
|
||||
?assert(
|
||||
lists:any(
|
||||
fun
|
||||
(EventMatch) -> true;
|
||||
(_) -> false
|
||||
end,
|
||||
Trace
|
||||
)
|
||||
)
|
||||
end
|
||||
)
|
||||
)
|
||||
).
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-define(CHANNEL_METRICS, [
|
||||
recv_pkt,
|
||||
recv_msg,
|
||||
'recv_msg.qos0',
|
||||
'recv_msg.qos1',
|
||||
'recv_msg.qos2',
|
||||
'recv_msg.dropped',
|
||||
'recv_msg.dropped.await_pubrel_timeout',
|
||||
send_pkt,
|
||||
send_msg,
|
||||
'send_msg.qos0',
|
||||
'send_msg.qos1',
|
||||
'send_msg.qos2',
|
||||
'send_msg.dropped',
|
||||
'send_msg.dropped.expired',
|
||||
'send_msg.dropped.queue_full',
|
||||
'send_msg.dropped.too_large'
|
||||
]).
|
||||
|
||||
-define(INFO_KEYS, [
|
||||
conninfo,
|
||||
conn_state,
|
||||
clientinfo,
|
||||
session,
|
||||
will_msg
|
||||
]).
|
|
@ -34,6 +34,7 @@
|
|||
-define(HP_BRIDGE, 870).
|
||||
-define(HP_DELAY_PUB, 860).
|
||||
%% apps that can stop the hooks chain from continuing
|
||||
-define(HP_NODE_REBALANCE, 110).
|
||||
-define(HP_EXHOOK, 100).
|
||||
|
||||
%% == Lowest Priority = 0, don't change this value as the plugins may depend on it.
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
{emqx_conf,2}.
|
||||
{emqx_dashboard,1}.
|
||||
{emqx_delayed,1}.
|
||||
{emqx_eviction_agent,1}.
|
||||
{emqx_exhook,1}.
|
||||
{emqx_ft_storage_exporter_fs,1}.
|
||||
{emqx_ft_storage_fs,1}.
|
||||
|
@ -30,6 +31,10 @@
|
|||
{emqx_mgmt_cluster,1}.
|
||||
{emqx_mgmt_trace,1}.
|
||||
{emqx_mgmt_trace,2}.
|
||||
{emqx_node_rebalance,1}.
|
||||
{emqx_node_rebalance_api,1}.
|
||||
{emqx_node_rebalance_evacuation,1}.
|
||||
{emqx_node_rebalance_status,1}.
|
||||
{emqx_persistent_session,1}.
|
||||
{emqx_plugin_libs,1}.
|
||||
{emqx_plugins,1}.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
-module(emqx_channel).
|
||||
|
||||
-include("emqx.hrl").
|
||||
-include("emqx_channel.hrl").
|
||||
-include("emqx_mqtt.hrl").
|
||||
-include("logger.hrl").
|
||||
-include("types.hrl").
|
||||
|
@ -57,6 +58,12 @@
|
|||
clear_keepalive/1
|
||||
]).
|
||||
|
||||
%% Export for emqx_channel implementations
|
||||
-export([
|
||||
maybe_nack/1,
|
||||
maybe_mark_as_delivered/2
|
||||
]).
|
||||
|
||||
%% Exports for CT
|
||||
-export([set_field/3]).
|
||||
|
||||
|
@ -69,7 +76,7 @@
|
|||
]
|
||||
).
|
||||
|
||||
-export_type([channel/0, opts/0]).
|
||||
-export_type([channel/0, opts/0, conn_state/0]).
|
||||
|
||||
-record(channel, {
|
||||
%% MQTT ConnInfo
|
||||
|
@ -131,33 +138,6 @@
|
|||
quota_timer => expire_quota_limit
|
||||
}).
|
||||
|
||||
-define(CHANNEL_METRICS, [
|
||||
recv_pkt,
|
||||
recv_msg,
|
||||
'recv_msg.qos0',
|
||||
'recv_msg.qos1',
|
||||
'recv_msg.qos2',
|
||||
'recv_msg.dropped',
|
||||
'recv_msg.dropped.await_pubrel_timeout',
|
||||
send_pkt,
|
||||
send_msg,
|
||||
'send_msg.qos0',
|
||||
'send_msg.qos1',
|
||||
'send_msg.qos2',
|
||||
'send_msg.dropped',
|
||||
'send_msg.dropped.expired',
|
||||
'send_msg.dropped.queue_full',
|
||||
'send_msg.dropped.too_large'
|
||||
]).
|
||||
|
||||
-define(INFO_KEYS, [
|
||||
conninfo,
|
||||
conn_state,
|
||||
clientinfo,
|
||||
session,
|
||||
will_msg
|
||||
]).
|
||||
|
||||
-define(LIMITER_ROUTING, message_routing).
|
||||
|
||||
-dialyzer({no_match, [shutdown/4, ensure_timer/2, interval/2]}).
|
||||
|
@ -1091,10 +1071,12 @@ handle_out(unsuback, {PacketId, _ReasonCodes}, Channel) ->
|
|||
handle_out(disconnect, ReasonCode, Channel) when is_integer(ReasonCode) ->
|
||||
ReasonName = disconnect_reason(ReasonCode),
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel);
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel = ?IS_MQTT_V5) ->
|
||||
Packet = ?DISCONNECT_PACKET(ReasonCode),
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel) ->
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, #{}}, Channel);
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel = ?IS_MQTT_V5) ->
|
||||
Packet = ?DISCONNECT_PACKET(ReasonCode, Props),
|
||||
{ok, [{outgoing, Packet}, {close, ReasonName}], Channel};
|
||||
handle_out(disconnect, {_ReasonCode, ReasonName}, Channel) ->
|
||||
handle_out(disconnect, {_ReasonCode, ReasonName, _Props}, Channel) ->
|
||||
{ok, {close, ReasonName}, Channel};
|
||||
handle_out(auth, {ReasonCode, Properties}, Channel) ->
|
||||
{ok, ?AUTH_PACKET(ReasonCode, Properties), Channel};
|
||||
|
@ -1211,13 +1193,19 @@ handle_call(
|
|||
{takeover, 'end'},
|
||||
Channel = #channel{
|
||||
session = Session,
|
||||
pendings = Pendings
|
||||
pendings = Pendings,
|
||||
conninfo = #{clientid := ClientId}
|
||||
}
|
||||
) ->
|
||||
ok = emqx_session:takeover(Session),
|
||||
%% TODO: Should not drain deliver here (side effect)
|
||||
Delivers = emqx_utils:drain_deliver(),
|
||||
AllPendings = lists:append(Delivers, Pendings),
|
||||
?tp(
|
||||
debug,
|
||||
emqx_channel_takeover_end,
|
||||
#{clientid => ClientId}
|
||||
),
|
||||
disconnect_and_shutdown(takenover, AllPendings, Channel);
|
||||
handle_call(list_authz_cache, Channel) ->
|
||||
{reply, emqx_authz_cache:list_authz_cache(), Channel};
|
||||
|
@ -1289,6 +1277,8 @@ handle_info(die_if_test = Info, Channel) ->
|
|||
die_if_test_compiled(),
|
||||
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
||||
{ok, Channel};
|
||||
handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) ->
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel);
|
||||
handle_info({puback, PacketId, PubRes, RC}, Channel) ->
|
||||
do_finish_publish(PacketId, PubRes, RC, Channel);
|
||||
handle_info(Info, Channel) ->
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
-include("logger.hrl").
|
||||
-include("types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("stdlib/include/ms_transform.hrl").
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
|
@ -72,6 +74,12 @@
|
|||
get_session_confs/2
|
||||
]).
|
||||
|
||||
%% Client management
|
||||
-export([
|
||||
channel_with_session_table/1,
|
||||
live_connection_table/1
|
||||
]).
|
||||
|
||||
%% gen_server callbacks
|
||||
-export([
|
||||
init/1,
|
||||
|
@ -597,6 +605,40 @@ all_channels() ->
|
|||
Pat = [{{'_', '$1'}, [], ['$1']}],
|
||||
ets:select(?CHAN_TAB, Pat).
|
||||
|
||||
%% @doc Build a QLC query handle over the channel info table that yields
%% `{ClientId, ConnState, ConnInfo, ClientInfo}' for every channel whose
%% conninfo has `clean_start := false' and whose `conn_mod' is a member of
%% `ConnModuleList'.
channel_with_session_table(ConnModuleList) ->
    AllowedModules = sets:from_list(ConnModuleList, [{version, 2}]),
    %% Reduce each row of the info table to {ClientId, Info}.
    MatchSpec = ets:fun2ms(
        fun({{ClientId, _ChanPid}, Info, _Stats}) ->
            {ClientId, Info}
        end
    ),
    InfoTable = ets:table(?CHAN_INFO_TAB, [{traverse, {select, MatchSpec}}]),
    qlc:q([
        {ClientId, ConnState, ConnInfo, ClientInfo}
     || {ClientId, #{
            conn_state := ConnState,
            clientinfo := ClientInfo,
            conninfo := #{clean_start := false, conn_mod := ConnModule} = ConnInfo
        }} <-
            InfoTable,
        sets:is_element(ConnModule, AllowedModules)
    ]).
|
||||
|
||||
%% @doc Build a QLC query handle yielding `{ClientId, ChanPid}' for entries of
%% the channel connection table that are registered under one of `ConnModules'
%% and whose channel pid is local and present in the live-channel table.
live_connection_table(ConnModules) ->
    MatchSpecs = [live_connection_ms(ConnModule) || ConnModule <- ConnModules],
    ConnTable = ets:table(?CHAN_CONN_TAB, [{traverse, {select, MatchSpecs}}]),
    qlc:q([
        {ClientId, ChanPid}
     || {ClientId, ChanPid} <- ConnTable,
        is_channel_connected(ChanPid)
    ]).
|
||||
|
||||
%% Match-spec clause selecting `{ClientId, ChanPid}' from rows shaped like
%% `{{ClientId, ChanPid}, ConnModule}'.
live_connection_ms(ConnModule) ->
    Key = {'$1', '$2'},
    MatchHead = {Key, ConnModule},
    %% Wrapping the key in an extra tuple makes the match spec return a tuple.
    {MatchHead, [], [{Key}]}.
|
||||
|
||||
%% True only when `ChanPid' belongs to the local node and has an entry in the
%% live-channel table; anything else (including remote pids) is `false'.
is_channel_connected(ChanPid) ->
    case ChanPid of
        LocalPid when node(LocalPid) =:= node() ->
            ets:member(?CHAN_LIVE_TAB, LocalPid);
        _NotLocal ->
            false
    end.
|
||||
|
||||
%% @doc Get all registered clientIDs. Debug/test interface
|
||||
all_client_ids() ->
|
||||
Pat = [{{'$1', '_'}, [], ['$1']}],
|
||||
|
@ -697,7 +739,8 @@ code_change(_OldVsn, State, _Extra) ->
|
|||
%%--------------------------------------------------------------------
|
||||
|
||||
clean_down({ChanPid, ClientId}) ->
|
||||
do_unregister_channel({ClientId, ChanPid}).
|
||||
do_unregister_channel({ClientId, ChanPid}),
|
||||
ok = ?tp(debug, emqx_cm_clean_down, #{client_id => ClientId}).
|
||||
|
||||
stats_fun() ->
|
||||
lists:foreach(fun update_stats/1, ?CHAN_STATS).
|
||||
|
@ -723,12 +766,12 @@ get_chann_conn_mod(ClientId, ChanPid) ->
|
|||
wrap_rpc(emqx_cm_proto_v2:get_chann_conn_mod(ClientId, ChanPid)).
|
||||
|
||||
mark_channel_connected(ChanPid) ->
|
||||
?tp(emqx_cm_connected_client_count_inc, #{}),
|
||||
?tp(emqx_cm_connected_client_count_inc, #{chan_pid => ChanPid}),
|
||||
ets:insert_new(?CHAN_LIVE_TAB, {ChanPid, true}),
|
||||
ok.
|
||||
|
||||
mark_channel_disconnected(ChanPid) ->
|
||||
?tp(emqx_cm_connected_client_count_dec, #{}),
|
||||
?tp(emqx_cm_connected_client_count_dec, #{chan_pid => ChanPid}),
|
||||
ets:delete(?CHAN_LIVE_TAB, ChanPid),
|
||||
ok.
|
||||
|
||||
|
|
|
@ -167,9 +167,15 @@ handle_info(Info, State) ->
|
|||
{noreply, State}.
|
||||
|
||||
terminate(_Reason, _State) ->
|
||||
ok = ekka:unmonitor(membership),
|
||||
emqx_stats:cancel_update(route_stats),
|
||||
mnesia:unsubscribe({table, ?ROUTING_NODE, simple}).
|
||||
try
|
||||
ok = ekka:unmonitor(membership),
|
||||
emqx_stats:cancel_update(route_stats),
|
||||
mnesia:unsubscribe({table, ?ROUTING_NODE, simple})
|
||||
catch
|
||||
exit:{noproc, {gen_server, call, [mria_membership, _]}} ->
|
||||
?SLOG(warning, #{msg => "mria_membership_down"}),
|
||||
ok
|
||||
end.
|
||||
|
||||
code_change(_OldVsn, State, _Extra) ->
|
||||
{ok, State}.
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
set_default_config/0,
|
||||
set_default_config/1,
|
||||
set_default_config/2,
|
||||
set_default_config/3,
|
||||
request/2,
|
||||
request/3,
|
||||
request/4,
|
||||
|
@ -41,11 +42,14 @@ set_default_config(DefaultUsername) ->
|
|||
set_default_config(DefaultUsername, false).
|
||||
|
||||
set_default_config(DefaultUsername, HAProxyEnabled) ->
|
||||
set_default_config(DefaultUsername, HAProxyEnabled, #{}).
|
||||
|
||||
set_default_config(DefaultUsername, HAProxyEnabled, Opts) ->
|
||||
Config = #{
|
||||
listeners => #{
|
||||
http => #{
|
||||
enable => true,
|
||||
bind => 18083,
|
||||
bind => maps:get(bind, Opts, 18083),
|
||||
inet6 => false,
|
||||
ipv6_v6only => false,
|
||||
max_connections => 512,
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
Business Source License 1.1
|
||||
|
||||
Licensor: Hangzhou EMQ Technologies Co., Ltd.
|
||||
Licensed Work: EMQX Enterprise Edition
|
||||
The Licensed Work is (c) 2023
|
||||
Hangzhou EMQ Technologies Co., Ltd.
|
||||
Additional Use Grant: Students and educators are granted right to copy,
|
||||
modify, and create derivative work for research
|
||||
or education.
|
||||
Change Date: 2027-02-01
|
||||
Change License: Apache License, Version 2.0
|
||||
|
||||
For information about alternative licensing arrangements for the Software,
|
||||
please contact Licensor: https://www.emqx.com/en/contact
|
||||
|
||||
Notice
|
||||
|
||||
The Business Source License (this document, or the “License”) is not an Open
|
||||
Source license. However, the Licensed Work will eventually be made available
|
||||
under an Open Source License, as stated in this License.
|
||||
|
||||
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
||||
“Business Source License” is a trademark of MariaDB Corporation Ab.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
Business Source License 1.1
|
||||
|
||||
Terms
|
||||
|
||||
The Licensor hereby grants you the right to copy, modify, create derivative
|
||||
works, redistribute, and make non-production use of the Licensed Work. The
|
||||
Licensor may make an Additional Use Grant, above, permitting limited
|
||||
production use.
|
||||
|
||||
Effective on the Change Date, or the fourth anniversary of the first publicly
|
||||
available distribution of a specific version of the Licensed Work under this
|
||||
License, whichever comes first, the Licensor hereby grants you rights under
|
||||
the terms of the Change License, and the rights granted in the paragraph
|
||||
above terminate.
|
||||
|
||||
If your use of the Licensed Work does not comply with the requirements
|
||||
currently in effect as described in this License, you must purchase a
|
||||
commercial license from the Licensor, its affiliated entities, or authorized
|
||||
resellers, or you must refrain from using the Licensed Work.
|
||||
|
||||
All copies of the original and modified Licensed Work, and derivative works
|
||||
of the Licensed Work, are subject to this License. This License applies
|
||||
separately for each version of the Licensed Work and the Change Date may vary
|
||||
for each version of the Licensed Work released by Licensor.
|
||||
|
||||
You must conspicuously display this License on each original or modified copy
|
||||
of the Licensed Work. If you receive the Licensed Work in original or
|
||||
modified form from a third party, the terms and conditions set forth in this
|
||||
License apply to your use of that work.
|
||||
|
||||
Any use of the Licensed Work in violation of this License will automatically
|
||||
terminate your rights under this License for the current and all other
|
||||
versions of the Licensed Work.
|
||||
|
||||
This License does not grant you any right in any trademark or logo of
|
||||
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||
Licensor as expressly required by this License).
|
||||
|
||||
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
||||
TITLE.
|
||||
|
||||
MariaDB hereby grants you permission to use this License’s text to license
|
||||
your works, and to refer to it using the trademark “Business Source License”,
|
||||
as long as you comply with the Covenants of Licensor below.
|
||||
|
||||
Covenants of Licensor
|
||||
|
||||
In consideration of the right to use this License’s text and the “Business
|
||||
Source License” name and trademark, Licensor covenants to MariaDB, and to all
|
||||
other recipients of the licensed work to be provided by Licensor:
|
||||
|
||||
1. To specify as the Change License the GPL Version 2.0 or any later version,
|
||||
or a license that is compatible with GPL Version 2.0 or a later version,
|
||||
where “compatible” means that software provided under the Change License can
|
||||
be included in a program with software provided under GPL Version 2.0 or a
|
||||
later version. Licensor may specify additional Change Licenses without
|
||||
limitation.
|
||||
|
||||
2. To either: (a) specify an additional grant of rights to use that does not
|
||||
impose any additional restriction on the right granted in this License, as
|
||||
the Additional Use Grant; or (b) insert the text “None”.
|
||||
|
||||
3. To specify a Change Date.
|
||||
|
||||
4. Not to modify this License in any other way.
|
|
@ -0,0 +1,35 @@
|
|||
# EMQX Eviction Agent
|
||||
|
||||
`emqx_eviction_agent` is a part of the node evacuation/node rebalance feature in EMQX.
|
||||
It is a low-level application that encapsulates working with actual MQTT connections.
|
||||
|
||||
## Application Responsibilities
|
||||
|
||||
`emqx_eviction_agent` application:
|
||||
|
||||
* Blocks incoming connections to the node it is running on.
|
||||
* Serves as a facade for connection/session eviction operations.
|
||||
* Reports blocking status via HTTP API.
|
||||
|
||||
The `emqx_eviction_agent` is relatively passive and has no eviction/rebalancing logic. It allows
|
||||
`emqx_node_rebalance` to perform eviction/rebalancing operations using a high-level API, without having to deal with
|
||||
MQTT connections directly.
|
||||
|
||||
## EMQX Integration
|
||||
|
||||
`emqx_eviction_agent` interacts with the following EMQX components:
|
||||
* `emqx_cm` - to get the list of active MQTT connections;
|
||||
* `emqx_hooks` subsystem - to block/unblock incoming connections;
|
||||
* `emqx_channel` and the corresponding connection modules to perform the eviction.
|
||||
|
||||
## User Facing API
|
||||
|
||||
The application provides a very simple API (CLI and HTTP) to inspect the current blocking status.
|
||||
|
||||
# Documentation
|
||||
|
||||
The rebalancing concept is described in the corresponding [EIP](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md).
|
||||
|
||||
# Contributing
|
||||
|
||||
Please see our [contributing.md](../../CONTRIBUTING.md).
|
|
@ -0,0 +1,3 @@
|
|||
##--------------------------------------------------------------------
|
||||
## EMQX Eviction Agent Plugin
|
||||
##--------------------------------------------------------------------
|
|
@ -0,0 +1,2 @@
|
|||
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,21 @@
|
|||
{application, emqx_eviction_agent, [
|
||||
{description, "EMQX Eviction Agent"},
|
||||
{vsn, "5.0.0"},
|
||||
{registered, [
|
||||
emqx_eviction_agent_sup,
|
||||
emqx_eviction_agent,
|
||||
emqx_eviction_agent_conn_sup
|
||||
]},
|
||||
{applications, [
|
||||
kernel,
|
||||
stdlib,
|
||||
emqx_ctl
|
||||
]},
|
||||
{mod, {emqx_eviction_agent_app, []}},
|
||||
{env, []},
|
||||
{modules, []},
|
||||
{links, [
|
||||
{"Homepage", "https://www.emqx.com/"},
|
||||
{"Github", "https://github.com/emqx"}
|
||||
]}
|
||||
]}.
|
|
@ -0,0 +1,3 @@
|
|||
%% -*- mode: erlang -*-
|
||||
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,348 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("emqx/include/emqx_hooks.hrl").
|
||||
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start_link/0,
|
||||
enable/2,
|
||||
disable/1,
|
||||
status/0,
|
||||
connection_count/0,
|
||||
session_count/0,
|
||||
session_count/1,
|
||||
evict_connections/1,
|
||||
evict_sessions/2,
|
||||
evict_sessions/3,
|
||||
evict_session_channel/3
|
||||
]).
|
||||
|
||||
-behaviour(gen_server).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_info/2,
|
||||
handle_cast/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-export([
|
||||
on_connect/2,
|
||||
on_connack/3
|
||||
]).
|
||||
|
||||
-export([
|
||||
hook/0,
|
||||
unhook/0
|
||||
]).
|
||||
|
||||
-export_type([server_reference/0]).
|
||||
|
||||
-define(CONN_MODULES, [
|
||||
emqx_connection, emqx_ws_connection, emqx_quic_connection, emqx_eviction_agent_channel
|
||||
]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-type server_reference() :: binary() | undefined.
|
||||
-type status() :: {enabled, conn_stats()} | disabled.
|
||||
-type conn_stats() :: #{
|
||||
connections := non_neg_integer(),
|
||||
sessions := non_neg_integer()
|
||||
}.
|
||||
-type kind() :: atom().
|
||||
|
||||
%% @doc Start the eviction agent server, registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% @doc Ask the agent server to enable eviction for `Kind', recording
%% `ServerReference'. The call is synchronous.
-spec enable(kind(), server_reference()) -> ok_or_error(eviction_agent_busy).
enable(Kind, ServerReference) ->
    Request = {enable, Kind, ServerReference},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% @doc Ask the agent server to disable eviction for `Kind'.
%%
%% Returns `ok' when the agent was enabled for the same `Kind',
%% `{error, disabled}' when it was not enabled at all, and
%% `{error, eviction_agent_busy}' when it is enabled for a different kind.
%% The spec previously declared only `ok', which did not match the replies
%% produced by the `{disable, Kind}' call handler.
-spec disable(kind()) -> ok_or_error(disabled | eviction_agent_busy).
disable(Kind) ->
    gen_server:call(?MODULE, {disable, Kind}).
|
||||
|
||||
%% @doc Report whether the agent is enabled; when it is, include the current
%% connection and session counts.
-spec status() -> status().
status() ->
    case enable_status() of
        disabled ->
            disabled;
        {enabled, _Kind, _ServerReference} ->
            {enabled, stats()}
    end.
|
||||
|
||||
%% @doc Disconnect up to `N' live connections, passing the stored server
%% reference to each of them. Returns `{error, disabled}' when the agent is
%% not enabled.
-spec evict_connections(pos_integer()) -> ok_or_error(disabled).
evict_connections(N) ->
    case enable_status() of
        disabled ->
            {error, disabled};
        {enabled, _Kind, ServerReference} ->
            ok = do_evict_connections(N, ServerReference)
    end.
|
||||
|
||||
%% @doc Evict up to `N' sessions to the given node(s), regardless of the
%% channels' connection state. A single node is wrapped into a list.
%% The node list must be non-empty; this is checked by matching `[_ | _]'
%% rather than by calling `length/1' in a guard.
-spec evict_sessions(pos_integer(), node() | [node()]) -> ok_or_error(disabled).
evict_sessions(N, Node) when is_atom(Node) ->
    evict_sessions(N, [Node]);
evict_sessions(N, [_ | _] = Nodes) ->
    evict_sessions(N, Nodes, any).
|
||||
|
||||
%% @doc Evict up to `N' sessions whose channel is in `ConnState' (`any'
%% disables the filter) to one of the given nodes. A single node is wrapped
%% into a list. Returns `{error, disabled}' when the agent is not enabled.
%% The node list must be non-empty; this is checked by matching `[_ | _]'
%% rather than by calling `length/1' in a guard.
-spec evict_sessions(pos_integer(), node() | [node()], atom()) -> ok_or_error(disabled).
evict_sessions(N, Node, ConnState) when is_atom(Node) ->
    evict_sessions(N, [Node], ConnState);
evict_sessions(N, [_ | _] = Nodes, ConnState) ->
    case enable_status() of
        {enabled, _Kind, _ServerReference} ->
            ok = do_evict_sessions(N, Nodes, ConnState);
        disabled ->
            {error, disabled}
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_server init: clear the module's persistent_term entry, so that
%% enable_status/0 falls back to `disabled', and start with an empty state map.
init([]) ->
    _ = persistent_term:erase(?MODULE),
    InitialState = #{},
    {ok, InitialState}.
|
||||
|
||||
%% Synchronous requests: enable, disable, and a logged fallback for anything else.
handle_call({enable, Kind, ServerReference}, _From, St) ->
    {reply, handle_enable(Kind, ServerReference), St};
handle_call({disable, Kind}, _From, St) ->
    {reply, handle_disable(Kind), St};
handle_call(Msg, _From, St) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Msg, state => St}),
    {reply, {error, unknown_call}, St}.

%% Store the enabled status unless the agent is already enabled for another kind.
handle_enable(Kind, ServerReference) ->
    case enable_status() of
        {enabled, Kind, _PrevReference} ->
            ok = persistent_term:put(?MODULE, {enabled, Kind, ServerReference});
        {enabled, _OtherKind, _PrevReference} ->
            {error, eviction_agent_busy};
        disabled ->
            ok = persistent_term:put(?MODULE, {enabled, Kind, ServerReference})
    end.

%% Erase the enabled status, but only when it was set for the same kind.
handle_disable(Kind) ->
    case enable_status() of
        {enabled, Kind, _ServerReference} ->
            _ = persistent_term:erase(?MODULE),
            ok;
        {enabled, _OtherKind, _ServerReference} ->
            {error, eviction_agent_busy};
        disabled ->
            {error, disabled}
    end.
|
||||
|
||||
%% No raw messages are expected: log the message and keep the state unchanged.
handle_info(Info, State) ->
    ?SLOG(warning, #{msg => "unknown_msg", info => Info, state => State}),
    {noreply, State}.
|
||||
|
||||
%% No casts are expected: log the request and keep the state unchanged.
handle_cast(Cast, State) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast => Cast, state => State}),
    {noreply, State}.
|
||||
|
||||
%% Code upgrade callback: the state is carried over as is.
code_change(_OldVsn, St, _Extra) ->
    {ok, St}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Hook callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% 'client.connect' hook callback: while the agent is enabled, stop the hook
%% chain with the "use another server" reason code; otherwise do nothing.
on_connect(_ConnInfo, _Props) ->
    case enable_status() of
        disabled ->
            ignore;
        {enabled, _Kind, _ServerReference} ->
            {stop, {error, ?RC_USE_ANOTHER_SERVER}}
    end.
|
||||
|
||||
%% 'client.connack' hook callback: for MQTT v5 clients being refused with
%% `use_another_server', add the stored server reference to the CONNACK
%% properties while the agent is enabled. All other cases pass the
%% properties through unchanged.
on_connack(
    #{proto_name := <<"MQTT">>, proto_ver := ?MQTT_PROTO_V5},
    use_another_server,
    Props
) ->
    case enable_status() of
        disabled ->
            {ok, Props};
        {enabled, _Kind, ServerReference} ->
            {ok, Props#{'Server-Reference' => ServerReference}}
    end;
on_connack(_ClientInfo, _Reason, Props) ->
    {ok, Props}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Hook funcs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Register the connack and connect callbacks at the node-rebalance priority.
hook() ->
    ?tp(debug, eviction_agent_hook, #{}),
    lists:foreach(
        fun({HookPoint, Callback}) ->
            ok = emqx_hooks:put(HookPoint, {?MODULE, Callback, []}, ?HP_NODE_REBALANCE)
        end,
        [
            {'client.connack', on_connack},
            {'client.connect', on_connect}
        ]
    ).
|
||||
|
||||
%% Remove the connect and connack callbacks.
unhook() ->
    ?tp(debug, eviction_agent_unhook, #{}),
    lists:foreach(
        fun({HookPoint, Callback}) ->
            ok = emqx_hooks:del(HookPoint, {?MODULE, Callback})
        end,
        [
            {'client.connect', on_connect},
            {'client.connack', on_connack}
        ]
    ).
|
||||
|
||||
%% Read the agent status from persistent_term; an absent entry means `disabled'.
enable_status() ->
    Default = disabled,
    persistent_term:get(?MODULE, Default).
|
||||
|
||||
% connection management
|
||||
%% Current connection and session counts as a map.
stats() ->
    Connections = connection_count(),
    Sessions = session_count(),
    #{connections => Connections, sessions => Sessions}.
|
||||
|
||||
%% Query handle over live connections of the supported connection modules.
connection_table() ->
    ConnModules = ?CONN_MODULES,
    emqx_cm:live_connection_table(ConnModules).
|
||||
|
||||
%% Number of answers produced by the live connection query.
connection_count() ->
    LiveConnections = connection_table(),
    table_count(LiveConnections).
|
||||
|
||||
%% Query handle yielding `{ClientId, ConnInfo, ClientInfo}' for channels with
%% sessions. `any' applies no filter; any other value keeps only channels
%% whose connection state is exactly that value.
channel_with_session_table(any) ->
    SessionTable = emqx_cm:channel_with_session_table(?CONN_MODULES),
    qlc:q([
        {ClientId, ConnInfo, ClientInfo}
     || {ClientId, _ConnState, ConnInfo, ClientInfo} <- SessionTable
    ]);
channel_with_session_table(RequiredConnState) ->
    SessionTable = emqx_cm:channel_with_session_table(?CONN_MODULES),
    qlc:q([
        {ClientId, ConnInfo, ClientInfo}
     || {ClientId, ConnState, ConnInfo, ClientInfo} <- SessionTable,
        ConnState =:= RequiredConnState
    ]).
|
||||
|
||||
%% Number of sessions, regardless of connection state.
session_count() ->
    AnyConnState = any,
    session_count(AnyConnState).
|
||||
|
||||
%% Number of sessions whose channel is in `ConnState' (`any' counts all).
session_count(ConnState) ->
    SessionQuery = channel_with_session_table(ConnState),
    table_count(SessionQuery).
|
||||
|
||||
%% Count the answers of a query handle by folding over it.
table_count(QH) ->
    CountAnswer = fun(_Answer, Count) -> Count + 1 end,
    qlc:fold(CountAnswer, 0, QH).
|
||||
|
||||
%% Fetch at most `N' channel pids from the live connection query.
take_connections(N) ->
    PidQuery = qlc:q([ChanPid || {_ClientId, ChanPid} <- connection_table()]),
    Cursor = qlc:cursor(PidQuery),
    Taken = qlc:next_answers(Cursor, N),
    %% The cursor is no longer needed once the answers are collected.
    ok = qlc:delete_cursor(Cursor),
    Taken.
|
||||
|
||||
%% Fetch at most `N' `{ClientId, ConnInfo, ClientInfo}' entries for channels
%% with sessions in the given connection state.
take_channel_with_sessions(N, ConnState) ->
    SessionQuery = channel_with_session_table(ConnState),
    Cursor = qlc:cursor(SessionQuery),
    Taken = qlc:next_answers(Cursor, N),
    %% The cursor is no longer needed once the answers are collected.
    ok = qlc:delete_cursor(Cursor),
    Taken.
|
||||
|
||||
%% Send a disconnect request to up to `N' live channels. Always returns `ok'.
do_evict_connections(N, ServerReference) when N > 0 ->
    _ = [
        disconnect_channel(ChanPid, ServerReference)
     || ChanPid <- take_connections(N)
    ],
    ok.
|
||||
|
||||
%% Evict up to `N' session channels to randomly chosen nodes from `Nodes'.
%% Individual eviction results are discarded; always returns `ok'.
do_evict_sessions(N, Nodes, ConnState) when N > 0 ->
    _ = [
        evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo)
     || {ClientId, ConnInfo, ClientInfo} <- take_channel_with_sessions(N, ConnState)
    ],
    ok.
|
||||
|
||||
%% Pick a random node from `Nodes' and ask it, through the proto module, to
%% take over the session channel. `{badrpc, Reason}' is converted to
%% `{error, Reason}'; errors are logged; any other result is returned as is.
evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo) ->
    Node = select_random(Nodes),
    ?SLOG(
        info,
        #{
            msg => "evict_session_channel",
            client_id => ClientId,
            node => Node,
            conn_info => ConnInfo,
            client_info => ClientInfo
        }
    ),
    RpcResult = emqx_eviction_agent_proto_v1:evict_session_channel(
        Node, ClientId, ConnInfo, ClientInfo
    ),
    case RpcResult of
        {badrpc, RpcReason} ->
            ?SLOG(
                error,
                #{
                    msg => "evict_session_channel_rpc_error",
                    client_id => ClientId,
                    node => Node,
                    reason => RpcReason
                }
            ),
            {error, RpcReason};
        {error, ErrReason} ->
            ?SLOG(
                error,
                #{
                    msg => "evict_session_channel_error",
                    client_id => ClientId,
                    node => Node,
                    reason => ErrReason
                }
            ),
            {error, ErrReason};
        Other ->
            Other
    end.
|
||||
|
||||
%% @doc Start a supervised eviction agent channel for the given client on the
%% local node, logging the request and its result.
-spec evict_session_channel(
    emqx_types:clientid(),
    emqx_types:conninfo(),
    emqx_types:clientinfo()
) -> supervisor:startchild_ret().
evict_session_channel(ClientId, ConnInfo, ClientInfo) ->
    ?SLOG(info, #{
        msg => "evict_session_channel",
        client_id => ClientId,
        conn_info => ConnInfo,
        client_info => ClientInfo
    }),
    ChannelOpts = #{conninfo => ConnInfo, clientinfo => ClientInfo},
    StartResult = emqx_eviction_agent_channel:start_supervised(ChannelOpts),
    ?SLOG(
        info,
        #{
            msg => "evict_session_channel_result",
            client_id => ClientId,
            result => StartResult
        }
    ),
    StartResult.
|
||||
|
||||
%% Send the channel process a disconnect request carrying the
%% "use another server" reason code and the server reference property.
disconnect_channel(ChanPid, ServerReference) ->
    Props = #{'Server-Reference' => ServerReference},
    erlang:send(ChanPid, {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props}).
|
||||
|
||||
%% Pick a uniformly random element of a non-empty list.
%% Non-emptiness is checked by matching `[_ | _]', so the list length is
%% computed once instead of once in the guard and again in the body.
%% An empty list still fails with `function_clause'.
select_random([_ | _] = List) ->
    lists:nth(rand:uniform(length(List)), List).
|
|
@ -0,0 +1,85 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_api).
|
||||
|
||||
-behaviour(minirest_api).
|
||||
|
||||
-include_lib("typerefl/include/types.hrl").
|
||||
-include_lib("hocon/include/hoconsc.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
|
||||
%% Swagger specs from hocon schema
|
||||
-export([
|
||||
api_spec/0,
|
||||
paths/0,
|
||||
schema/1,
|
||||
namespace/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
fields/1,
|
||||
roots/0
|
||||
]).
|
||||
|
||||
%% API callbacks
|
||||
-export([
|
||||
'/node_eviction/status'/2
|
||||
]).
|
||||
|
||||
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||
|
||||
%% Schema namespace of this API module.
namespace() ->
    "node_eviction".
|
||||
|
||||
%% Swagger spec for this module, generated with schema checking turned on.
api_spec() ->
    SwaggerOpts = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, SwaggerOpts).
|
||||
|
||||
%% HTTP paths served by this API module.
paths() ->
    ["/node_eviction/status"].
|
||||
|
||||
%% Schema of the status endpoint: a single GET operation returning 200.
schema("/node_eviction/status") ->
    GetOperation = #{
        tags => [<<"node_eviction">>],
        summary => <<"Get node eviction status">>,
        description => ?DESC("node_eviction_status_get"),
        responses => #{200 => schema_status()}
    },
    #{
        'operationId' => '/node_eviction/status',
        get => GetOperation
    }.
|
||||
|
||||
%% Handler for the status endpoint: always 200, with the agent status and,
%% when enabled, the connection/session statistics.
'/node_eviction/status'(_Bindings, _Params) ->
    Body =
        case emqx_eviction_agent:status() of
            {enabled, Stats} ->
                #{status => enabled, stats => Stats};
            disabled ->
                #{status => disabled}
        end,
    {200, Body}.
|
||||
|
||||
%% Response schema: either the enabled or the disabled status structure.
schema_status() ->
    StatusVariants = [ref(status_enabled), ref(status_disabled)],
    mk(hoconsc:union(StatusVariants), #{}).
|
||||
|
||||
%% This schema module defines no root fields.
roots() ->
    [].
|
||||
|
||||
%% Field definitions for the structures referenced by the status schema.
fields(status_enabled) ->
    StatusField = {status, mk(enabled, #{default => enabled})},
    StatsField = {stats, ref(stats)},
    [StatusField, StatsField];
fields(stats) ->
    CounterType = mk(integer(), #{}),
    [
        {connections, CounterType},
        {sessions, CounterType}
    ];
fields(status_disabled) ->
    StatusField = {status, mk(disabled, #{default => disabled})},
    [StatusField].
|
|
@ -0,0 +1,22 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_app).
|
||||
|
||||
-behaviour(application).
|
||||
|
||||
-export([
|
||||
start/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
%% Application start: install the agent hooks, start the top supervisor,
%% then register the CLI command. Any step not returning the expected
%% value crashes the start.
start(_Type, _Args) ->
    ok = emqx_eviction_agent:hook(),
    {ok, _SupPid} = StartResult = emqx_eviction_agent_sup:start_link(),
    ok = emqx_eviction_agent_cli:load(),
    StartResult.
|
||||
|
||||
%% Application stop: remove the hooks and unregister the CLI command.
stop(_State) ->
    ok = emqx_eviction_agent:unhook(),
    ok = emqx_eviction_agent_cli:unload(),
    ok.
|
|
@ -0,0 +1,358 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% MQTT Channel
|
||||
-module(emqx_eviction_agent_channel).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_channel.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start_link/1,
|
||||
start_supervised/1,
|
||||
call/2,
|
||||
call/3,
|
||||
cast/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_cast/2,
|
||||
handle_info/2,
|
||||
terminate/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-type opts() :: #{
|
||||
conninfo := emqx_types:conninfo(),
|
||||
clientinfo := emqx_types:clientinfo()
|
||||
}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% API
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Start a channel process as a temporary worker under
%% emqx_eviction_agent_conn_sup. The child id is the client id followed by
%% "-" and a positive unique integer.
-spec start_supervised(opts()) -> supervisor:startchild_ret().
start_supervised(#{clientinfo := #{clientid := ClientId}} = Opts) ->
    Suffix = integer_to_binary(erlang:unique_integer([positive])),
    ChildId = iolist_to_binary([bin_clientid(ClientId), "-", Suffix]),
    supervisor:start_child(emqx_eviction_agent_conn_sup, #{
        id => ChildId,
        start => {?MODULE, start_link, [Opts]},
        restart => temporary,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }).
|
||||
|
||||
%% Start an unregistered, linked channel gen_server; init/1 receives [Opts].
-spec start_link(opts()) -> startlink_ret().
start_link(Opts) ->
    InitArgs = [Opts],
    gen_server:start_link(?MODULE, InitArgs, []).
|
||||
|
||||
%% Send an asynchronous request to the channel process.
-spec cast(pid(), term()) -> ok.
cast(Pid, Req) ->
    gen_server:cast(Pid, Req).
|
||||
|
||||
%% Synchronous request to the channel process with no timeout.
-spec call(pid(), term()) -> term().
call(Pid, Req) ->
    gen_server:call(Pid, Req, infinity).
|
||||
|
||||
%% Synchronous request to the channel process with an explicit timeout.
-spec call(pid(), term(), timeout()) -> term().
call(Pid, Req, Timeout) ->
    gen_server:call(Pid, Req, Timeout).
|
||||
|
||||
%% Stop the channel process via gen_server:stop/1.
-spec stop(pid()) -> ok.
stop(Pid) ->
    gen_server:stop(Pid).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server API
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_server init: normalise the incoming conn/client info, open the
%% existing session, arm the expiry timer and mark the channel as
%% disconnected in emqx_cm. Stops with the error reason when the session
%% cannot be opened or the expiry timer cannot be set.
init([#{conninfo := ConnInfo0, clientinfo := ClientInfo0 = #{clientid := ClientId}}]) ->
    process_flag(trap_exit, true),
    ClientInfo = clientinfo(ClientInfo0),
    ConnInfo = conninfo(ConnInfo0),
    case open_session(ConnInfo, ClientInfo) of
        {error, Reason} ->
            {stop, Reason};
        {ok, Opened} ->
            case set_expiry_timer(Opened) of
                {error, Reason} ->
                    {stop, Reason};
                {ok, Channel} ->
                    ?SLOG(info, #{
                        msg => "channel_initialized",
                        clientid => ClientId,
                        node => node()
                    }),
                    ok = emqx_cm:mark_channel_disconnected(self()),
                    {ok, Channel, hibernate}
            end
    end.
|
||||
|
||||
%% Synchronous requests handled by the eviction channel:
%%   kick / discard      - reply ok and stop with kicked / discarded
%%   {takeover, 'begin'} - reply with the session, set takeover => true
%%   {takeover, 'end'}   - reply with drained + pending delivers and stop
%%   list_acl_cache      - reply []
%%   {quota, _}          - reply ok
%% Anything else is logged and answered with `ignored'.
handle_call(kick, _From, Channel) ->
    {stop, kicked, ok, Channel};
handle_call(discard, _From, Channel) ->
    {stop, discarded, ok, Channel};
handle_call({takeover, 'begin'}, _From, Channel = #{session := Session}) ->
    {reply, Session, Channel#{takeover => true}};
handle_call(
    {takeover, 'end'},
    _From,
    Channel = #{
        pendings := Pendings,
        session := Session,
        clientinfo := #{clientid := ClientId}
    }
) ->
    ok = emqx_session:takeover(Session),
    %% TODO: Should not drain deliver here (side effect)
    Drained = emqx_utils:drain_deliver(),
    %% Messages drained from the mailbox go in front of the stored pendings.
    Reply = Drained ++ Pendings,
    ?tp(debug, emqx_channel_takeover_end, #{clientid => ClientId}),
    {stop, normal, Reply, Channel};
handle_call(list_acl_cache, _From, Channel) ->
    {reply, [], Channel};
handle_call({quota, _Policy}, _From, Channel) ->
    {reply, ok, Channel};
handle_call(Req, _From, Channel) ->
    ?SLOG(error, #{msg => "unexpected_call", req => Req}),
    {reply, ignored, Channel}.
|
||||
|
||||
%% Plain messages: batch a deliver with everything else drained from the
%% mailbox, stop on session expiry, log anything else.
handle_info({deliver, _Topic, _Msg} = Deliver, Channel) ->
    Batch = [Deliver | emqx_utils:drain_deliver()],
    {noreply, handle_deliver(Batch, Channel)};
handle_info(expire_session, Channel) ->
    {stop, expired, Channel};
handle_info(Info, Channel) ->
    ?SLOG(error, #{msg => "unexpected_info", info => Info}),
    {noreply, Channel}.
|
||||
|
||||
%% No casts are expected; log and keep the state unchanged.
handle_cast(Msg, Channel) ->
    ?SLOG(
        error,
        #{
            msg => "unexpected_cast",
            cast => Msg
        }
    ),
    {noreply, Channel}.
|
||||
|
||||
%% Cancel the expiry timer, persist the session when the stop reason is
%% `expired', then let emqx_session run its terminate logic.
terminate(
    Reason,
    Channel = #{conninfo := ConnInfo, clientinfo := ClientInfo, session := Session}
) ->
    ok = cancel_expiry_timer(Channel),
    case Reason of
        expired ->
            emqx_persistent_session:persist(ClientInfo, ConnInfo, Session);
        _ ->
            false
    end,
    emqx_session:terminate(ClientInfo, Reason, Session).
|
||||
|
||||
%% No state migration on code upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Process a batch of deliver messages.
%% While a takeover is in progress the filtered delivers are appended to
%% `pendings'; otherwise they are enqueued into the session, which is then
%% persisted.
handle_deliver(
    Delivers,
    Channel = #{
        takeover := true,
        pendings := Pendings,
        session := Session,
        clientinfo := ClientInfo = #{clientid := ClientId}
    }
) ->
    %% NOTE: Order is important here. While the takeover is in
    %% progress, the session cannot enqueue messages, since it already
    %% passed on the queue to the new connection in the session state.
    Nacked = emqx_channel:maybe_nack(Delivers),
    Filtered = emqx_session:ignore_local(ClientInfo, Nacked, ClientId, Session),
    Channel#{pendings => Pendings ++ Filtered};
handle_deliver(
    Delivers,
    Channel = #{
        takeover := false,
        session := Session,
        clientinfo := ClientInfo = #{clientid := ClientId}
    }
) ->
    Filtered = emqx_session:ignore_local(
        ClientInfo,
        emqx_channel:maybe_nack(Delivers),
        ClientId,
        Session
    ),
    Enqueued = emqx_session:enqueue(ClientInfo, Filtered, Session),
    Persisted = persist(Enqueued, Channel),
    %% We consider queued/dropped messages as delivered since they are now in the session state.
    emqx_channel:maybe_mark_as_delivered(Session, Delivers),
    Persisted.
|
||||
|
||||
%% Cancel the session expiry timer when the channel holds a timer
%% reference; a no-op for anything else. Always returns ok.
cancel_expiry_timer(Channel) ->
    case Channel of
        #{expiry_timer := Timer} when is_reference(Timer) ->
            _ = erlang:cancel_timer(Timer),
            ok;
        _ ->
            ok
    end.
|
||||
|
||||
%% Arm the `expire_session' timer from conninfo's expiry_interval.
%%   ?UINT_MAX      -> no timer is set
%%   Interval > 0   -> timer armed after timer:seconds(Interval)
%%   anything else  -> {error, should_be_expired}
set_expiry_timer(Channel = #{conninfo := ConnInfo}) ->
    ExpiryInterval = maps:get(expiry_interval, ConnInfo),
    case ExpiryInterval of
        ?UINT_MAX ->
            {ok, Channel};
        Interval when Interval > 0 ->
            Delay = timer:seconds(Interval),
            TRef = erlang:send_after(Delay, self(), expire_session),
            {ok, Channel#{expiry_timer => TRef}};
        _ ->
            {error, should_be_expired}
    end.
|
||||
|
||||
%% Open the client's existing session through emqx_cm (clean start is
%% false). Returns {error, no_session} when no session is present;
%% otherwise enqueues pending and drained delivers into the session and
%% registers the channel info with emqx_cm.
open_session(ConnInfo, ClientInfo = #{clientid := ClientId}) ->
    Channel = channel(ConnInfo, ClientInfo),
    case emqx_cm:open_session(_CleanSession = false, ClientInfo, ConnInfo) of
        {error, Reason} = Error ->
            ?SLOG(error, #{
                msg => "session_open_failed",
                clientid => ClientId,
                node => node(),
                reason => Reason
            }),
            Error;
        {ok, #{present := false}} ->
            ?SLOG(info, #{msg => "no_session", clientid => ClientId, node => node()}),
            {error, no_session};
        {ok, #{session := Session, present := true, pendings := Pendings0}} ->
            ?SLOG(info, #{msg => "session_opened", clientid => ClientId, node => node()}),
            %% Merge handed-over pendings with whatever is already in the
            %% mailbox; usort also removes duplicates.
            Pendings = lists:usort(Pendings0 ++ emqx_utils:drain_deliver()),
            Nacked = emqx_channel:maybe_nack(Pendings),
            Filtered = emqx_session:ignore_local(ClientInfo, Nacked, ClientId, Session),
            Enqueued = emqx_session:enqueue(ClientInfo, Filtered, Session),
            Opened = Channel#{session => Enqueued},
            ok = emqx_cm:insert_channel_info(ClientId, info(Opened), stats(Opened)),
            ?SLOG(info, #{msg => "channel_info_updated", clientid => ClientId, node => node()}),
            {ok, Opened}
    end.
|
||||
|
||||
%% Build the conninfo of the eviction channel: keep a fixed set of keys
%% from the old conninfo, mark it as not connected, point conn_mod at this
%% module, and default disconnected_at to the current time when absent.
conninfo(OldConnInfo) ->
    Now = erlang:system_time(millisecond),
    DisconnectedAt = maps:get(disconnected_at, OldConnInfo, Now),
    KeptKeys = [
        socktype,
        sockname,
        peername,
        peercert,
        clientid,
        clean_start,
        receive_maximum,
        expiry_interval,
        connected_at,
        disconnected_at,
        keepalive
    ],
    Overrides = #{
        conn_mod => ?MODULE,
        connected => false,
        disconnected_at => DisconnectedAt
    },
    maps:merge(maps:with(KeptKeys, OldConnInfo), Overrides).
|
||||
|
||||
%% Keep only the clientinfo keys the eviction channel carries over.
clientinfo(OldClientInfo) ->
    KeptKeys = [
        zone,
        protocol,
        peerhost,
        sockport,
        clientid,
        username,
        is_bridge,
        is_superuser,
        mountpoint
    ],
    maps:with(KeptKeys, OldClientInfo).
|
||||
|
||||
%% Initial channel state: no session yet, no expiry timer, no takeover in
%% progress and an empty pendings list.
channel(ConnInfo, ClientInfo) ->
    Defaults = #{
        expiry_timer => undefined,
        takeover => false,
        resuming => false,
        pendings => []
    },
    Defaults#{conninfo => ConnInfo, clientinfo => ClientInfo}.
|
||||
|
||||
%% Run the session through emqx_persistent_session:persist/3 and store the
%% returned session in the channel.
persist(Session, Channel = #{clientinfo := ClientInfo, conninfo := ConnInfo}) ->
    Persisted = emqx_persistent_session:persist(ClientInfo, ConnInfo, Session),
    Channel#{session => Persisted}.
|
||||
|
||||
%% Channel info map as registered with emqx_cm. Missing conninfo,
%% clientinfo or session default to `undefined'; conn_state is always
%% `disconnected'.
info(Channel) ->
    ConnInfo = maps:get(conninfo, Channel, undefined),
    ClientInfo = maps:get(clientinfo, Channel, undefined),
    Session = maps:get(session, Channel, undefined),
    #{
        conninfo => ConnInfo,
        clientinfo => ClientInfo,
        session => emqx_utils:maybe_apply(fun emqx_session:info/1, Session),
        conn_state => disconnected
    }.
|
||||
|
||||
%% Session stats followed by this process's channel metric counters.
stats(#{session := Session}) ->
    SessionStats = emqx_session:stats(Session),
    SessionStats ++ emqx_pd:get_counters(?CHANNEL_METRICS).
|
||||
|
||||
%% Client id as a binary; accepts a binary or an atom.
bin_clientid(ClientId) when is_atom(ClientId) ->
    atom_to_binary(ClientId);
bin_clientid(ClientId) when is_binary(ClientId) ->
    ClientId.
|
|
@ -0,0 +1,30 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_cli).
|
||||
|
||||
%% APIs
|
||||
-export([
|
||||
load/0,
|
||||
unload/0,
|
||||
cli/1
|
||||
]).
|
||||
|
||||
%% Register the `eviction' command, handled by cli/1.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(eviction, Handler, []).
|
||||
|
||||
%% Unregister the `eviction' command.
unload() ->
    emqx_ctl:unregister_command(eviction).
|
||||
|
||||
%% `eviction status' prints whether eviction is enabled; any other
%% argument list prints the usage text.
cli(["status"]) ->
    StatusLine =
        case emqx_eviction_agent:status() of
            disabled -> "Eviction status: disabled~n";
            {enabled, _Stats} -> "Eviction status: enabled~n"
        end,
    emqx_ctl:print(StatusLine);
cli(_) ->
    Usage = [{"eviction status", "Get current node eviction status"}],
    emqx_ctl:usage(Usage).
|
|
@ -0,0 +1,21 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_conn_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Start the connection supervisor, locally registered under the module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||
|
||||
%% one_for_one supervisor that starts with no children.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    ChildSpecs = [],
    {ok, {SupFlags, ChildSpecs}}.
|
|
@ -0,0 +1,34 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Start the application's top supervisor, locally registered under the
%% module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||
|
||||
%% Supervise the agent worker and the connection supervisor, one_for_one.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    AgentSpec = child_spec(worker, emqx_eviction_agent, []),
    ConnSupSpec = child_spec(supervisor, emqx_eviction_agent_conn_sup, []),
    {ok, {SupFlags, [AgentSpec, ConnSupSpec]}}.
|
||||
|
||||
%% Permanent child spec for Mod, started with Mod:start_link(Args...) and
%% identified by the module name.
child_spec(Type, Mod, Args) ->
    StartMFA = {Mod, start_link, Args},
    #{
        id => Mod,
        type => Type,
        start => StartMFA,
        modules => [Mod],
        restart => permanent,
        shutdown => 5000
    }.
|
|
@ -0,0 +1,27 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
evict_session_channel/4
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
|
||||
%% Release in which this version of the protocol module was introduced.
introduced_in() -> "5.0.22".
|
||||
|
||||
%% Call emqx_eviction_agent:evict_session_channel/3 on Node through rpc.
-spec evict_session_channel(
    node(), emqx_types:clientid(), emqx_types:conninfo(), emqx_types:clientinfo()
) ->
    supervisor:startchild_err() | emqx_rpc:badrpc().
evict_session_channel(Node, ClientId, ConnInfo, ClientInfo) ->
    rpc:call(
        Node,
        emqx_eviction_agent,
        evict_session_channel,
        [ClientId, ConnInfo, ClientInfo]
    ).
|
|
@ -0,0 +1,467 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/0, emqtt_connect/1, emqtt_connect/2]
|
||||
).
|
||||
|
||||
-define(assertPrinted(Printed, Code),
|
||||
?assertMatch(
|
||||
{match, _},
|
||||
re:run(Code, Printed)
|
||||
)
|
||||
).
|
||||
|
||||
%% Test cases of this suite, as listed by emqx_common_test_helpers.
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
%% Start the eviction agent application for the whole suite.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Stop the application started in init_per_suite/1.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||
|
||||
%% Per-case setup: disable any eviction left enabled under `test_eviction'
%% (result ignored), start tracing, and start the extra cluster nodes for
%% the cases that need them.
init_per_testcase(TestCase, Config) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:start_trace(),
    start_slave(TestCase, Config).
|
||||
|
||||
%% Only t_explicit_session_takeover needs a two-node cluster; its nodes
%% are put into Config under `evacuate_nodes'.
start_slave(t_explicit_session_takeover, Config) ->
    NodeSpecs = [{evacuate_test1, 2883}, {evacuate_test2, 3883}],
    Apps = [emqx_eviction_agent],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, Apps),
    [{evacuate_nodes, ClusterNodes} | Config];
start_slave(_Case, Config) ->
    Config.
|
||||
|
||||
%% Per-case teardown: disable eviction, stop tracing, stop extra nodes.
end_per_testcase(Case, Config) ->
    emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:stop(),
    stop_slave(Case, Config).
|
||||
|
||||
%% Stop the cluster started by start_slave/2 for the takeover case.
stop_slave(t_explicit_session_takeover, Config) ->
    ClusterNodes = ?config(evacuate_nodes, Config),
    emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, [emqx_eviction_agent]);
stop_slave(_Case, _Config) ->
    ok.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Walk through enable/disable: a different kind (`bar') gets
%% eviction_agent_busy, re-enabling under the same kind succeeds, client
%% connects are rejected with use_another_server while enabled, and a
%% second disable reports {error, disabled}.
t_enable_disable(_Config) ->
    erlang:process_flag(trap_exit, true),

    ?assertMatch(disabled, emqx_eviction_agent:status()),

    %% Connecting works while the agent is disabled.
    {ok, ClientBefore} = emqtt_connect(),
    ok = emqtt:disconnect(ClientBefore),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    ?assertMatch({error, eviction_agent_busy}, emqx_eviction_agent:enable(bar, undefined)),
    ?assertMatch(ok, emqx_eviction_agent:enable(test_eviction, <<"srv">>)),
    ?assertMatch({enabled, #{}}, emqx_eviction_agent:status()),
    ?assertMatch({error, {use_another_server, #{}}}, emqtt_connect()),
    ?assertMatch({error, eviction_agent_busy}, emqx_eviction_agent:disable(bar)),
    ?assertMatch(ok, emqx_eviction_agent:disable(test_eviction)),
    ?assertMatch({error, disabled}, emqx_eviction_agent:disable(test_eviction)),
    ?assertMatch(disabled, emqx_eviction_agent:status()),

    %% ...and works again once it has been disabled.
    {ok, ClientAfter} = emqtt_connect(),
    ok = emqtt:disconnect(ClientAfter).
|
||||
|
||||
%% evict_connections/1 is refused while disabled; once enabled, the status
%% stats show one connection before eviction and zero after.
t_evict_connections_status(_Config) ->
    erlang:process_flag(trap_exit, true),

    {ok, _Client} = emqtt_connect(),

    {error, disabled} = emqx_eviction_agent:evict_connections(1),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertMatch({enabled, #{connections := 1, sessions := _}}, emqx_eviction_agent:status()),

    ok = emqx_eviction_agent:evict_connections(1),

    %% Give the eviction a moment to take effect.
    ct:sleep(100),

    ?assertMatch({enabled, #{connections := 0, sessions := _}}, emqx_eviction_agent:status()),

    ok = emqx_eviction_agent:disable(test_eviction).
|
||||
|
||||
%% Evict a persistent-session client from Node1, move its session first to
%% Node1 itself and then to Node2, publish to its subscription in between,
%% and check that reconnecting the client yields both messages in order.
t_explicit_session_takeover(Config) ->
    _ = erlang:process_flag(trap_exit, true),
    ok = restart_emqx(),

    [{Node1, Port1}, {Node2, _Port2}] = ?config(evacuate_nodes, Config),

    SessionClientOpts = [
        {clientid, <<"client_with_session">>},
        {clean_start, false},
        {port, Port1}
    ],
    %% Publishes one message to t1 from a short-lived client on Node1.
    PublishToT1 = fun(Payload) ->
        {ok, Publisher} = emqtt_connect([{port, Port1}]),
        emqtt:publish(Publisher, <<"t1">>, Payload),
        ok = emqtt:disconnect(Publisher)
    end,

    {ok, C0} = emqtt_connect(SessionClientOpts),
    {ok, _, _} = emqtt:subscribe(C0, <<"t1">>),

    ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),
    ?assertEqual(1, rpc:call(Node1, emqx_eviction_agent, connection_count, [])),

    [ChanPid] = rpc:call(Node1, emqx_cm, lookup_channels, [<<"client_with_session">>]),

    ?assertWaitEvent(
        begin
            ok = rpc:call(Node1, emqx_eviction_agent, evict_connections, [1]),
            receive
                {'EXIT', C0, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
            after 1000 ->
                ?assert(false, "Connection not evicted")
            end
        end,
        #{?snk_kind := emqx_cm_connected_client_count_dec, chan_pid := ChanPid},
        2000
    ),

    ?assertEqual(0, rpc:call(Node1, emqx_eviction_agent, connection_count, [])),
    ?assertEqual(1, rpc:call(Node1, emqx_eviction_agent, session_count, [])),

    %% First, evacuate to the same node
    ?assertWaitEvent(
        rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node1]),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),
    ok = PublishToT1(<<"MessageToEvictedSession1">>),
    ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),

    %% Evacuate to another node
    ?assertWaitEvent(
        rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node2]),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ?assertEqual(0, rpc:call(Node1, emqx_eviction_agent, session_count, [])),
    ?assertEqual(1, rpc:call(Node2, emqx_eviction_agent, session_count, [])),

    ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),

    %% Session is on Node2, but we connect to Node1
    ok = PublishToT1(<<"MessageToEvictedSession2">>),

    ct:sleep(100),

    %% Session is on Node2, but we connect the subscribed client to Node1.
    %% It should take over the session for the third time and receive
    %% previously published messages
    {ok, C3} = emqtt_connect(SessionClientOpts),

    ok = assert_receive_publish([
        #{payload => <<"MessageToEvictedSession1">>, topic => <<"t1">>},
        #{payload => <<"MessageToEvictedSession2">>, topic => <<"t1">>}
    ]),
    ok = emqtt:disconnect(C3).
|
||||
|
||||
%% Restarting the agent worker under its supervisor leaves it disabled.
t_disable_on_restart(_Config) ->
    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    Sup = emqx_eviction_agent_sup,
    ok = supervisor:terminate_child(Sup, emqx_eviction_agent),
    {ok, _} = supervisor:restart_child(Sup, emqx_eviction_agent),

    ?assertEqual(disabled, emqx_eviction_agent:status()).
|
||||
|
||||
%% After a session is evacuated to the local node it must still be listed
%% by the management API and CLI, and kicking it must remove it.
t_session_serialization(_Config) ->
    _ = erlang:process_flag(trap_exit, true),
    ok = restart_emqx(),

    %% Leave behind a disconnected client with a session and a subscription.
    {ok, C0} = emqtt_connect(<<"client_with_session">>, false),
    {ok, _, _} = emqtt:subscribe(C0, <<"t1">>),
    ok = emqtt:disconnect(C0),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:session_count()),

    %% Evacuate to the same node
    ?assertWaitEvent(
        emqx_eviction_agent:evict_sessions(1, node()),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ok = emqx_eviction_agent:disable(test_eviction),
    ?assertEqual(1, emqx_eviction_agent:session_count()),

    ?assertMatch(
        #{data := [#{clientid := <<"client_with_session">>}]},
        emqx_mgmt_api:cluster_query(
            emqx_channel_info,
            #{},
            [],
            fun emqx_mgmt_api_clients:qs2ms/2,
            fun emqx_mgmt_api_clients:format_channel_info/2
        )
    ),

    mock_print(),

    ?assertPrinted("client_with_session", emqx_mgmt_cli:clients(["list"])),
    ?assertPrinted("client_with_session", emqx_mgmt_cli:clients(["show", "client_with_session"])),

    ?assertWaitEvent(
        emqx_cm:kick_session(<<"client_with_session">>),
        #{?snk_kind := emqx_cm_clean_down, client_id := <<"client_with_session">>},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:session_count()).
|
||||
|
||||
%% A client carrying a will message is disconnected by sending its channel
%% the use_another_server disconnect message; a subscriber on the will
%% topic must then receive the will payload within one second.
t_will_msg(_Config) ->
    erlang:process_flag(trap_exit, true),

    WillMsg = <<"will_msg">>,
    WillTopic = <<"will_topic">>,
    ClientId = <<"client_with_will">>,

    %% Assert the connect succeeded: previously the result was discarded,
    %% so a failed connect only showed up later as a badmatch on the
    %% channel lookup below.
    {ok, _WillClient} = emqtt_connect([
        {clean_start, false},
        {clientid, ClientId},
        {will_payload, WillMsg},
        {will_topic, WillTopic}
    ]),

    {ok, C} = emqtt_connect(),
    {ok, _, _} = emqtt:subscribe(C, WillTopic),

    [ChanPid] = emqx_cm:lookup_channels(ClientId),

    ChanPid !
        {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, #{
            'Server-Reference' => <<>>
        }},

    receive
        {publish, #{
            payload := WillMsg,
            topic := WillTopic
        }} ->
            ok
    after 1000 ->
        ct:fail("Will message not received")
    end,

    ok = emqtt:disconnect(C).
|
||||
|
||||
%% A WebSocket connection is counted by the agent and can be evicted.
t_ws_conn(_Config) ->
    erlang:process_flag(trap_exit, true),

    ClientId = <<"ws_client">>,
    WsOpts = [
        {proto_ver, v5},
        {clientid, ClientId},
        {port, 8083},
        {ws_path, "/mqtt"}
    ],
    {ok, C} = emqtt:start_link(WsOpts),
    {ok, _} = emqtt:ws_connect(C),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:connection_count()),

    ?assertWaitEvent(
        ok = emqx_eviction_agent:evict_connections(1),
        #{?snk_kind := emqx_cm_connected_client_count_dec},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:connection_count()).
|
||||
|
||||
-ifndef(BUILD_WITHOUT_QUIC).
|
||||
|
||||
%% A QUIC connection is counted by the agent and can be evicted.
t_quic_conn(_Config) ->
    erlang:process_flag(trap_exit, true),

    %% Bring up a QUIC listener on a free port.
    QuicPort = emqx_common_test_helpers:select_free_port(quic),
    application:ensure_all_started(quicer),
    emqx_common_test_helpers:ensure_quic_listener(?MODULE, QuicPort),

    ClientId = <<"quic_client">>,
    QuicOpts = [
        {proto_ver, v5},
        {clientid, ClientId},
        {port, QuicPort}
    ],
    {ok, C} = emqtt:start_link(QuicOpts),
    {ok, _} = emqtt:quic_connect(C),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:connection_count()),

    ?assertWaitEvent(
        ok = emqx_eviction_agent:evict_connections(1),
        #{?snk_kind := emqx_cm_connected_client_count_dec},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:connection_count()).
|
||||
|
||||
-endif.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Expect one {publish, ...} message per listed payload/topic pair, in list
%% order, allowing up to one second for each; fails the assertion when one
%% does not arrive. Returns ok.
assert_receive_publish(Expected) ->
    lists:foreach(
        fun(#{payload := Msg, topic := Topic}) ->
            receive
                {publish, #{payload := Msg, topic := Topic}} ->
                    ok
            after 1000 ->
                ?assert(false, "Message `" ++ binary_to_list(Msg) ++ "` is lost")
            end
        end,
        Expected
    ).
|
||||
|
||||
%% Publish a single message from a short-lived client.
connect_and_publish(Topic, Message) ->
    {ok, Publisher} = emqtt_connect(),
    emqtt:publish(Publisher, Topic, Message),
    ok = emqtt:disconnect(Publisher).
|
||||
|
||||
%% Stop and start emqx, then the eviction agent application. Results of
%% the individual stop/start calls are ignored.
restart_emqx() ->
    lists:foreach(
        fun(App) ->
            _ = application:stop(App),
            _ = application:start(App)
        end,
        [emqx, emqx_eviction_agent]
    ).
|
||||
|
||||
%% Mock emqx_ctl so print/usage return their formatted text, which the
%% ?assertPrinted checks then match against.
mock_print() ->
    %% Drop any mock left over from an earlier call.
    catch meck:unload(emqx_ctl),
    meck:new(emqx_ctl, [non_strict, passthrough]),
    Expectations = [
        {print, fun(Arg) -> emqx_ctl:format(Arg, []) end},
        {print, fun(Msg, Arg) -> emqx_ctl:format(Msg, Arg) end},
        {usage, fun(Usages) -> emqx_ctl:format_usage(Usages) end},
        {usage, fun(Cmd, Descr) -> emqx_ctl:format_usage(Cmd, Descr) end}
    ],
    lists:foreach(
        fun({Name, Fun}) -> meck:expect(emqx_ctl, Name, Fun) end,
        Expectations
    ).
|
|
@ -0,0 +1,69 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_api_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_mgmt_api_test_util,
|
||||
[
|
||||
request_api/2,
|
||||
uri/1
|
||||
]
|
||||
).
|
||||
|
||||
%% Test cases of this suite, as listed by emqx_common_test_helpers.
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
%% Set up the management API test environment with the eviction agent app.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:init_suite(Apps),
    Config.
|
||||
|
||||
%% Tear down what init_per_suite/1 set up.
end_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:end_suite(Apps),
    Config.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% GET node_eviction/status reflects enabling and disabling the agent.
t_status(_Config) ->
    StatusPath = ["node_eviction", "status"],

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),

    ok = emqx_eviction_agent:enable(apitest, undefined),

    ?assertMatch(
        {ok, #{<<"status">> := <<"enabled">>, <<"stats">> := #{}}},
        api_get(StatusPath)
    ),

    ok = emqx_eviction_agent:disable(apitest),

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% GET the given API path; on success the JSON body is decoded into maps,
%% errors are returned unchanged.
api_get(Path) ->
    case request_api(get, uri(Path)) of
        {error, _} = Error ->
            Error;
        {ok, ResponseBody} ->
            Json = list_to_binary(ResponseBody),
            {ok, jiffy:decode(Json, [return_maps])}
    end.
|
|
@ -0,0 +1,251 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_channel_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
|
||||
-define(CLIENT_ID, <<"client_with_session">>).
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/0, emqtt_connect/2]
|
||||
).
|
||||
|
||||
%% Common Test callback: the list of test cases is delegated to
%% emqx_common_test_helpers:all/1 for this module.
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
%% Suite setup: starts the config and eviction agent applications, then
%% switches RPC port discovery to `manual`.
init_per_suite(Config) ->
    Apps = [emqx_conf, emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    {ok, _} = emqx:update_config([rpc, port_discovery], manual),
    Config.
|
||||
|
||||
%% Suite teardown: stops the applications in the reverse order of startup.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_conf],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||
|
||||
%% Per-testcase setup. Only t_persistence needs extra preparation: the
%% persistent session store is switched on, its supervisor is started and
%% the DB backend is (re)initialised. Every other case gets Config as is.
init_per_testcase(TestCase, Config) ->
    case TestCase of
        t_persistence ->
            emqx_config:put([persistent_session_store, enabled], true),
            {ok, _} = emqx_persistent_session_sup:start_link(),
            emqx_persistent_session:init_db_backend(),
            ?assert(emqx_persistent_session:is_store_enabled());
        _ ->
            ok
    end,
    Config.
|
||||
|
||||
%% Per-testcase teardown. For t_persistence the persistent session store is
%% switched off again and the backend re-initialised (returns Config);
%% all other cases need no cleanup (returns ok).
end_per_testcase(TestCase, Config) ->
    case TestCase of
        t_persistence ->
            emqx_config:put([persistent_session_store, enabled], false),
            emqx_persistent_session:init_db_backend(),
            ?assertNot(emqx_persistent_session:is_store_enabled()),
            Config;
        _ ->
            ok
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Starting an eviction channel without connecting a client first is
%% expected to be rejected with a `no_session` error.
t_start_no_session(_Config) ->
    ClientInfo = #{
        clientid => ?CLIENT_ID,
        zone => internal
    },
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 10000
    },
    Result = emqx_eviction_agent_channel:start_supervised(
        #{clientinfo => ClientInfo, conninfo => ConnInfo}
    ),
    ?assertMatch({error, {no_session, _}}, Result).
|
||||
|
||||
%% With a connected client but an expiry interval of 0, starting the
%% eviction channel is expected to fail with `should_be_expired`.
t_start_no_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{
        clientid => ?CLIENT_ID,
        zone => internal
    },
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 0
    },
    Result = emqx_eviction_agent_channel:start_supervised(
        #{clientinfo => ClientInfo, conninfo => ConnInfo}
    ),
    ?assertMatch({error, {should_be_expired, _}}, Result).
|
||||
|
||||
%% With a connected client and the maximum expiry interval (?UINT_MAX),
%% the eviction channel is expected to start successfully.
t_start_infinite_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{
        clientid => ?CLIENT_ID,
        zone => internal
    },
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => ?UINT_MAX
    },
    Result = emqx_eviction_agent_channel:start_supervised(
        #{clientinfo => ClientInfo, conninfo => ConnInfo}
    ),
    ?assertMatch({ok, _}, Result).
|
||||
|
||||
%% A `kick` call to a running eviction channel is expected to reply `ok`.
t_kick(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    Reply = emqx_eviction_agent_channel:call(Channel, kick),
    ?assertEqual(ok, Reply).
|
||||
|
||||
%% A `discard` call to a running eviction channel is expected to reply `ok`.
t_discard(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    Reply = emqx_eviction_agent_channel:call(Channel, discard),
    ?assertEqual(ok, Reply).
|
||||
|
||||
%% Stopping a running eviction channel is expected to return `ok`.
t_stop(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    Reply = emqx_eviction_agent_channel:stop(Channel),
    ?assertEqual(ok, Reply).
|
||||
|
||||
%% Sends an unknown cast and an unknown raw message to the channel, then
%% checks the replies to three calls: `list_acl_cache` -> [],
%% `{quota, _}` -> ok, and an unknown call -> ignored.
t_ignored_calls(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    ok = emqx_eviction_agent_channel:cast(Channel, unknown),
    Channel ! unknown,

    AclCache = emqx_eviction_agent_channel:call(Channel, list_acl_cache),
    ?assertEqual([], AclCache),

    QuotaReply = emqx_eviction_agent_channel:call(Channel, {quota, quota}),
    ?assertEqual(ok, QuotaReply),

    UnknownReply = emqx_eviction_agent_channel:call(Channel, unknown),
    ?assertEqual(ignored, UnknownReply).
|
||||
|
||||
%% Starts the channel with the expiry interval overridden to 1 and expects
%% the channel process to be gone after a 1.5 s wait.
t_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    Opts0 = evict_session_opts(?CLIENT_ID),
    #{conninfo := ConnInfo} = Opts0,
    ShortLived = ConnInfo#{expiry_interval => 1},
    Opts1 = Opts0#{conninfo => ShortLived},

    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(Opts1),

    ct:sleep(1500),

    ?assertNot(is_process_alive(Channel)).
|
||||
|
||||
%% The connected client count is expected to be 1 after the client
%% connects and 0 once an eviction channel has been started for the same
%% client id.
t_get_connected_client_count(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    CountBefore = emqx_cm:get_connected_client_count(),
    ?assertEqual(1, CountBefore),

    {ok, _} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),
    CountAfter = emqx_cm:get_connected_client_count(),
    ?assertEqual(0, CountAfter).
|
||||
|
||||
%% End-to-end check with the persistent session store enabled (see
%% init_per_testcase/2): a message published while the session is held by
%% the eviction channel must be delivered after the client reconnects.
t_persistence(_Config) ->
    erlang:process_flag(trap_exit, true),

    Topic = <<"t1">>,
    Message = <<"message_to_persist">>,

    %% Subscriber with a session that outlives the connection
    {ok, Subscriber} = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),
    {ok, _, _} = emqtt:subscribe(Subscriber, Topic, 0),

    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    %% Publish from a separate client, then drop that client
    {ok, Publisher} = emqx_eviction_agent_test_helpers:emqtt_connect(),
    {ok, _} = emqtt:publish(Publisher, Topic, Message, 1),
    ok = emqtt:disconnect(Publisher),

    %% Kill channel so that the session is only persisted
    ok = emqx_eviction_agent_channel:call(Channel, kick),

    %% Should restore session from persistent storage and receive messages
    {ok, Restored} = emqx_eviction_agent_test_helpers:emqtt_connect(?CLIENT_ID, false),

    receive
        {publish, #{payload := Message, topic := Topic}} ->
            ok
    after 1000 ->
        ct:fail("message not received")
    end,

    ok = emqtt:disconnect(Restored).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Builds the options for start_supervised/1 by taking only the `conninfo`
%% and `clientinfo` entries from the channel info registered for ClientId.
evict_session_opts(ClientId) ->
    ChanInfo = emqx_cm:get_chan_info(ClientId),
    maps:with([conninfo, clientinfo], ChanInfo).
|
|
@ -0,0 +1,39 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_cli_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
%% Common Test callback: the list of test cases is delegated to
%% emqx_common_test_helpers:all/1 for this module.
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
%% Suite setup: starts the eviction agent application.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Suite teardown: disables the agent for the `foo` kind used by t_status/1
%% (result deliberately ignored), then stops the application.
end_per_suite(Config) ->
    _ = emqx_eviction_agent:disable(foo),
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps),
    Config.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Smoke test for the CLI: every invocation below is expected to return ok.
t_status(_Config) ->
    Cli = fun(Args) -> emqx_eviction_agent_cli:cli(Args) end,

    %% usage
    ok = Cli(["foobar"]),

    %% status (before the agent is enabled)
    ok = Cli(["status"]),

    ok = emqx_eviction_agent:enable(foo, undefined),

    %% status (after the agent is enabled)
    ok = Cli(["status"]).
|
|
@ -0,0 +1,134 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_test_helpers).
|
||||
|
||||
-export([
|
||||
emqtt_connect/0,
|
||||
emqtt_connect/1,
|
||||
emqtt_connect/2,
|
||||
emqtt_connect_many/2,
|
||||
stop_many/1,
|
||||
|
||||
emqtt_try_connect/1,
|
||||
|
||||
start_cluster/2,
|
||||
start_cluster/3,
|
||||
stop_cluster/2,
|
||||
|
||||
case_specific_node_name/2,
|
||||
case_specific_node_name/3,
|
||||
concat_atoms/1
|
||||
]).
|
||||
|
||||
%% Connects a client with the fixed id <<"client1">> and clean_start = true.
emqtt_connect() ->
    DefaultClientId = <<"client1">>,
    emqtt_connect(DefaultClientId, true).
|
||||
|
||||
%% Connects a client with the given client id and clean_start flag.
emqtt_connect(ClientId, CleanStart) ->
    Opts = [
        {clientid, ClientId},
        {clean_start, CleanStart}
    ],
    emqtt_connect(Opts).
|
||||
|
||||
%% Starts an emqtt client with Opts plus MQTT v5 and a session expiry
%% interval of 600, then connects it.
%% Returns {ok, Client} on success or the {error, _} tuple from
%% emqtt:connect/1.
emqtt_connect(Opts) ->
    Defaults = [
        {proto_ver, v5},
        {properties, #{'Session-Expiry-Interval' => 600}}
    ],
    {ok, Client} = emqtt:start_link(Opts ++ Defaults),
    case emqtt:connect(Client) of
        {error, _} = Error -> Error;
        {ok, _} -> {ok, Client}
    end.
|
||||
|
||||
%% Connects Count clients to Port with ids <<"client-1">> .. <<"client-N">>
%% (clean_start = false) and returns the client pids in that order.
%% Crashes with badmatch if any connection attempt fails.
emqtt_connect_many(Port, Count) ->
    [
        begin
            Suffix = integer_to_binary(N),
            ClientId = <<"client-", Suffix/binary>>,
            {ok, Client} = emqtt_connect([
                {clientid, ClientId},
                {clean_start, false},
                {port, Port}
            ]),
            Client
        end
     || N <- lists:seq(1, Count)
    ].
|
||||
|
||||
%% Best-effort disconnect of every client (failures are swallowed on
%% purpose), followed by a short pause.
stop_many(Clients) ->
    _ = [(catch emqtt:disconnect(Client)) || Client <- Clients],
    ct:sleep(100).
|
||||
|
||||
%% Probes whether a connection with Opts can be established.
%% Returns ok (after disconnecting again) or the {error, _} tuple from
%% emqtt_connect/1.
emqtt_try_connect(Opts) ->
    case emqtt_connect(Opts) of
        {error, _} = Error ->
            Error;
        {ok, Client} ->
            emqtt:disconnect(Client),
            ok
    end.
|
||||
|
||||
%% Same as start_cluster/3 with no extra application environment.
start_cluster(NamesWithPorts, Apps) ->
    NoExtraEnv = [],
    start_cluster(NamesWithPorts, Apps, NoExtraEnv).
|
||||
|
||||
%% Starts one core node per {ShortName, Port} entry, each with a TCP
%% listener on Port, the given Apps and extra Env. The ssl/ws/wss default
%% listeners are disabled and RPC mode is set to async.
%% Returns a list of {Node, Port} for the started nodes.
start_cluster(NamesWithPorts, Apps, Env) ->
    ToSpec = fun({ShortName, Port}) ->
        {core, ShortName, #{listener_ports => [{tcp, Port}]}}
    end,
    Specs = lists:map(ToSpec, NamesWithPorts),

    DisabledListeners = [
        {[listeners, Proto, default, enabled], false}
     || Proto <- [ssl, ws, wss]
    ],
    ClusterOpts = [
        {env, [{emqx, boot_modules, [broker, listeners]}] ++ Env},
        {apps, Apps},
        {conf, DisabledListeners ++ [{[rpc, mode], async}]}
    ],
    Cluster = emqx_common_test_helpers:emqx_cluster(Specs, ClusterOpts),

    [
        {
            emqx_common_test_helpers:start_slave(Name, NodeOpts),
            proplists:get_value(Name, NamesWithPorts)
        }
     || {Name, NodeOpts} <- Cluster
    ].
|
||||
|
||||
%% Stops Apps on every node of the cluster via RPC and then shuts the
%% node itself down.
stop_cluster(NodesWithPorts, Apps) ->
    StopNode = fun({Node, _Port}) ->
        _ = [rpc:call(Node, application, stop, [App]) || App <- Apps],
        %% This sleep is just to make logs cleaner
        ct:sleep(100),
        %% NOTE(review): the argument list is empty, so this invokes
        %% stop_apps/0 on the remote node; the result is ignored either
        %% way. Confirm that this arity exists.
        _ = rpc:call(Node, emqx_common_test_helpers, stop_apps, []),
        emqx_common_test_helpers:stop_slave(Node)
    end,
    lists:foreach(StopNode, NodesWithPorts).
|
||||
|
||||
%% Builds the atom 'Module__Case'.
case_specific_node_name(Module, Case) ->
    Parts = [Module, '__', Case],
    concat_atoms(Parts).
|
||||
|
||||
%% Builds the atom 'Module__Case__Node'.
case_specific_node_name(Module, Case, Node) ->
    Parts = [Module, '__', Case, '__', Node],
    concat_atoms(Parts).
|
||||
|
||||
%% Concatenates the textual form of the given atoms into a single new atom.
concat_atoms(Atoms) ->
    Pieces = [atom_to_binary(Atom) || Atom <- Atoms],
    binary_to_atom(iolist_to_binary(Pieces)).
|
|
@ -155,7 +155,9 @@ basic_reboot_apps() ->
|
|||
CE ++
|
||||
[
|
||||
emqx_s3,
|
||||
emqx_ft
|
||||
emqx_ft,
|
||||
emqx_eviction_agent,
|
||||
emqx_node_rebalance
|
||||
]
|
||||
end.
|
||||
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
Business Source License 1.1
|
||||
|
||||
Licensor: Hangzhou EMQ Technologies Co., Ltd.
|
||||
Licensed Work: EMQX Enterprise Edition
|
||||
The Licensed Work is (c) 2023
|
||||
Hangzhou EMQ Technologies Co., Ltd.
|
||||
Additional Use Grant: Students and educators are granted right to copy,
|
||||
modify, and create derivative work for research
|
||||
or education.
|
||||
Change Date: 2027-02-01
|
||||
Change License: Apache License, Version 2.0
|
||||
|
||||
For information about alternative licensing arrangements for the Software,
|
||||
please contact Licensor: https://www.emqx.com/en/contact
|
||||
|
||||
Notice
|
||||
|
||||
The Business Source License (this document, or the “License”) is not an Open
|
||||
Source license. However, the Licensed Work will eventually be made available
|
||||
under an Open Source License, as stated in this License.
|
||||
|
||||
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
||||
“Business Source License” is a trademark of MariaDB Corporation Ab.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
Business Source License 1.1
|
||||
|
||||
Terms
|
||||
|
||||
The Licensor hereby grants you the right to copy, modify, create derivative
|
||||
works, redistribute, and make non-production use of the Licensed Work. The
|
||||
Licensor may make an Additional Use Grant, above, permitting limited
|
||||
production use.
|
||||
|
||||
Effective on the Change Date, or the fourth anniversary of the first publicly
|
||||
available distribution of a specific version of the Licensed Work under this
|
||||
License, whichever comes first, the Licensor hereby grants you rights under
|
||||
the terms of the Change License, and the rights granted in the paragraph
|
||||
above terminate.
|
||||
|
||||
If your use of the Licensed Work does not comply with the requirements
|
||||
currently in effect as described in this License, you must purchase a
|
||||
commercial license from the Licensor, its affiliated entities, or authorized
|
||||
resellers, or you must refrain from using the Licensed Work.
|
||||
|
||||
All copies of the original and modified Licensed Work, and derivative works
|
||||
of the Licensed Work, are subject to this License. This License applies
|
||||
separately for each version of the Licensed Work and the Change Date may vary
|
||||
for each version of the Licensed Work released by Licensor.
|
||||
|
||||
You must conspicuously display this License on each original or modified copy
|
||||
of the Licensed Work. If you receive the Licensed Work in original or
|
||||
modified form from a third party, the terms and conditions set forth in this
|
||||
License apply to your use of that work.
|
||||
|
||||
Any use of the Licensed Work in violation of this License will automatically
|
||||
terminate your rights under this License for the current and all other
|
||||
versions of the Licensed Work.
|
||||
|
||||
This License does not grant you any right in any trademark or logo of
|
||||
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||
Licensor as expressly required by this License).
|
||||
|
||||
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
||||
TITLE.
|
||||
|
||||
MariaDB hereby grants you permission to use this License’s text to license
|
||||
your works, and to refer to it using the trademark “Business Source License”,
|
||||
as long as you comply with the Covenants of Licensor below.
|
||||
|
||||
Covenants of Licensor
|
||||
|
||||
In consideration of the right to use this License’s text and the “Business
|
||||
Source License” name and trademark, Licensor covenants to MariaDB, and to all
|
||||
other recipients of the licensed work to be provided by Licensor:
|
||||
|
||||
1. To specify as the Change License the GPL Version 2.0 or any later version,
|
||||
or a license that is compatible with GPL Version 2.0 or a later version,
|
||||
where “compatible” means that software provided under the Change License can
|
||||
be included in a program with software provided under GPL Version 2.0 or a
|
||||
later version. Licensor may specify additional Change Licenses without
|
||||
limitation.
|
||||
|
||||
2. To either: (a) specify an additional grant of rights to use that does not
|
||||
impose any additional restriction on the right granted in this License, as
|
||||
the Additional Use Grant; or (b) insert the text “None”.
|
||||
|
||||
3. To specify a Change Date.
|
||||
|
||||
4. Not to modify this License in any other way.
|
|
@ -0,0 +1,40 @@
|
|||
# EMQX Node Rebalance
|
||||
|
||||
`emqx_node_rebalance` is a part of the node evacuation/node rebalance feature in EMQX.
|
||||
It implements high-level scenarios for node evacuation and rebalancing.
|
||||
|
||||
## Application Responsibilities
|
||||
|
||||
`emqx_node_rebalance` application's core concept is a _rebalance coordinator_.
|
||||
_Rebalance coordinator_ is an entity that implements the rebalancing logic and orchestrates the rebalancing process.
|
||||
In particular, it:
|
||||
|
||||
* Enables/Disables Eviction Agent on nodes.
|
||||
* Sends connection/session eviction commands to Eviction Agents according to the evacuation logic.
|
||||
|
||||
We have two implementations of the _rebalance coordinator_:
|
||||
* `emqx_node_rebalance` - a coordinator that implements node rebalancing;
|
||||
* `emqx_node_rebalance_evacuation` - a coordinator that implements node evacuation.
|
||||
|
||||
## EMQX Integration
|
||||
|
||||
`emqx_node_rebalance` is a high-level application that is loosely coupled with the rest of the system.
|
||||
It uses Eviction Agent to perform the required operations.
|
||||
|
||||
## User Facing API
|
||||
|
||||
The application provides an API (CLI and HTTP) to perform the following operations:
|
||||
* Start/Stop rebalancing across a set of nodes or the whole cluster;
|
||||
* Start/Stop evacuation of a node;
|
||||
* Get the current rebalancing status of a local node;
|
||||
* Get the current rebalancing status of the whole cluster.
|
||||
|
||||
Also, an HTTP endpoint is provided for liveness probes.
|
||||
|
||||
# Documentation
|
||||
|
||||
The rebalancing concept is described in the corresponding [EIP](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md).
|
||||
|
||||
# Contributing
|
||||
|
||||
Please see our [contributing.md](../../CONTRIBUTING.md).
|
|
@ -0,0 +1,3 @@
|
|||
##--------------------------------------------------------------------
|
||||
## EMQX Node Rebalance Plugin
|
||||
##--------------------------------------------------------------------
|
|
@ -0,0 +1,21 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-define(DEFAULT_CONN_EVICT_RATE, 500).
|
||||
-define(DEFAULT_SESS_EVICT_RATE, 500).
|
||||
|
||||
%% seconds
|
||||
-define(DEFAULT_WAIT_HEALTH_CHECK, 60).
|
||||
%% seconds
|
||||
-define(DEFAULT_WAIT_TAKEOVER, 60).
|
||||
|
||||
-define(DEFAULT_ABS_CONN_THRESHOLD, 1000).
|
||||
-define(DEFAULT_ABS_SESS_THRESHOLD, 1000).
|
||||
|
||||
-define(DEFAULT_REL_CONN_THRESHOLD, 1.1).
|
||||
-define(DEFAULT_REL_SESS_THRESHOLD, 1.1).
|
||||
|
||||
-define(EVICT_INTERVAL, 1000).
|
||||
|
||||
-define(EVACUATION_FILENAME, <<".evacuation">>).
|
|
@ -0,0 +1,2 @@
|
|||
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,21 @@
|
|||
{application, emqx_node_rebalance, [
|
||||
{description, "EMQX Node Rebalance"},
|
||||
{vsn, "5.0.0"},
|
||||
{registered, [
|
||||
emqx_node_rebalance_sup,
|
||||
emqx_node_rebalance,
|
||||
emqx_node_rebalance_agent,
|
||||
emqx_node_rebalance_evacuation
|
||||
]},
|
||||
{applications, [
|
||||
kernel,
|
||||
stdlib
|
||||
]},
|
||||
{mod, {emqx_node_rebalance_app, []}},
|
||||
{env, []},
|
||||
{modules, []},
|
||||
{links, [
|
||||
{"Homepage", "https://www.emqx.com/"},
|
||||
{"Github", "https://github.com/emqx"}
|
||||
]}
|
||||
]}.
|
|
@ -0,0 +1,3 @@
|
|||
%% -*- mode: erlang -*-
|
||||
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,438 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance).
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start/1,
|
||||
status/0,
|
||||
status/1,
|
||||
stop/0
|
||||
]).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-behaviour(gen_statem).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
callback_mode/0,
|
||||
handle_event/4,
|
||||
code_change/4
|
||||
]).
|
||||
|
||||
-export([
|
||||
is_node_available/0,
|
||||
available_nodes/1,
|
||||
connection_count/0,
|
||||
session_count/0,
|
||||
disconnected_session_count/0
|
||||
]).
|
||||
|
||||
-export_type([
|
||||
start_opts/0,
|
||||
start_error/0
|
||||
]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Options accepted by start/1. Any key that is omitted is filled in from
%% default_opts/0 by start/1.
-type start_opts() :: #{
    conn_evict_rate => pos_integer(),
    sess_evict_rate => pos_integer(),
    %% passed through seconds/1 before being used as a state timeout
    wait_health_check => pos_integer(),
    %% passed through seconds/1 before being used as a state timeout
    wait_takeover => pos_integer(),
    abs_conn_threshold => pos_integer(),
    rel_conn_threshold => number(),
    abs_sess_threshold => pos_integer(),
    rel_sess_threshold => number(),
    nodes => [node()]
}.
-type start_error() :: already_started | [{node(), term()}].

%% Asks the locally registered coordinator to start rebalancing.
%% User-supplied options take precedence over the defaults.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    gen_statem:call(?MODULE, {start, maps:merge(default_opts(), StartOpts)}).
|
||||
|
||||
%% Asks the locally registered coordinator to stop rebalancing.
%% Replies {error, not_started} when the coordinator is in the `disabled`
%% state.
-spec stop() -> ok_or_error(not_started).
stop() ->
    Request = stop,
    gen_statem:call(?MODULE, Request).
|
||||
|
||||
%% Queries the status of the locally registered coordinator.
-spec status() -> disabled | {enabled, map()}.
status() ->
    Request = status,
    gen_statem:call(?MODULE, Request).
|
||||
|
||||
%% Queries the status of the coordinator process identified by Pid.
-spec status(pid()) -> disabled | {enabled, map()}.
status(Pid) ->
    Request = status,
    gen_statem:call(Pid, Request).
|
||||
|
||||
%% Starts the coordinator state machine, registered locally under the
%% module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_statem:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% Asks the given nodes (through the v1 RPC proto module) and keeps only the
%% atom entries of the successful replies; anything else is dropped.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _} = emqx_node_rebalance_proto_v1:available_nodes(Nodes),
    [Reply || Reply <- Replies, is_atom(Reply)].
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_statem callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_statem callback: all events are dispatched through handle_event/4.
callback_mode() ->
    handle_event_function.
|
||||
|
||||
%% states: disabled, wait_health_check, evicting_conns, wait_takeover, evicting_sessions
|
||||
|
||||
%% gen_statem callback: emits a trace point and starts in the `disabled`
%% state with empty data.
init([]) ->
    ?tp(debug, emqx_node_rebalance_started, #{}),
    InitialData = #{},
    {ok, disabled, InitialData}.
|
||||
|
||||
%% start
|
||||
%% gen_statem event handler. State flow on the happy path:
%%   disabled -> wait_health_check -> evicting_conns -> wait_takeover
%%            -> evicting_sessions -> disabled
%%
%% start: accepted only in `disabled`; on success the health-check wait
%% timer is armed and the caller gets `ok`.
handle_event(
    {call, Caller},
    {start, #{wait_health_check := WaitHealthCheck} = Opts},
    disabled,
    #{} = Data
) ->
    case enable_rebalance(Data#{opts => Opts}) of
        {error, Reason} ->
            ?SLOG(warning, #{
                msg => "node_rebalance_enable_failed",
                reason => Reason
            }),
            {keep_state_and_data, [{reply, Caller, {error, Reason}}]};
        {ok, NewData} ->
            ?SLOG(warning, #{msg => "node_rebalance_enabled", opts => Opts}),
            Actions = [
                {state_timeout, seconds(WaitHealthCheck), evict_conns},
                {reply, Caller, ok}
            ],
            {next_state, wait_health_check, NewData, Actions}
    end;
%% start requests not handled by the clause above are refused
handle_event({call, Caller}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, already_started}}]};
%% stop: nothing to stop while disabled
handle_event({call, Caller}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, not_started}}]};
%% stop: in any other state, switch the agents off and reset
handle_event({call, Caller}, stop, _State, Data) ->
    ok = disable_rebalance(Data),
    ?SLOG(warning, #{msg => "node_rebalance_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, Caller, ok}]};
%% status
handle_event({call, Caller}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, disabled}]};
handle_event({call, Caller}, status, State, Data) ->
    Stats = get_stats(State, Data),
    Reply = {enabled, Stats#{state => State, coordinator_node => node()}},
    {keep_state_and_data, [{reply, Caller, Reply}]};
%% conn eviction: the health-check wait is over, start evicting immediately
handle_event(state_timeout, evict_conns, wait_health_check, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_health_check_over"}),
    {next_state, evicting_conns, Data, [{state_timeout, 0, evict_conns}]};
%% conn eviction: one eviction round; repeats every EvictInterval until
%% evict_conns/1 reports `ok`
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{opts := #{wait_takeover := WaitTakeover, evict_interval := EvictInterval}} = Data
) ->
    case evict_conns(Data) of
        {continue, NewData} ->
            {keep_state, NewData, [{state_timeout, EvictInterval, evict_conns}]};
        ok ->
            ?SLOG(warning, #{msg => "node_rebalance_evict_conns_over"}),
            TakeoverTimer = {state_timeout, seconds(WaitTakeover), evict_sessions},
            {next_state, wait_takeover, Data, [TakeoverTimer]}
    end;
%% session eviction: the takeover wait is over, start evicting immediately
handle_event(state_timeout, evict_sessions, wait_takeover, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_takeover_over"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
%% session eviction: one eviction round; once evict_sessions/1 reports `ok`
%% the whole rebalance is finished
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{opts := #{evict_interval := EvictInterval}} = Data
) ->
    case evict_sessions(Data) of
        {continue, NewData} ->
            {keep_state, NewData, [{state_timeout, EvictInterval, evict_sessions}]};
        ok ->
            ?tp(debug, emqx_node_rebalance_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_rebalance_evict_sessions_over"}),
            ok = disable_rebalance(Data),
            ?SLOG(warning, #{msg => "node_rebalance_finished_successfully"}),
            {next_state, disabled, deinit(Data)}
    end;
%% anything else is logged and ignored
handle_event({call, Caller}, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_call", call => Msg}),
    {keep_state_and_data, [{reply, Caller, ignored}]};
handle_event(info, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_info", info => Msg}),
    keep_state_and_data;
handle_event(cast, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_cast", cast => Msg}),
    keep_state_and_data.
|
||||
|
||||
%% gen_statem callback: state and data are carried over unchanged.
code_change(_OldVsn, CurrentState, CurrentData, _Extra) ->
    {ok, CurrentState, CurrentData}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% internal funs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Splits the configured nodes into donors (connection count >= average)
%% and recipients (below average), and decides whether rebalancing is
%% needed at all.
%% Returns {error, nothing_to_balance}, or {ok, Data} extended with the
%% donor/recipient lists and the initial per-node counts after the
%% rebalance agent has been enabled on the donors.
enable_rebalance(#{opts := Opts} = Data) ->
    Nodes = maps:get(nodes, Opts),
    ConnCounts = multicall(Nodes, connection_counts, []),
    SessCounts = multicall(Nodes, session_counts, []),
    {_, Counts} = lists:unzip(ConnCounts),
    Avg = avg(Counts),
    IsDonor = fun({_Node, Count}) -> Count >= Avg end,
    {DonorCounts, RecipientCounts} = lists:partition(IsDonor, ConnCounts),
    ?SLOG(warning, #{
        msg => "node_rebalance_enabling",
        conn_counts => ConnCounts,
        donor_counts => DonorCounts,
        recipient_counts => RecipientCounts
    }),
    {DonorNodes, _} = lists:unzip(DonorCounts),
    {RecipientNodes, _} = lists:unzip(RecipientCounts),
    case need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) of
        true ->
            %% the per-node results of enabling the agent are not inspected
            _ = multicall(DonorNodes, enable_rebalance_agent, [self()]),
            Enabled = Data#{
                donors => DonorNodes,
                recipients => RecipientNodes,
                initial_conn_counts => maps:from_list(ConnCounts),
                initial_sess_counts => maps:from_list(SessCounts)
            },
            {ok, Enabled};
        false ->
            {error, nothing_to_balance}
    end.
|
||||
|
||||
%% Disables the rebalance agent on every donor node; per-node results are
%% not inspected and `ok` is always returned.
disable_rebalance(#{donors := Donors}) ->
    _ = multicall(Donors, disable_rebalance_agent, [self()]),
    ok.
|
||||
|
||||
%% One connection-eviction round.
%% Returns `ok` when the donor average is within the configured thresholds
%% of the recipient average. Otherwise asks every donor whose count exceeds
%% the recipient average to evict `conn_evict_rate` connections and returns
%% {continue, Data} with refreshed averages and per-node counts.
evict_conns(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, connection_counts, []),
    {_, DonorCounts} = lists:unzip(DonorNodeCounts),
    RecipientNodeCounts = multicall(RecipientNodes, connection_counts, []),
    {_, RecipientCounts} = lists:unzip(RecipientNodeCounts),

    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    Thresholds = thresholds(conn, Opts),
    Refreshed = Data#{
        donor_conn_avg => DonorAvg,
        recipient_conn_avg => RecipientAvg,
        donor_conn_counts => maps:from_list(DonorNodeCounts),
        recipient_conn_counts => maps:from_list(RecipientNodeCounts)
    },
    case within_thresholds(DonorAvg, RecipientAvg, Thresholds) of
        false ->
            Rate = maps:get(conn_evict_rate, Opts),
            Targets = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_conns",
                nodes => Targets,
                counts => Rate
            }),
            _ = multicall(Targets, evict_connections, [Rate]),
            {continue, Refreshed};
        true ->
            ok
    end.
|
||||
|
||||
%% One session-eviction round, based on disconnected session counts.
%% Returns `ok` when the donor average is within the configured thresholds
%% of the recipient average. Otherwise asks every donor whose count exceeds
%% the recipient average to evict `sess_evict_rate` disconnected sessions
%% towards the recipient nodes and returns {continue, Data} with refreshed
%% averages and per-node counts.
evict_sessions(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, disconnected_session_counts, []),
    {_, DonorCounts} = lists:unzip(DonorNodeCounts),
    RecipientNodeCounts = multicall(RecipientNodes, disconnected_session_counts, []),
    {_, RecipientCounts} = lists:unzip(RecipientNodeCounts),

    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    Thresholds = thresholds(sess, Opts),
    Refreshed = Data#{
        donor_sess_avg => DonorAvg,
        recipient_sess_avg => RecipientAvg,
        donor_sess_counts => maps:from_list(DonorNodeCounts),
        recipient_sess_counts => maps:from_list(RecipientNodeCounts)
    },
    case within_thresholds(DonorAvg, RecipientAvg, Thresholds) of
        false ->
            Rate = maps:get(sess_evict_rate, Opts),
            Targets = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_sessions",
                nodes => Targets,
                counts => Rate
            }),
            EvictArgs = [Rate, RecipientNodes, disconnected],
            _ = multicall(Targets, evict_sessions, EvictArgs),
            {continue, Refreshed};
        true ->
            ok
    end.
|
||||
|
||||
%% Decide whether a rebalance is needed for the given donors and recipients.
%%
%% With no donors or no recipients the answer is always `false'. Otherwise
%% the answer is `true' when either the connection averages or the session
%% averages of the donors fall outside the configured thresholds relative to
%% the recipients. The decision and its inputs are emitted as a trace point.
need_rebalance([], _RecipientNodes, _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(_DonorNodes, [], _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) ->
    DonorConnAvg = avg_for_nodes(DonorNodes, ConnCounts),
    RecipientConnAvg = avg_for_nodes(RecipientNodes, ConnCounts),
    DonorSessAvg = avg_for_nodes(DonorNodes, SessCounts),
    RecipientSessAvg = avg_for_nodes(RecipientNodes, SessCounts),
    %% Balanced only if both connections and sessions are within thresholds;
    %% the session check is skipped once the connection check has failed.
    Balanced =
        within_thresholds(DonorConnAvg, RecipientConnAvg, thresholds(conn, Opts)) andalso
            within_thresholds(DonorSessAvg, RecipientSessAvg, thresholds(sess, Opts)),
    Result = not Balanced,
    ?tp(
        debug,
        emqx_node_rebalance_need_rebalance,
        #{
            donors => DonorNodes,
            recipients => RecipientNodes,
            conn_counts => ConnCounts,
            sess_counts => SessCounts,
            opts => Opts,
            result => Result
        }
    ),
    Result.
|
||||
|
||||
%% Average of the counts that belong to `Nodes', where `Counts' is a list of
%% `{Node, Count}' pairs. Nodes missing from `Counts' are ignored.
avg_for_nodes(Nodes, Counts) ->
    CountsByNode = maps:from_list(Counts),
    Selected = maps:with(Nodes, CountsByNode),
    avg(maps:values(Selected)).
|
||||
|
||||
%% `true' when `Value' does not exceed `GoalValue' by more than the absolute
%% threshold, or does not exceed `GoalValue' scaled by the relative threshold.
within_thresholds(Value, GoalValue, {AbsThres, RelThres}) ->
    case Value =< GoalValue + AbsThres of
        true -> true;
        false -> Value =< GoalValue * RelThres
    end.
|
||||
|
||||
%% Pick the `{Absolute, Relative}' threshold pair for connections (`conn')
%% or sessions (`sess') out of the rebalance options.
thresholds(conn, #{abs_conn_threshold := AbsThres, rel_conn_threshold := RelThres}) ->
    {AbsThres, RelThres};
thresholds(sess, #{abs_sess_threshold := AbsThres, rel_sess_threshold := RelThres}) ->
    {AbsThres, RelThres}.
|
||||
|
||||
%% Names of the nodes whose count is strictly greater than `Goal'.
%% `NodeCounts' is a list of `{Node, Count}' pairs; input order is kept.
nodes_to_evict(Goal, NodeCounts) ->
    lists:filtermap(
        fun({Node, Count}) ->
            Count > Goal andalso {true, Node}
        end,
        NodeCounts
    ).
|
||||
|
||||
%% Statistics exposed for a given state: an empty map while `disabled',
%% the data itself in any other state.
get_stats(State, Data) ->
    case State of
        disabled -> #{};
        _ -> Data
    end.
|
||||
|
||||
%% Arithmetic mean of a non-empty list of numbers (always a float).
%% An empty list is deliberately not handled and fails with `function_clause'.
%% Non-emptiness is checked by pattern matching instead of a `length/1'
%% guard, which would walk the whole list just to test it.
avg([_ | _] = List) ->
    lists:sum(List) / length(List).
|
||||
|
||||
%% Call `emqx_node_rebalance_proto_v1:F(Nodes, A...)' and pair every node with
%% its unwrapped result.
%%
%% The proto call is expected to return `{Results, BadNodes}'. Any bad node,
%% or any per-node result that is neither `ok' nor `{ok, _}', raises
%% `error({bad_nodes, ...})'.
multicall(Nodes, F, A) ->
    case erlang:apply(emqx_node_rebalance_proto_v1, F, [Nodes | A]) of
        {_Results, [_BadNode | _] = BadNodes} ->
            error({bad_nodes, BadNodes});
        {Results, []} ->
            NodeResults = lists:zip(Nodes, Results),
            case lists:partition(fun is_ok/1, NodeResults) of
                {_Succeeded, [_ | _] = BadResults} ->
                    error({bad_nodes, BadResults});
                {Succeeded, []} ->
                    [{Node, ok_result(Result)} || {Node, Result} <- Succeeded]
            end
    end.
|
||||
|
||||
%% `true' for a `{Node, Result}' pair whose result is `ok' or `{ok, _}';
%% `false' for anything else.
is_ok({_Node, Result}) ->
    case Result of
        ok -> true;
        {ok, _} -> true;
        _ -> false
    end;
is_ok(_) ->
    false.
|
||||
|
||||
%% Strip the `ok' tag from a successful result; a bare `ok' stays `ok'.
ok_result(ok) ->
    ok;
ok_result({ok, Value}) ->
    Value.
|
||||
|
||||
%% Connection count reported by the eviction agent, wrapped in `{ok, _}'.
connection_count() ->
    Count = emqx_eviction_agent:connection_count(),
    {ok, Count}.
|
||||
|
||||
%% Session count reported by the eviction agent, wrapped in `{ok, _}'.
session_count() ->
    Count = emqx_eviction_agent:session_count(),
    {ok, Count}.
|
||||
|
||||
%% Count of `disconnected' sessions reported by the eviction agent,
%% wrapped in `{ok, _}'.
disconnected_session_count() ->
    Count = emqx_eviction_agent:session_count(disconnected),
    {ok, Count}.
|
||||
|
||||
%% Default rebalance options: eviction rates and thresholds for connections
%% and sessions, wait periods, the eviction interval, and the node list
%% returned by `all_nodes/0'.
default_opts() ->
    Nodes = all_nodes(),
    #{
        %% connections
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
        abs_conn_threshold => ?DEFAULT_ABS_CONN_THRESHOLD,
        rel_conn_threshold => ?DEFAULT_REL_CONN_THRESHOLD,

        %% sessions
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        abs_sess_threshold => ?DEFAULT_ABS_SESS_THRESHOLD,
        rel_sess_threshold => ?DEFAULT_REL_SESS_THRESHOLD,

        %% timing
        wait_health_check => ?DEFAULT_WAIT_HEALTH_CHECK,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER,
        evict_interval => ?EVICT_INTERVAL,

        nodes => Nodes
    }.
|
||||
|
||||
%% Drop all per-run statistics and the options from the state data,
%% leaving every other key untouched.
deinit(Data) ->
    maps:without(
        [
            opts,
            initial_conn_counts,
            initial_sess_counts,
            donor_conn_avg,
            donor_sess_avg,
            donor_conn_counts,
            donor_sess_counts,
            recipient_conn_avg,
            recipient_sess_avg,
            recipient_conn_counts,
            recipient_sess_counts
        ],
        Data
    ).
|
||||
|
||||
%% Return `node()' if this node can take part in a rebalance: the rebalance
%% agent process must be registered and the eviction agent must report
%% `disabled'. Either assertion failing crashes with `badmatch'.
is_node_available() ->
    AgentPid = whereis(emqx_node_rebalance_agent),
    true = is_pid(AgentPid),
    disabled = emqx_eviction_agent:status(),
    node().
|
||||
|
||||
%% Nodes reported as running by `mria_mnesia'.
all_nodes() ->
    RunningNodes = mria_mnesia:running_nodes(),
    RunningNodes.
|
||||
|
||||
%% Convert seconds to milliseconds, rounded to the nearest integer.
seconds(Sec) ->
    Millis = timer:seconds(Sec),
    round(Millis).
|
|
@ -0,0 +1,131 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_agent).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start_link/0,
|
||||
enable/1,
|
||||
disable/1,
|
||||
status/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_info/2,
|
||||
handle_cast/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-define(ENABLE_KIND, emqx_node_rebalance).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-type status() :: {enabled, pid()} | disabled.
|
||||
|
||||
%% Start the agent as a gen_server registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% Ask the local agent to enable itself on behalf of `CoordinatorPid'.
-spec enable(pid()) -> ok_or_error(already_enabled | eviction_agent_busy).
enable(CoordinatorPid) ->
    Request = {enable, CoordinatorPid},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% Ask the local agent to disable itself; this succeeds only for the
%% coordinator pid that enabled it.
-spec disable(pid()) -> ok_or_error(already_disabled | invalid_coordinator).
disable(CoordinatorPid) ->
    Request = {disable, CoordinatorPid},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% Query the local agent: `{enabled, CoordinatorPid}' or `disabled'.
-spec status() -> status().
status() ->
    Request = status,
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% The agent starts disabled: its state is an empty map.
init([]) ->
    InitialState = #{},
    {ok, InitialState}.
|
||||
|
||||
%% Synchronous requests.
%%
%% `{enable, Pid}':  rejected with `already_enabled' if a coordinator is set.
%%                   Otherwise links to the coordinator and to the eviction
%%                   agent and enables the eviction agent; if that agent is
%%                   busy both links are removed again and the state is kept.
%% `{disable, Pid}': only the recorded coordinator may disable; disables the
%%                   eviction agent, removes both links, clears the pids.
%% `status':         `{enabled, CoordinatorPid}' or `disabled'.
%% Any other request is logged and answered with `ignored'.
handle_call({enable, _CoordinatorPid}, _From, #{coordinator_pid := _Current} = St) ->
    {reply, {error, already_enabled}, St};
handle_call({enable, CoordinatorPid}, _From, St) ->
    true = link(CoordinatorPid),
    EvictionAgentPid = whereis(emqx_eviction_agent),
    true = link(EvictionAgentPid),
    case emqx_eviction_agent:enable(?ENABLE_KIND, undefined) of
        ok ->
            EnabledSt = #{
                coordinator_pid => CoordinatorPid,
                eviction_agent_pid => EvictionAgentPid
            },
            {reply, ok, EnabledSt};
        {error, eviction_agent_busy} ->
            %% Roll back the links taken above.
            true = unlink(EvictionAgentPid),
            true = unlink(CoordinatorPid),
            {reply, {error, eviction_agent_busy}, St}
    end;
handle_call({disable, CoordinatorPid}, _From, St) ->
    case St of
        #{
            coordinator_pid := CoordinatorPid,
            eviction_agent_pid := EvictionAgentPid
        } ->
            _ = emqx_eviction_agent:disable(?ENABLE_KIND),
            true = unlink(EvictionAgentPid),
            true = unlink(CoordinatorPid),
            DisabledSt = maps:without([coordinator_pid, eviction_agent_pid], St),
            {reply, ok, DisabledSt};
        #{coordinator_pid := _OtherCoordinatorPid} ->
            {reply, {error, invalid_coordinator}, St};
        #{} ->
            {reply, {error, already_disabled}, St}
    end;
handle_call(status, _From, #{coordinator_pid := Pid} = St) ->
    {reply, {enabled, Pid}, St};
handle_call(status, _From, St) ->
    {reply, disabled, St};
handle_call(Msg, _From, St) ->
    ?SLOG(warning, #{
        msg => "unknown_call",
        call => Msg,
        state => St
    }),
    {reply, ignored, St}.
|
||||
|
||||
%% No plain messages are expected; log whatever arrives and keep the state.
handle_info(Info, State) ->
    ?SLOG(warning, #{
        msg => "unknown_info",
        info => Info,
        state => State
    }),
    {noreply, State}.
|
||||
|
||||
%% No casts are expected; log whatever arrives and keep the state.
handle_cast(Cast, State) ->
    ?SLOG(warning, #{
        msg => "unknown_cast",
        cast => Cast,
        state => State
    }),
    {noreply, State}.
|
||||
|
||||
%% Code upgrades keep the state as is.
code_change(_OldVsn, St, _Extra) ->
    {ok, St}.
|
|
@ -0,0 +1,733 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_node_rebalance_api).
|
||||
|
||||
-behaviour(minirest_api).
|
||||
|
||||
-include_lib("typerefl/include/types.hrl").
|
||||
-include_lib("hocon/include/hoconsc.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx_utils/include/emqx_utils_api.hrl").
|
||||
|
||||
%% Swagger specs from hocon schema
|
||||
-export([
|
||||
api_spec/0,
|
||||
paths/0,
|
||||
schema/1,
|
||||
namespace/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
fields/1,
|
||||
roots/0
|
||||
]).
|
||||
|
||||
%% API callbacks
|
||||
-export([
|
||||
'/load_rebalance/status'/2,
|
||||
'/load_rebalance/global_status'/2,
|
||||
'/load_rebalance/availability_check'/2,
|
||||
'/load_rebalance/:node/start'/2,
|
||||
'/load_rebalance/:node/stop'/2,
|
||||
'/load_rebalance/:node/evacuation/start'/2,
|
||||
'/load_rebalance/:node/evacuation/stop'/2
|
||||
]).
|
||||
|
||||
%% Schema examples
|
||||
-export([
|
||||
rebalance_example/0,
|
||||
rebalance_evacuation_example/0,
|
||||
translate/2
|
||||
]).
|
||||
|
||||
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||
-import(emqx_dashboard_swagger, [error_codes/2]).
|
||||
|
||||
-define(BAD_REQUEST, 'BAD_REQUEST').
|
||||
-define(NODE_EVACUATING, 'NODE_EVACUATING').
|
||||
-define(RPC_ERROR, 'RPC_ERROR').
|
||||
-define(NOT_FOUND, 'NOT_FOUND').
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% API Spec
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Namespace under which this module's schema is registered.
namespace() ->
    "load_rebalance".
|
||||
|
||||
%% Swagger spec for this module, with request schema checking turned on.
api_spec() ->
    Options = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, Options).
|
||||
|
||||
%% All HTTP paths served by this module: cluster-wide status endpoints
%% followed by the per-node control endpoints.
paths() ->
    StatusPaths = [
        "/load_rebalance/status",
        "/load_rebalance/global_status",
        "/load_rebalance/availability_check"
    ],
    NodePaths = [
        "/load_rebalance/:node/start",
        "/load_rebalance/:node/stop",
        "/load_rebalance/:node/evacuation/start",
        "/load_rebalance/:node/evacuation/stop"
    ],
    StatusPaths ++ NodePaths.
|
||||
|
||||
%% Swagger schema of every path listed in `paths/0'.
%%
%% The three status endpoints are GET operations; the four per-node endpoints
%% are POST operations that share the `node' path parameter and the
%% 200/400/404 response set. The two `start' endpoints also take a request body.
schema("/load_rebalance/status") ->
    get_operation(
        '/load_rebalance/status',
        <<"Get rebalance status">>,
        ?DESC("load_rebalance_status"),
        #{200 => local_status_response_schema()}
    );
schema("/load_rebalance/global_status") ->
    get_operation(
        '/load_rebalance/global_status',
        <<"Get global rebalance status">>,
        ?DESC("load_rebalance_global_status"),
        #{200 => response_schema()}
    );
schema("/load_rebalance/availability_check") ->
    get_operation(
        '/load_rebalance/availability_check',
        <<"Node rebalance availability check">>,
        ?DESC("load_rebalance_availability_check"),
        #{
            200 => response_schema(),
            503 => error_codes([?NODE_EVACUATING], <<"Node Evacuating">>)
        }
    );
schema("/load_rebalance/:node/start") ->
    node_post_operation(
        '/load_rebalance/:node/start',
        <<"Start rebalancing with the node as coordinator">>,
        ?DESC("load_rebalance_start"),
        #{
            'requestBody' =>
                emqx_dashboard_swagger:schema_with_examples(
                    ref(rebalance_start),
                    rebalance_example()
                )
        }
    );
schema("/load_rebalance/:node/stop") ->
    node_post_operation(
        '/load_rebalance/:node/stop',
        <<"Stop rebalancing coordinated by the node">>,
        ?DESC("load_rebalance_stop"),
        #{}
    );
schema("/load_rebalance/:node/evacuation/start") ->
    node_post_operation(
        '/load_rebalance/:node/evacuation/start',
        <<"Start evacuation on a node">>,
        ?DESC("load_rebalance_evacuation_start"),
        #{
            'requestBody' =>
                emqx_dashboard_swagger:schema_with_examples(
                    ref(rebalance_evacuation_start),
                    rebalance_evacuation_example()
                )
        }
    );
schema("/load_rebalance/:node/evacuation/stop") ->
    node_post_operation(
        '/load_rebalance/:node/evacuation/stop',
        <<"Stop evacuation on a node">>,
        ?DESC("load_rebalance_evacuation_stop"),
        #{}
    ).

%% Schema map of a GET endpoint tagged `load_rebalance'.
get_operation(OperationId, Summary, Description, Responses) ->
    #{
        'operationId' => OperationId,
        get => #{
            tags => [<<"load_rebalance">>],
            summary => Summary,
            description => Description,
            responses => Responses
        }
    }.

%% Schema map of a per-node POST endpoint. `Extra' carries operation keys
%% specific to one endpoint (currently only `requestBody').
node_post_operation(OperationId, Summary, Description, Extra) ->
    #{
        'operationId' => OperationId,
        post => Extra#{
            tags => [<<"load_rebalance">>],
            summary => Summary,
            description => Description,
            parameters => [param_node()],
            responses => #{
                200 => response_schema(),
                400 => error_codes([?BAD_REQUEST], <<"Bad Request">>),
                404 => error_codes([?NOT_FOUND], <<"Not Found">>)
            }
        }
    }.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Handlers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% GET /load_rebalance/status: rebalance or evacuation status of the local node.
'/load_rebalance/status'(get, #{}) ->
    case emqx_node_rebalance_status:local_status() of
        {Process, Stats} when Process =:= rebalance; Process =:= evacuation ->
            {200, format_status(Process, Stats)};
        disabled ->
            {200, #{status => disabled}}
    end.
|
||||
|
||||
%% GET /load_rebalance/global_status: evacuations and rebalances known to
%% `emqx_node_rebalance_status', each entry tagged with its node.
'/load_rebalance/global_status'(get, #{}) ->
    GlobalStatus = emqx_node_rebalance_status:global_status(),
    #{evacuations := Evacuations, rebalances := Rebalances} = GlobalStatus,
    Body = #{
        evacuations => format_as_map_list(Evacuations),
        rebalances => format_as_map_list(Rebalances)
    },
    {200, Body}.
|
||||
|
||||
%% GET /load_rebalance/availability_check: 200 while the eviction agent is
%% disabled, 503 NODE_EVACUATING while it is enabled.
'/load_rebalance/availability_check'(get, #{}) ->
    case emqx_eviction_agent:status() of
        {enabled, _Stats} ->
            error_response(503, ?NODE_EVACUATING, <<"Node Evacuating">>);
        disabled ->
            {200, #{}}
    end.
|
||||
|
||||
%% POST /load_rebalance/:node/start: translate and validate the request body,
%% then start a rebalance coordinated by the given node over RPC.
'/load_rebalance/:node/start'(post, #{bindings := #{node := NodeBin}, body := Params0}) ->
    StartOnNode = fun(Node) ->
        Translated = translate(rebalance_start, Params0),
        CallNode = fun(Validated) ->
            RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_start(Node, Validated),
            wrap_rpc(Node, RpcResult)
        end,
        with_nodes_at_key(nodes, Translated, CallNode)
    end,
    emqx_utils_api:with_node(NodeBin, StartOnNode).
|
||||
|
||||
%% POST /load_rebalance/:node/stop: stop the rebalance coordinated by the node.
'/load_rebalance/:node/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOnNode = fun(Node) ->
        RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_stop(Node),
        wrap_rpc(Node, RpcResult)
    end,
    emqx_utils_api:with_node(NodeBin, StopOnNode).
|
||||
|
||||
%% POST /load_rebalance/:node/evacuation/start: translate and validate the
%% request body (including the `migrate_to' nodes), then start evacuation of
%% the given node over RPC.
'/load_rebalance/:node/evacuation/start'(post, #{
    bindings := #{node := NodeBin}, body := Params0
}) ->
    StartOnNode = fun(Node) ->
        Translated = translate(rebalance_evacuation_start, Params0),
        CallNode = fun(Validated) ->
            RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_start(
                Node, Validated
            ),
            wrap_rpc(Node, RpcResult)
        end,
        with_nodes_at_key(migrate_to, Translated, CallNode)
    end,
    emqx_utils_api:with_node(NodeBin, StartOnNode).
|
||||
|
||||
%% POST /load_rebalance/:node/evacuation/stop: stop evacuation of the node.
'/load_rebalance/:node/evacuation/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOnNode = fun(Node) ->
        RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_stop(Node),
        wrap_rpc(Node, RpcResult)
    end,
    emqx_utils_api:with_node(NodeBin, StopOnNode).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Turn the result of an RPC to `Node' into an HTTP response:
%% `ok' -> 200, `{error, _}' -> 400 BAD_REQUEST, `{badrpc, _}' -> 503 RPC_ERROR.
wrap_rpc(Node, RPCResult) ->
    case RPCResult of
        {badrpc, Reason} ->
            Message = io_lib:format("RPC error on node ~p: ~p", [Node, Reason]),
            error_response(503, ?RPC_ERROR, Message);
        {error, Reason} ->
            Message = io_lib:format("error on node ~p: ~p", [Node, Reason]),
            error_response(400, ?BAD_REQUEST, Message);
        ok ->
            {200, #{}}
    end.
|
||||
|
||||
%% Mark a stats map as belonging to an enabled `Process'.
format_status(Process, Stats) ->
    maps:merge(Stats, #{process => Process, status => enabled}).
|
||||
|
||||
%% Validate the node names stored under `Key' in `Params', when present.
%%
%% Every name must parse to an existing atom, otherwise
%% `{error, {invalid, Names}}' is returned. The parsed nodes must then all be
%% reported by `emqx_node_rebalance_evacuation:available_nodes/1', otherwise
%% `{error, {unavailable, Nodes}}'. On success `Key' is replaced by the
%% parsed node atoms. Params without `Key' pass through unchanged.
validate_nodes(Key, Params) when is_map_key(Key, Params) ->
    Parsed = [{BinNode, parse_node(BinNode)} || BinNode <- maps:get(Key, Params)],
    %% Both lists are reversed relative to the request order.
    ValidNodes = lists:reverse([Node || {_BinNode, {ok, Node}} <- Parsed]),
    case lists:reverse([BinNode || {BinNode, {error, _}} <- Parsed]) of
        [_ | _] = InvalidNodes ->
            {error, {invalid, InvalidNodes}};
        [] ->
            case emqx_node_rebalance_evacuation:available_nodes(ValidNodes) of
                ValidNodes -> {ok, Params#{Key => ValidNodes}};
                OtherNodes -> {error, {unavailable, ValidNodes -- OtherNodes}}
            end
    end;
validate_nodes(_Key, Params) ->
    {ok, Params}.
|
||||
|
||||
%% Validate the nodes under `Key' and run `Fun' on the validated params;
%% validation failures become HTTP 400 responses.
with_nodes_at_key(Key, Params, Fun) ->
    case validate_nodes(Key, Params) of
        {error, {invalid, Nodes}} ->
            error_response(400, ?BAD_REQUEST, io_lib:format("Invalid nodes: ~p", [Nodes]));
        {error, {unavailable, Nodes}} ->
            %% NOTE(review): HTTP 400 is paired with the NOT_FOUND code here,
            %% while the endpoint schemas list NOT_FOUND under 404 - confirm
            %% which of the two is intended.
            error_response(400, ?NOT_FOUND, io_lib:format("Nodes unavailable: ~p", [Nodes]));
        {ok, Params1} ->
            Fun(Params1)
    end.
|
||||
|
||||
%% Convert a node name binary to an atom, but only if that atom already
%% exists, so request input cannot grow the atom table.
parse_node(Bin) when is_binary(Bin) ->
    try binary_to_existing_atom(Bin) of
        Node -> {ok, Node}
    catch
        error:badarg -> {error, {unknown, Bin}}
    end.
|
||||
|
||||
%% Turn a list of `{Node, InfoMap}' pairs into a list of maps, storing the
%% node under the `node' key of each map.
format_as_map_list(List) ->
    AddNode = fun({Node, Info}) -> Info#{node => Node} end,
    lists:map(AddNode, List).
|
||||
|
||||
%% Build an `{HttpCode, Body}' error response.
%%
%% Callers in this module pass either a binary or the chardata returned by
%% `io_lib:format/2', which may be a deep list. Chardata is converted to a
%% UTF-8 binary first so the response always carries the message as text;
%% any other term is passed through unchanged.
error_response(HttpCode, Code, Message) when is_list(Message) ->
    %% If the conversion fails it returns a tuple, which the clause below
    %% passes through as is, so this cannot loop.
    error_response(HttpCode, Code, unicode:characters_to_binary(Message));
error_response(HttpCode, Code, Message) ->
    {HttpCode, ?ERROR_MSG(Code, Message)}.
|
||||
|
||||
%% Remove every `{Key, _}' pair whose key is listed in `Keys' from a
%% property list, keeping the order of the remaining pairs.
without(Keys, Props) ->
    lists:foldr(
        fun({Key, _} = Prop, Kept) ->
            case lists:member(Key, Keys) of
                true -> Kept;
                false -> [Prop | Kept]
            end
        end,
        [],
        Props
    ).
|
||||
|
||||
%%------------------------------------------------------------------------------
|
||||
%% Schema
|
||||
%%------------------------------------------------------------------------------
|
||||
|
||||
%% Check `Conf' against the schema fields named `Ref' in this module and
%% return the checked config with atom keys.
translate(Ref, Conf) ->
    RawConf = #{atom_to_binary(Ref) => Conf},
    Checked = hocon_tconf:check_plain(?MODULE, RawConf, #{atom_key => true}, [Ref]),
    #{Ref := TranslatedConf} = Checked,
    TranslatedConf.
|
||||
|
||||
%% Schema of the required `node' path parameter.
param_node() ->
    Meta = #{
        in => path,
        desc => ?DESC(param_node),
        required => true
    },
    {node, mk(binary(), Meta)}.
|
||||
|
||||
%% Field definitions of every schema struct used by the API.
%% Each entry is built by `schema_field/4,5' as `{Name, mk(Type, Meta)}'.
fields(rebalance_start) ->
    [
        schema_field(
            "wait_health_check", emqx_schema:duration_s(), ?DESC(wait_health_check), false
        ),
        schema_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate), false),
        schema_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate), false),
        schema_field("abs_conn_threshold", pos_integer(), ?DESC(abs_conn_threshold), false),
        schema_field("rel_conn_threshold", number(), ?DESC(rel_conn_threshold), false, #{
            validator => [fun(Value) -> Value > 1.0 end]
        }),
        schema_field("abs_sess_threshold", pos_integer(), ?DESC(abs_sess_threshold), false),
        schema_field("rel_sess_threshold", number(), ?DESC(rel_sess_threshold), false, #{
            validator => [fun(Value) -> Value > 1.0 end]
        }),
        schema_field("wait_takeover", emqx_schema:duration_s(), ?DESC(wait_takeover), false),
        schema_field("nodes", list(binary()), ?DESC(rebalance_nodes), false, #{
            validator => [fun(Values) -> length(Values) > 0 end]
        })
    ];
fields(rebalance_evacuation_start) ->
    [
        schema_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate), false),
        schema_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate), false),
        schema_field("redirect_to", binary(), ?DESC(redirect_to), false),
        schema_field("wait_takeover", pos_integer(), ?DESC(wait_takeover), false),
        schema_field("migrate_to", nonempty_list(binary()), ?DESC(migrate_to), false)
    ];
fields(local_status_disabled) ->
    [
        schema_field("status", disabled, ?DESC(local_status_enabled), true)
    ];
fields(local_status_enabled) ->
    [
        schema_field("status", enabled, ?DESC(local_status_enabled), true),
        schema_field(
            "process",
            hoconsc:union([rebalance, evacuation]),
            ?DESC(local_status_process),
            true
        ),
        schema_field("state", atom(), ?DESC(local_status_state), true),
        schema_field(
            "coordinator_node", binary(), ?DESC(local_status_coordinator_node), false
        ),
        schema_field(
            "connection_eviction_rate",
            pos_integer(),
            ?DESC(local_status_connection_eviction_rate),
            false
        ),
        schema_field(
            "session_eviction_rate",
            pos_integer(),
            ?DESC(local_status_session_eviction_rate),
            false
        ),
        schema_field(
            "connection_goal", non_neg_integer(), ?DESC(local_status_connection_goal), false
        ),
        schema_field(
            "session_goal", non_neg_integer(), ?DESC(local_status_session_goal), false
        ),
        schema_field(
            "disconnected_session_goal",
            non_neg_integer(),
            ?DESC(local_status_disconnected_session_goal),
            false
        ),
        schema_field(
            "session_recipients", list(binary()), ?DESC(local_status_session_recipients), false
        ),
        schema_field("recipients", list(binary()), ?DESC(local_status_recipients), false),
        schema_field("stats", ref(status_stats), ?DESC(local_status_stats), false)
    ];
fields(status_stats) ->
    [
        schema_field(
            "initial_connected", non_neg_integer(), ?DESC(status_stats_initial_connected), true
        ),
        schema_field(
            "current_connected", non_neg_integer(), ?DESC(status_stats_current_connected), true
        ),
        schema_field(
            "initial_sessions", non_neg_integer(), ?DESC(status_stats_initial_sessions), true
        ),
        schema_field(
            "current_sessions", non_neg_integer(), ?DESC(status_stats_current_sessions), true
        ),
        schema_field(
            "current_disconnected_sessions",
            non_neg_integer(),
            ?DESC(status_stats_current_disconnected_sessions),
            false
        )
    ];
fields(global_coordinator_status) ->
    %% Local enabled-status fields minus those that do not apply to a
    %% coordinator, plus the coordinator-specific ones.
    without(
        ["status", "process", "session_goal", "session_recipients", "stats"],
        fields(local_status_enabled)
    ) ++
        [
            schema_field("donors", list(binary()), ?DESC(coordinator_status_donors), false),
            schema_field(
                "donor_conn_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_conn_avg),
                false
            ),
            schema_field(
                "donor_sess_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_sess_avg),
                false
            ),
            schema_field("node", binary(), ?DESC(coordinator_status_node), true)
        ];
fields(global_evacuation_status) ->
    without(["status", "process"], fields(local_status_enabled)) ++
        [
            schema_field("node", binary(), ?DESC(evacuation_status_node), true)
        ];
fields(global_status) ->
    [
        schema_field(
            "evacuations",
            hoconsc:array(ref(global_evacuation_status)),
            ?DESC(global_status_evacuations),
            false
        ),
        schema_field(
            "rebalances",
            hoconsc:array(ref(global_coordinator_status)),
            ?DESC(global_status_rebalances),
            false
        )
    ].

%% One schema field with the given type, description and `required' flag.
schema_field(Name, Type, Desc, Required) ->
    schema_field(Name, Type, Desc, Required, #{}).

%% Same as `schema_field/4'; `Extra' adds further meta keys such as `validator'.
schema_field(Name, Type, Desc, Required, Extra) ->
    {Name, mk(Type, Extra#{desc => Desc, required => Required})}.
|
||||
|
||||
%% Example request body for starting a rebalance (used in the API docs).
rebalance_example() ->
    #{
        nodes => [<<"othernode@127.0.0.1">>],
        wait_health_check => 10,
        wait_takeover => 10,
        conn_evict_rate => 10,
        abs_conn_threshold => 10,
        rel_conn_threshold => 1.5,
        sess_evict_rate => 20,
        abs_sess_threshold => 10,
        rel_sess_threshold => 1.5
    }.
|
||||
|
||||
%% Example request body for starting an evacuation (used in the API docs).
rebalance_evacuation_example() ->
    #{
        migrate_to => [<<"othernode@127.0.0.1">>],
        redirect_to => <<"othernode:1883">>,
        wait_takeover => 10,
        conn_evict_rate => 100,
        sess_evict_rate => 100
    }.
|
||||
|
||||
%% Response of the local status endpoint: the disabled or the enabled struct.
local_status_response_schema() ->
    Variants = [ref(local_status_disabled), ref(local_status_enabled)],
    hoconsc:union(Variants).
|
||||
|
||||
%% Generic map response, documented with the `empty_response' description.
response_schema() ->
    Meta = #{desc => ?DESC(empty_response)},
    mk(map(), Meta).
|
||||
|
||||
%% This schema module defines no root fields.
roots() ->
    [].
|
|
@ -0,0 +1,22 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_app).
|
||||
|
||||
-behaviour(application).
|
||||
|
||||
-emqx_plugin(?MODULE).
|
||||
|
||||
-export([
|
||||
start/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
%% Application start: bring up the supervisor, then register the CLI command.
%% Either step failing crashes the start with `badmatch'.
start(_Type, _Args) ->
    {ok, _Sup} = Started = emqx_node_rebalance_sup:start_link(),
    ok = emqx_node_rebalance_cli:load(),
    Started.
|
||||
|
||||
%% Application stop: unregister the CLI command.
stop(_State) ->
    Result = emqx_node_rebalance_cli:unload(),
    Result.
|
|
@ -0,0 +1,305 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_cli).
|
||||
|
||||
%% APIs
|
||||
-export([
|
||||
load/0,
|
||||
unload/0,
|
||||
cli/1
|
||||
]).
|
||||
|
||||
%% @doc Registers the `rebalance' command with emqx_ctl; invocations are
%% dispatched to cli/1 of this module.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(rebalance, Handler, []).
|
||||
|
||||
%% @doc Removes the `rebalance' command from emqx_ctl.
unload() ->
    emqx_ctl:unregister_command(rebalance).
|
||||
|
||||
%% @doc Entry point of the `rebalance' CLI command.
%%
%% Supported invocations (any other argument list prints the usage text):
%%   ["start" | Options]    start an evacuation (when `--evacuation' is given)
%%                          or a rebalance with the current node as coordinator
%%   ["node-status"]        print the status of the local node
%%   ["node-status", Node]  print the status of the given node
%%   ["status"]             print every evacuation/rebalance returned by
%%                          emqx_node_rebalance_status:global_status/0
%%   ["stop"]               stop the running evacuation or rebalance
%%
%% The `start' and `stop' clauses return `true' on success and `false'
%% otherwise.
cli(["start" | StartArgs]) ->
    case start_args(StartArgs) of
        {evacuation, Opts} ->
            case emqx_node_rebalance_evacuation:status() of
                disabled ->
                    cli_start_evacuation(Opts);
                {enabled, _} ->
                    emqx_ctl:print("Rebalance is already enabled~n"),
                    false
            end;
        {rebalance, Opts} ->
            case emqx_node_rebalance:start(Opts) of
                ok ->
                    emqx_ctl:print("Rebalance started~n"),
                    true;
                {error, Reason} ->
                    emqx_ctl:print("Rebalance start error: ~p~n", [Reason]),
                    false
            end;
        {error, Error} ->
            emqx_ctl:print("Rebalance start error: ~s~n", [Error]),
            false
    end;
cli(["node-status", NodeStr]) ->
    %% Only already existing atoms are accepted as node names.
    case emqx_utils:safe_to_existing_atom(NodeStr, utf8) of
        {ok, Node} ->
            node_status(emqx_node_rebalance_status:local_status(Node));
        {error, _} ->
            emqx_ctl:print("Node status error: invalid node~n"),
            false
    end;
cli(["node-status"]) ->
    node_status(emqx_node_rebalance_status:local_status());
cli(["status"]) ->
    #{
        evacuations := Evacuations,
        rebalances := Rebalances
    } = emqx_node_rebalance_status:global_status(),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: evacuation~n~s",
                [Node, emqx_node_rebalance_status:format_local_status(Status)]
            )
        end,
        Evacuations
    ),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: rebalance coordinator~n~s",
                [Node, emqx_node_rebalance_status:format_coordinator_status(Status)]
            )
        end,
        Rebalances
    );
cli(["stop"]) ->
    %% An evacuation takes precedence; a rebalance is only looked at when no
    %% evacuation is running on this node.
    case emqx_node_rebalance_evacuation:status() of
        {enabled, _} ->
            ok = emqx_node_rebalance_evacuation:stop(),
            emqx_ctl:print("Rebalance(evacuation) stopped~n"),
            true;
        disabled ->
            case emqx_node_rebalance:status() of
                {enabled, _} ->
                    ok = emqx_node_rebalance:stop(),
                    emqx_ctl:print("Rebalance stopped~n"),
                    true;
                disabled ->
                    emqx_ctl:print("Rebalance is already disabled~n"),
                    false
            end
    end;
cli(_) ->
    emqx_ctl:usage(
        [
            {
                "rebalance start --evacuation \\\n"
                " [--redirect-to \"Host1:Port1 Host2:Port2 ...\"] \\\n"
                " [--conn-evict-rate CountPerSec] \\\n"
                " [--migrate-to \"node1@host1 node2@host2 ...\"] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec]",
                "Start current node evacuation with optional server redirect to the specified servers"
            },

            {
                "rebalance start \\\n"
                " [--nodes \"node1@host1 node2@host2\"] \\\n"
                " [--wait-health-check Secs] \\\n"
                " [--conn-evict-rate ConnPerSec] \\\n"
                " [--abs-conn-threshold Count] \\\n"
                " [--rel-conn-threshold Fraction] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec] \\\n"
                " [--abs-sess-threshold Count] \\\n"
                " [--rel-sess-threshold Fraction]",
                "Start rebalance on the specified nodes using the current node as the coordinator"
            },

            {"rebalance node-status", "Get current node rebalance status"},

            {"rebalance node-status \"node1@host1\"", "Get remote node rebalance status"},

            {"rebalance status",
                "Get statuses of all current rebalance/evacuation processes across the cluster"},

            {"rebalance stop", "Stop node rebalance"}
        ]
    ).

%% Starts the evacuation and reports the outcome. The start request may still
%% be refused although the evacuation status was `disabled' (the evacuation
%% process replies `{error, eviction_agent_busy}' or `{error, already_started}'
%% in that case), so the reason is printed instead of crashing the command on
%% a failed `ok' match.
cli_start_evacuation(Opts) ->
    case emqx_node_rebalance_evacuation:start(Opts) of
        ok ->
            emqx_ctl:print("Rebalance(evacuation) started~n"),
            true;
        {error, Reason} ->
            emqx_ctl:print("Rebalance(evacuation) start error: ~p~n", [Reason]),
            false
    end.
|
||||
|
||||
%% @doc Prints a node status as returned by
%% emqx_node_rebalance_status:local_status/0,1. Any value that is neither an
%% `{evacuation | rebalance, Status}' pair nor `disabled' is printed as an
%% error.
node_status({Process, Status}) when Process =:= evacuation; Process =:= rebalance ->
    Formatted = emqx_node_rebalance_status:format_local_status(Status),
    emqx_ctl:print("Rebalance type: ~p~n~s~n", [Process, Formatted]);
node_status(disabled) ->
    emqx_ctl:print("Rebalance disabled~n");
node_status(Unexpected) ->
    emqx_ctl:print("Error detecting rebalance status: ~p~n", [Unexpected]).
|
||||
|
||||
%% @doc Parses the arguments of `rebalance start'.
%%
%% Returns `{evacuation, Opts}' when `--evacuation' was given,
%% `{rebalance, Opts}' otherwise, or `{error, Message}' when the arguments
%% could not be collected or validated.
start_args(Args) ->
    case collect_args(Args, #{}) of
        {ok, #{"--evacuation" := true} = Collected} ->
            validated_as(evacuation, validate_evacuation(maps:to_list(Collected), #{}));
        {ok, #{} = Collected} ->
            validated_as(rebalance, validate_rebalance(maps:to_list(Collected), #{}));
        {error, _} = CollectError ->
            CollectError
    end.

%% Tags successfully validated options with the kind of process to start;
%% validation errors are passed through unchanged.
validated_as(Kind, ValidationResult) ->
    case ValidationResult of
        {ok, Validated} ->
            {Kind, Validated};
        {error, _} = ValidationError ->
            ValidationError
    end.
|
||||
|
||||
%% @doc Collects raw command line arguments into a map keyed by the option
%% name. `--evacuation' is a bare flag stored as `true'; every other known
%% option consumes the following argument as its (still unvalidated) value.
%% Returns `{error, Message}' as soon as the remaining arguments do not start
%% with a known option followed by its value.
collect_args([], Collected) ->
    {ok, Collected};
collect_args(["--evacuation" | Remaining], Collected) ->
    collect_args(Remaining, Collected#{"--evacuation" => true});
collect_args([Option, Value | Remaining] = Args, Collected) ->
    case lists:member(Option, valued_flags()) of
        true ->
            collect_args(Remaining, Collected#{Option => Value});
        false ->
            {error, io_lib:format("unknown arguments: ~p", [Args])}
    end;
collect_args(Args, _Collected) ->
    {error, io_lib:format("unknown arguments: ~p", [Args])}.

%% Options that must be followed by a value.
valued_flags() ->
    [
        %% evacuation
        "--redirect-to",
        "--migrate-to",
        %% rebalance
        "--nodes",
        "--wait-health-check",
        "--abs-conn-threshold",
        "--rel-conn-threshold",
        "--abs-sess-threshold",
        "--rel-sess-threshold",
        %% common
        "--conn-evict-rate",
        "--wait-takeover",
        "--sess-evict-rate"
    ].
|
||||
|
||||
%% @doc Validates collected `{Option, Value}' pairs for an evacuation and
%% converts them into evacuation start options.
%% Returns `{ok, Opts}' or `{error, Message}' for the first invalid or
%% unsupported option.
validate_evacuation([], Acc) ->
    {ok, Acc};
validate_evacuation([{"--evacuation", _} | Rest], Acc) ->
    %% the flag only selects the evacuation mode and carries no option
    validate_evacuation(Rest, Acc);
validate_evacuation([{"--redirect-to", ServerReference} | Rest], Acc) ->
    validate_evacuation(Rest, Acc#{server_reference => list_to_binary(ServerReference)});
validate_evacuation([{"--conn-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(conn_evict_rate, Opts, Acc, fun validate_evacuation/2);
validate_evacuation([{"--sess-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(sess_evict_rate, Opts, Acc, fun validate_evacuation/2);
validate_evacuation([{"--wait-takeover", _} | _] = Opts, Acc) ->
    validate_pos_int(wait_takeover, Opts, Acc, fun validate_evacuation/2);
validate_evacuation([{"--migrate-to", MigrateTo} | Rest], Acc) ->
    %% node names may be separated by commas and/or spaces
    case strings_to_atoms(string:tokens(MigrateTo, ", ")) of
        {Requested, []} ->
            case emqx_node_rebalance_evacuation:available_nodes(Requested) of
                [] ->
                    {error, "invalid --migrate-to, no nodes"};
                Requested ->
                    %% every requested node is available
                    validate_evacuation(Rest, Acc#{migrate_to => Requested});
                Available ->
                    {error,
                        io_lib:format(
                            "invalid --migrate-to, unavailable nodes: ~p",
                            [Requested -- Available]
                        )}
            end;
        {_, Invalid} ->
            {error, io_lib:format("invalid --migrate-to, invalid nodes: ~p", [Invalid])}
    end;
validate_evacuation(Unknown, _Acc) ->
    {error, io_lib:format("unknown evacuation arguments: ~p", [Unknown])}.
|
||||
|
||||
%% @doc Validates collected `{Option, Value}' pairs for a rebalance and
%% converts them into rebalance start options.
%% Returns `{ok, Opts}' or `{error, Message}' for the first invalid or
%% unsupported option.
validate_rebalance([], Acc) ->
    {ok, Acc};
%% timings
validate_rebalance([{"--wait-health-check", _} | _] = Opts, Acc) ->
    validate_pos_int(wait_health_check, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--wait-takeover", _} | _] = Opts, Acc) ->
    validate_pos_int(wait_takeover, Opts, Acc, fun validate_rebalance/2);
%% eviction rates
validate_rebalance([{"--conn-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(conn_evict_rate, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--sess-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(sess_evict_rate, Opts, Acc, fun validate_rebalance/2);
%% thresholds
validate_rebalance([{"--abs-conn-threshold", _} | _] = Opts, Acc) ->
    validate_pos_int(abs_conn_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-conn-threshold", _} | _] = Opts, Acc) ->
    validate_fraction(rel_conn_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--abs-sess-threshold", _} | _] = Opts, Acc) ->
    validate_pos_int(abs_sess_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-sess-threshold", _} | _] = Opts, Acc) ->
    validate_fraction(rel_sess_threshold, Opts, Acc, fun validate_rebalance/2);
%% participating nodes
validate_rebalance([{"--nodes", NodeStr} | Rest], Acc) ->
    %% node names may be separated by commas and/or spaces
    case strings_to_atoms(string:tokens(NodeStr, ", ")) of
        {Requested, []} ->
            case emqx_node_rebalance:available_nodes(Requested) of
                [] ->
                    {error, "invalid --nodes, no nodes"};
                Requested ->
                    %% every requested node is available
                    validate_rebalance(Rest, Acc#{nodes => Requested});
                Available ->
                    {error,
                        io_lib:format(
                            "invalid --nodes, unavailable nodes: ~p",
                            [Requested -- Available]
                        )}
            end;
        {_, Invalid} ->
            {error, io_lib:format("invalid --nodes, invalid nodes: ~p", [Invalid])}
    end;
validate_rebalance(Unknown, _Acc) ->
    {error, io_lib:format("unknown rebalance arguments: ~p", [Unknown])}.
|
||||
|
||||
%% @doc Validates the value of the first option as a number greater than 1.0,
%% stores it (as a float) under `Name' and continues with `Next(Rest, Map)'.
%%
%% Both float ("1.5") and integer ("2") literals are accepted; an integer is
%% converted to the corresponding float. Any other text, trailing garbage, or
%% a value that is not greater than 1.0 yields `{error, Message}'.
validate_fraction(Name, [{OptionName, Value} | Rest], Map, Next) ->
    case parse_fraction(Value) of
        {ok, Num} when Num > 1.0 ->
            Next(Rest, Map#{Name => Num});
        _ ->
            {error, "invalid " ++ OptionName ++ " value"}
    end.

%% Parses a complete string as a float. string:to_float/1 does not accept
%% integer literals such as "2", so those are parsed with string:to_integer/1
%% and converted.
parse_fraction(Value) ->
    case string:to_float(Value) of
        {Float, ""} when is_float(Float) ->
            {ok, Float};
        _ ->
            case string:to_integer(Value) of
                {Int, ""} when is_integer(Int) ->
                    {ok, float(Int)};
                _ ->
                    error
            end
    end.
|
||||
|
||||
%% @doc Validates the value of the first option as a strictly positive
%% integer, stores it under `Name' and continues with `Next(Rest, Map)'.
%% A value that is not entirely an integer literal, or that is not greater
%% than zero, yields `{error, Message}'.
validate_pos_int(Name, [{OptionName, RawValue} | Remaining], Acc, Next) ->
    case string:to_integer(RawValue) of
        {Parsed, []} when Parsed > 0 ->
            Next(Remaining, Acc#{Name => Parsed});
        _Invalid ->
            {error, lists:append(["invalid ", OptionName, " value"])}
    end.
|
||||
|
||||
%% @doc Converts strings to already existing atoms.
%% Returns `{Atoms, Invalid}': the atoms of all strings that could be
%% converted and the strings that could not, both in input order.
strings_to_atoms(Strings) ->
    {AtomsRev, InvalidRev} = lists:foldl(
        fun(Str, {Atoms, Invalid}) ->
            case emqx_utils:safe_to_existing_atom(Str, utf8) of
                {ok, Atom} ->
                    {[Atom | Atoms], Invalid};
                {error, _} ->
                    {Atoms, [Str | Invalid]}
            end
        end,
        {[], []},
        Strings
    ),
    %% both accumulators were built by prepending
    {lists:reverse(AtomsRev), lists:reverse(InvalidRev)}.
|
|
@ -0,0 +1,308 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation).
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start/1,
|
||||
status/0,
|
||||
stop/0
|
||||
]).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-behaviour(gen_statem).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
callback_mode/0,
|
||||
handle_event/4,
|
||||
code_change/4
|
||||
]).
|
||||
|
||||
-export([
|
||||
is_node_available/0,
|
||||
available_nodes/1
|
||||
]).
|
||||
|
||||
%% migrate_to/0 is exported as well: it is referenced as a remote type
%% (emqx_node_rebalance_evacuation:migrate_to()) by the start_opts() type of
%% emqx_node_rebalance_evacuation_persist.
-export_type([
    start_opts/0,
    start_error/0,
    migrate_to/0
]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-export([migrate_to/1]).
|
||||
-endif.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-define(EVICT_INTERVAL_NO_NODES, 30000).
|
||||
|
||||
-type migrate_to() :: [node()] | undefined.
|
||||
|
||||
-type start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer(),
|
||||
migrate_to => migrate_to()
|
||||
}.
|
||||
-type start_error() :: already_started | eviction_agent_busy.
|
||||
%% Statistics returned by status/0 while an evacuation is active.
%% `state' is the gen_statem state at the time of the call; the status
%% handler always adds it to the reply, so it is part of the type.
-type stats() :: #{
    state := evicting_conns | waiting_takeover | evicting_sessions | prohibiting,
    initial_conns := non_neg_integer(),
    initial_sessions := non_neg_integer(),
    current_conns := non_neg_integer(),
    current_sessions := non_neg_integer(),
    conn_evict_rate := pos_integer(),
    sess_evict_rate := pos_integer(),
    server_reference := emqx_eviction_agent:server_reference(),
    migrate_to := migrate_to()
}.
|
||||
-type status() :: {enabled, stats()} | disabled.
|
||||
|
||||
%% @doc Asks the evacuation process to start an evacuation.
%% Options missing from `StartOpts' are taken from default_opts/0.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    gen_statem:call(?MODULE, {start, maps:merge(default_opts(), StartOpts)}).
|
||||
|
||||
%% @doc Asks the evacuation process to stop the evacuation.
%% The process replies `{error, not_started}' when it is in the `disabled'
%% state.
-spec stop() -> ok_or_error(not_started).
stop() ->
    Request = stop,
    gen_statem:call(?MODULE, Request).
|
||||
|
||||
%% @doc Queries the evacuation process: `disabled', or `{enabled, Stats}'
%% while an evacuation is active.
-spec status() -> status().
status() ->
    Request = status,
    gen_statem:call(?MODULE, Request).
|
||||
|
||||
%% @doc Starts the evacuation state machine, locally registered under the
%% module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_statem:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% @doc Returns those of `Nodes' that the `available_nodes' RPC reports back
%% as an atom; every other result is dropped. The second element of the RPC
%% result is ignored.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Results, _} = emqx_node_rebalance_evacuation_proto_v1:available_nodes(Nodes),
    [Result || Result <- Results, is_atom(Result)].
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_statem callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc gen_statem callback: all events are handled by handle_event/4.
callback_mode() ->
    handle_event_function.
|
||||
|
||||
%% states: disabled, evicting_conns, waiting_takeover, evicting_sessions, prohibiting
|
||||
|
||||
%% @doc gen_statem callback.
%% When persisted evacuation options can be read, the evacuation is resumed:
%% the eviction agent is enabled and connection eviction starts immediately.
%% If the agent is busy, the persisted options are cleared and the process
%% starts in the `disabled' state, as it does when nothing was persisted.
init([]) ->
    case emqx_node_rebalance_evacuation_persist:read(default_opts()) of
        none ->
            {ok, disabled, #{}};
        {ok, #{server_reference := ServerReference} = Opts} ->
            ?SLOG(warning, #{msg => "restoring_evacuation_state", opts => Opts}),
            case emqx_eviction_agent:enable(?MODULE, ServerReference) of
                {error, eviction_agent_busy} ->
                    emqx_node_rebalance_evacuation_persist:clear(),
                    {ok, disabled, #{}};
                ok ->
                    Restored = init_data(#{}, Opts),
                    ok = warn_enabled(),
                    {ok, evicting_conns, Restored, [{state_timeout, 0, evict_conns}]}
            end
    end.
|
||||
|
||||
%% start
|
||||
%% @doc gen_statem callback handling every event of the evacuation process.
%%
%% State flow once started:
%%   disabled -> evicting_conns -> waiting_takeover -> evicting_sessions
%%            -> prohibiting
%% `stop' returns to `disabled' from any other state.
%%
%% start: only accepted in the `disabled' state
handle_event(
    {call, From},
    {start, #{server_reference := ServerReference} = Opts},
    disabled,
    #{} = Data
) ->
    case emqx_eviction_agent:enable(?MODULE, ServerReference) of
        {error, eviction_agent_busy} ->
            {keep_state_and_data, [{reply, From, {error, eviction_agent_busy}}]};
        ok ->
            Started = init_data(Data, Opts),
            ok = emqx_node_rebalance_evacuation_persist:save(Opts),
            ?SLOG(warning, #{msg => "node_evacuation_started", opts => Opts}),
            %% begin evicting connections right away and confirm to the caller
            Actions = [{state_timeout, 0, evict_conns}, {reply, From, ok}],
            {next_state, evicting_conns, Started, Actions}
    end;
handle_event({call, From}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, From, {error, already_started}}]};
%% stop: rejected in `disabled', otherwise resets everything
handle_event({call, From}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, {error, not_started}}]};
handle_event({call, From}, stop, _State, Data) ->
    ok = emqx_node_rebalance_evacuation_persist:clear(),
    _ = emqx_eviction_agent:disable(?MODULE),
    ?SLOG(warning, #{msg => "node_evacuation_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, From, ok}]};
%% status
handle_event({call, From}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, disabled}]};
handle_event({call, From}, status, State, #{migrate_to := MigrateTo} = Data) ->
    ReportedKeys = [
        initial_conns,
        current_conns,
        initial_sessions,
        current_sessions,
        server_reference,
        conn_evict_rate,
        sess_evict_rate
    ],
    Reported = maps:with(ReportedKeys, Data),
    %% `migrate_to' is reported as resolved by migrate_to/1, not as stored
    Stats = Reported#{state => State, migrate_to => migrate_to(MigrateTo)},
    {keep_state_and_data, [{reply, From, {enabled, Stats}}]};
%% connection eviction: repeated every ?EVICT_INTERVAL until no connections
%% are left, then the takeover wait begins
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{
        conn_evict_rate := ConnEvictRate,
        wait_takeover := WaitTakeover
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{connections := ConnCount}} when ConnCount > 0 ->
            ok = emqx_eviction_agent:evict_connections(ConnEvictRate),
            ?tp(debug, node_evacuation_evict_conn, #{conn_evict_rate => ConnEvictRate}),
            ?SLOG(
                warning,
                #{
                    msg => "node_evacuation_evict_conns",
                    count => ConnCount,
                    conn_evict_rate => ConnEvictRate
                }
            ),
            Updated = Data#{current_conns => ConnCount},
            {keep_state, Updated, [{state_timeout, ?EVICT_INTERVAL, evict_conns}]};
        {enabled, #{connections := 0}} ->
            Updated = Data#{current_conns => 0},
            ?SLOG(warning, #{msg => "node_evacuation_evict_conns_done"}),
            TakeoverTimeout = timer:seconds(WaitTakeover),
            {next_state, waiting_takeover, Updated, [
                {state_timeout, TakeoverTimeout, evict_sessions}
            ]}
    end;
%% the takeover wait is over: move on to session eviction
handle_event(
    state_timeout,
    evict_sessions,
    waiting_takeover,
    Data
) ->
    ?SLOG(warning, #{msg => "node_evacuation_waiting_takeover_done"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
%% session eviction: repeated until no sessions are left
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{
        sess_evict_rate := SessEvictRate,
        migrate_to := MigrateTo,
        current_sessions := CurrSessCount
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{sessions := SessCount}} when SessCount > 0 ->
            case migrate_to(MigrateTo) of
                [] ->
                    %% nowhere to move sessions to: retry after the longer
                    %% ?EVICT_INTERVAL_NO_NODES interval
                    ?SLOG(warning, #{
                        msg => "no_nodes_to_evacuate_sessions", session_count => CurrSessCount
                    }),
                    {keep_state_and_data, [
                        {state_timeout, ?EVICT_INTERVAL_NO_NODES, evict_sessions}
                    ]};
                TargetNodes ->
                    ok = emqx_eviction_agent:evict_sessions(SessEvictRate, TargetNodes),
                    ?SLOG(
                        warning,
                        #{
                            msg => "node_evacuation_evict_sessions",
                            session_count => SessCount,
                            session_evict_rate => SessEvictRate,
                            target_nodes => TargetNodes
                        }
                    ),
                    Updated = Data#{current_sessions => SessCount},
                    {keep_state, Updated, [{state_timeout, ?EVICT_INTERVAL, evict_sessions}]}
            end;
        {enabled, #{sessions := 0}} ->
            ?tp(debug, node_evacuation_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_evacuation_evict_sessions_over"}),
            Updated = Data#{current_sessions => 0},
            %% no timeout is scheduled in the `prohibiting' state
            {next_state, prohibiting, Updated}
    end;
%% anything else is logged and ignored
handle_event({call, From}, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Msg, state => State, data => Data}),
    {keep_state_and_data, [{reply, From, ignored}]};
handle_event(info, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_info", info => Msg, state => State, data => Data}),
    keep_state_and_data;
handle_event(cast, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast => Msg, state => State, data => Data}),
    keep_state_and_data.
|
||||
|
||||
%% @doc gen_statem callback: state and data are kept unchanged on code change.
code_change(_OldVsn, State, Data, _Extra) ->
    {ok, State, Data}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% internal funs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc Default evacuation options: no server reference, no explicit
%% migration targets, and the rates/timeout given by the ?DEFAULT_* macros.
default_opts() ->
    #{
        server_reference => undefined,
        migrate_to => undefined,
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER
    }.
|
||||
|
||||
%% @doc Builds the state data of a starting evacuation: `Opts' merged over
%% `Data0', plus the connection and session counts currently reported by the
%% eviction agent as both the initial and the current values.
%% The eviction agent must be enabled, otherwise the match fails.
init_data(Data0, Opts) ->
    Merged = maps:merge(Data0, Opts),
    {enabled, #{connections := Conns, sessions := Sessions}} = emqx_eviction_agent:status(),
    Counters = #{
        initial_conns => Conns,
        current_conns => Conns,
        initial_sessions => Sessions,
        current_sessions => Sessions
    },
    maps:merge(Merged, Counters).
|
||||
|
||||
%% @doc Removes everything an evacuation added to the state data: the
%% connection/session counters and all option keys of default_opts/0.
deinit(Data) ->
    CounterKeys = [initial_conns, current_conns, initial_sessions, current_sessions],
    OptionKeys = maps:keys(default_opts()),
    maps:without(CounterKeys ++ OptionKeys, Data).
|
||||
|
||||
%% @doc Logs a warning and prints a notice to standard error saying that
%% node evacuation is enabled.
warn_enabled() ->
    ?SLOG(warning, #{msg => "node_evacuation_enabled"}),
    Notice = "Node evacuation is enabled. The node will not receive connections.~n",
    io:format(standard_error, Notice, []).
|
||||
|
||||
%% @doc Resolves the nodes sessions can be migrated to.
%% `undefined' stands for all other running nodes; in both cases only the
%% nodes returned by available_nodes/1 are kept.
migrate_to(undefined) ->
    Candidates = all_nodes(),
    migrate_to(Candidates);
migrate_to(Candidates) when is_list(Candidates) ->
    available_nodes(Candidates).
|
||||
|
||||
%% @doc Returns the local node name when the eviction agent is disabled on
%% this node. Any other agent status makes the call fail with a badmatch,
%% so no node name is returned in that case.
is_node_available() ->
    disabled = emqx_eviction_agent:status(),
    node().
|
||||
|
||||
%% @doc All running nodes reported by mria_mnesia, without the local node.
all_nodes() ->
    Running = mria_mnesia:running_nodes(),
    lists:delete(node(), Running).
|
|
@ -0,0 +1,120 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_persist).
|
||||
|
||||
-export([
|
||||
save/1,
|
||||
clear/0,
|
||||
read/1
|
||||
]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-export([evacuation_filepath/0]).
|
||||
-endif.
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% do not persist `migrate_to`:
|
||||
%% * after restart there is nothing to migrate
|
||||
%% * this value may be invalid after node was offline
|
||||
-type persisted_start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer()
|
||||
}.
|
||||
-type start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer(),
|
||||
migrate_to => emqx_node_rebalance_evacuation:migrate_to()
|
||||
}.
|
||||
|
||||
%% @doc Writes the evacuation options to the evacuation file as pretty
%% printed JSON. Only the keys listed by persist_keys/0 are written.
%%
%% The clause only matches when the server reference is a binary or
%% `undefined', both eviction rates are positive integers and the takeover
%% wait is a non-negative integer.
%% Returns `ok', or the error of creating the directory or writing the file.
-spec save(persisted_start_opts()) -> ok_or_error(term()).
save(
    #{
        server_reference := ServerRef,
        conn_evict_rate := ConnRate,
        sess_evict_rate := SessRate,
        wait_takeover := Takeover
    } = Opts
) when
    (is_binary(ServerRef) orelse ServerRef =:= undefined) andalso
        is_integer(ConnRate) andalso ConnRate > 0 andalso
        is_integer(SessRate) andalso SessRate > 0 andalso
        is_integer(Takeover) andalso Takeover >= 0
->
    Path = evacuation_filepath(),
    case filelib:ensure_dir(Path) of
        {error, _} = DirError ->
            DirError;
        ok ->
            Persisted = prepare_for_encode(maps:with(persist_keys(), Opts)),
            Json = emqx_utils_json:encode(Persisted, [pretty]),
            file:write_file(Path, Json)
    end.
|
||||
|
||||
%% @doc Deletes the persisted evacuation options.
%%
%% A missing file counts as success, because the options are then already
%% cleared; callers that assert `ok = clear()' therefore no longer crash when
%% nothing was persisted. Any other file error is returned to the caller.
-spec clear() -> ok | {error, file:posix() | badarg}.
clear() ->
    case file:delete(evacuation_filepath()) of
        ok ->
            ok;
        {error, enoent} ->
            ok;
        {error, _} = Error ->
            Error
    end.
|
||||
|
||||
%% @doc Reads the persisted evacuation options.
%%
%% Returns `none' when the evacuation file cannot be read. When it can be
%% read, the result is always `{ok, Opts}': values from the decoded JSON
%% object override `DefaultOpts', and content that does not decode to a map
%% yields `DefaultOpts' unchanged.
-spec read(start_opts()) -> {ok, start_opts()} | none.
read(DefaultOpts) ->
    case file:read_file(evacuation_filepath()) of
        {error, _} ->
            none;
        {ok, Contents} ->
            {ok, decode_opts(Contents, DefaultOpts)}
    end.

%% Decodes the file contents into options, falling back to the defaults when
%% the contents are not a JSON object.
decode_opts(Contents, DefaultOpts) ->
    case emqx_utils_json:safe_decode(Contents, [return_maps]) of
        {ok, Decoded} when is_map(Decoded) ->
            map_to_opts(DefaultOpts, Decoded);
        _NotAMap ->
            DefaultOpts
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal funcs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc The option keys that save/1 writes to the evacuation file.
persist_keys() ->
    [server_reference, conn_evict_rate, sess_evict_rate, wait_takeover].
|
||||
|
||||
%% @doc Replaces an `undefined' server reference with `null' before the data
%% is JSON encoded; any other data is returned unchanged.
prepare_for_encode(Data) ->
    case Data of
        #{server_reference := undefined} ->
            Data#{server_reference => null};
        _ ->
            Data
    end.
|
||||
|
||||
%% @doc Replaces a decoded `null' server reference with `undefined'; any
%% other data is returned unchanged.
format_after_decode(Data) ->
    case Data of
        #{server_reference := null} ->
            Data#{server_reference => undefined};
        _ ->
            Data
    end.
|
||||
|
||||
%% @doc Builds an options map with exactly the keys of `DefaultOpts'.
%% Each value is taken from `Map' under the binary form of the key, or from
%% `DefaultOpts' when `Map' has no such key. A `null' server reference is
%% finally turned into `undefined' by format_after_decode/1.
map_to_opts(DefaultOpts, Map) ->
    Opts = maps:map(
        fun(Key, DefaultVal) ->
            maps:get(atom_to_binary(Key), Map, DefaultVal)
        end,
        DefaultOpts
    ),
    format_after_decode(Opts).
|
||||
|
||||
%% @doc Path of the evacuation file: ?EVACUATION_FILENAME inside the EMQX
%% data directory.
evacuation_filepath() ->
    DataDir = emqx:data_dir(),
    filename:join(DataDir, ?EVACUATION_FILENAME).
|
|
@ -0,0 +1,238 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_status).
|
||||
|
||||
-export([
|
||||
local_status/0,
|
||||
local_status/1,
|
||||
global_status/0,
|
||||
format_local_status/1,
|
||||
format_coordinator_status/1
|
||||
]).
|
||||
|
||||
%% For RPC
|
||||
-export([
|
||||
evacuation_status/0,
|
||||
rebalance_status/0
|
||||
]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc Status of the local node.
%%
%% An active evacuation is reported first. Otherwise, when the rebalance
%% agent is enabled, the status of its coordinator is looked up and reported
%% through local_rebalance/2. In every other case the result is `disabled'.
-spec local_status() -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status() ->
    case emqx_node_rebalance_evacuation:status() of
        {enabled, Status} ->
            {evacuation, evacuation(Status)};
        disabled ->
            agent_rebalance_status(emqx_node_rebalance_agent:status())
    end.

%% Resolves the local rebalance status from the rebalance agent status.
agent_rebalance_status(AgentStatus) ->
    case AgentStatus of
        {enabled, CoordinatorPid} ->
            case emqx_node_rebalance:status(CoordinatorPid) of
                {enabled, Status} ->
                    local_rebalance(Status, node());
                disabled ->
                    disabled
            end;
        disabled ->
            disabled
    end.
|
||||
|
||||
%% @doc Status of the given node, requested through
%% emqx_node_rebalance_status_proto_v1.
-spec local_status(node()) -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status(TargetNode) ->
    emqx_node_rebalance_status_proto_v1:local_status(TargetNode).
|
||||
|
||||
%% @doc Formats a local status map for printing, with the fields in the
%% order given by local_status_field_format_order/0.
-spec format_local_status(map()) -> iodata().
format_local_status(Status) ->
    FieldOrder = local_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||
|
||||
%% @doc Collects the rebalance and evacuation statuses of all running nodes.
%% Only `{Node, {enabled, Status}}' results are kept; the second element of
%% each RPC result is ignored.
-spec global_status() -> #{rebalances := [{node(), map()}], evacuations := [{node(), map()}]}.
global_status() ->
    RunningNodes = mria_mnesia:running_nodes(),
    {RebalanceReplies, _} = emqx_node_rebalance_status_proto_v1:rebalance_status(RunningNodes),
    Rebalances = [
        {Node, coordinator_rebalance(Status)}
     || {Node, {enabled, Status}} <- RebalanceReplies
    ],
    {EvacuationReplies, _} = emqx_node_rebalance_status_proto_v1:evacuation_status(RunningNodes),
    Evacuations = [
        {Node, evacuation(Status)}
     || {Node, {enabled, Status}} <- EvacuationReplies
    ],
    #{evacuations => Evacuations, rebalances => Rebalances}.
|
||||
|
||||
%% @doc Formats a coordinator status map for printing, with the fields in
%% the order given by coordinator_status_field_format_order/0.
-spec format_coordinator_status(map()) -> iodata().
format_coordinator_status(Status) ->
    FieldOrder = coordinator_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc Converts the stats map of an enabled evacuation into the status map
%% used for reporting. Both goals are always 0. All keys read from `Status'
%% are required.
evacuation(Status) ->
    ChannelStats = #{
        initial_connected => maps:get(initial_conns, Status),
        current_connected => maps:get(current_conns, Status),
        initial_sessions => maps:get(initial_sessions, Status),
        current_sessions => maps:get(current_sessions, Status)
    },
    #{
        state => maps:get(state, Status),
        connection_eviction_rate => maps:get(conn_evict_rate, Status),
        session_eviction_rate => maps:get(sess_evict_rate, Status),
        connection_goal => 0,
        session_goal => 0,
        session_recipients => maps:get(migrate_to, Status),
        stats => ChannelStats
    }.
|
||||
|
||||
%% @doc Reports a rebalance for `Node' only when it is one of the donors of
%% the rebalance; otherwise the result is `disabled'.
local_rebalance(#{donors := Donors} = Stats, Node) ->
    IsDonor = lists:member(Node, Donors),
    case IsDonor of
        false -> disabled;
        true -> {rebalance, donor_rebalance(Stats, Node)}
    end.
|
||||
|
||||
%% @doc Builds the reported status of a donor node from the coordinator
%% status. Initial counts are read from the coordinator status for `Node';
%% current counts are queried from the eviction agent. The two goal fields
%% are only included when the corresponding recipient average is present in
%% `Status'.
donor_rebalance(Status, Node) ->
    Opts = maps:get(opts, Status),
    ConnCounts = maps:get(initial_conn_counts, Status),
    SessCounts = maps:get(initial_sess_counts, Status),

    ChannelStats = #{
        initial_connected => maps:get(Node, ConnCounts),
        initial_sessions => maps:get(Node, SessCounts),
        current_connected => emqx_eviction_agent:connection_count(),
        current_sessions => emqx_eviction_agent:session_count(),
        current_disconnected_sessions => emqx_eviction_agent:session_count(
            disconnected
        )
    },
    Required = [
        {state, maps:get(state, Status)},
        {coordinator_node, maps:get(coordinator_node, Status)},
        {connection_eviction_rate, maps:get(conn_evict_rate, Opts)},
        {session_eviction_rate, maps:get(sess_evict_rate, Opts)},
        {recipients, maps:get(recipients, Status)},
        {stats, ChannelStats}
    ],
    Optional = [
        {ReportKey, maps:get(StatusKey, Status)}
     || {ReportKey, StatusKey} <- [
            {connection_goal, recipient_conn_avg},
            {disconnected_session_goal, recipient_sess_avg}
        ],
        maps:is_key(StatusKey, Status)
    ],
    maps:from_list(Required ++ Optional).
|
||||
|
||||
%% @doc Builds the reported status of a rebalance coordinator.
%% The goal and donor average fields are only included when the
%% corresponding key is present in `Status'; all other keys are required.
coordinator_rebalance(Status) ->
    Opts = maps:get(opts, Status),
    Required = [
        {state, maps:get(state, Status)},
        {coordinator_node, maps:get(coordinator_node, Status)},
        {connection_eviction_rate, maps:get(conn_evict_rate, Opts)},
        {session_eviction_rate, maps:get(sess_evict_rate, Opts)},
        {recipients, maps:get(recipients, Status)},
        {donors, maps:get(donors, Status)}
    ],
    Optional = [
        {ReportKey, maps:get(StatusKey, Status)}
     || {ReportKey, StatusKey} <- [
            {connection_goal, recipient_conn_avg},
            {disconnected_session_goal, recipient_sess_avg},
            {donor_conn_avg, donor_conn_avg},
            {donor_sess_avg, donor_sess_avg}
        ],
        maps:is_key(StatusKey, Status)
    ],
    maps:from_list(Required ++ Optional).
|
||||
|
||||
%% @doc Order in which the fields of a local status are printed.
local_status_field_format_order() ->
    [
        state,
        coordinator_node,
        %% rates
        connection_eviction_rate,
        session_eviction_rate,
        %% goals
        connection_goal,
        session_goal,
        disconnected_session_goal,
        %% target nodes
        session_recipients,
        recipients,
        stats
    ].
|
||||
|
||||
%% @doc Order in which the fields of a coordinator status are printed.
coordinator_status_field_format_order() ->
    [
        state,
        coordinator_node,
        %% participating nodes
        donors,
        recipients,
        %% rates
        connection_eviction_rate,
        session_eviction_rate,
        %% goals
        connection_goal,
        disconnected_session_goal,
        %% current donor averages
        donor_conn_avg,
        donor_sess_avg
    ].
|
||||
|
||||
%% @doc Formats the fields of `Status' in the order given by `FieldOrder'.
%% Fields listed in `FieldOrder' but missing from `Status' are skipped, and
%% fields of `Status' that are not listed are not printed.
format_status(Status, FieldOrder) ->
    [
        format_local_status_field({Field, maps:get(Field, Status)})
     || Field <- FieldOrder,
        maps:is_key(Field, Status)
    ].
|
||||
|
||||
%% @doc Formats one `{Field, Value}' pair of a status map as one printable
%% line. The `stats' field is a nested map and is formatted by
%% format_local_stats/1. Unknown fields raise `function_clause'.
format_local_status_field({stats, Stats}) ->
    format_local_stats(Stats);
format_local_status_field({Field, Value}) ->
    io_lib:format(status_field_format(Field), [Value]).

%% Format string of each scalar status field.
status_field_format(state) ->
    "Rebalance state: ~p~n";
status_field_format(coordinator_node) ->
    "Coordinator node: ~p~n";
status_field_format(connection_eviction_rate) ->
    "Connection eviction rate: ~p connections/second~n";
status_field_format(session_eviction_rate) ->
    "Session eviction rate: ~p sessions/second~n";
status_field_format(connection_goal) ->
    "Connection goal: ~p~n";
status_field_format(session_goal) ->
    "Session goal: ~p~n";
status_field_format(disconnected_session_goal) ->
    "Disconnected session goal: ~p~n";
status_field_format(session_recipients) ->
    "Session recipient nodes: ~p~n";
status_field_format(recipients) ->
    "Recipient nodes: ~p~n";
status_field_format(donors) ->
    "Donor nodes: ~p~n";
status_field_format(donor_conn_avg) ->
    "Current average donor node connection count: ~p~n";
status_field_format(donor_sess_avg) ->
    "Current average donor node disconnected session count: ~p~n".
|
||||
|
||||
%% @doc Formats the channel statistics map: a heading followed by one
%% `Name: Value' line per entry of `Stats'.
format_local_stats(Stats) ->
    Lines = [
        io_lib:format(" ~p: ~p~n", [Name, Value])
     || {Name, Value} <- maps:to_list(Stats)
    ],
    ["Channel statistics:\n" | Lines].
|
||||
|
||||
%% @doc RPC target: the local evacuation status tagged with the local node
%% name.
evacuation_status() ->
    Status = emqx_node_rebalance_evacuation:status(),
    {node(), Status}.
|
||||
|
||||
%% @doc RPC target: the local rebalance status tagged with the local node
%% name.
rebalance_status() ->
    Status = emqx_node_rebalance:status(),
    {node(), Status}.
|
|
@ -0,0 +1,35 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Start the supervisor, registered locally under the module name.
start_link() ->
    RegName = {local, ?MODULE},
    supervisor:start_link(RegName, ?MODULE, []).
|
||||
|
||||
%% supervisor callback: one_for_one supervision of the evacuation, agent and
%% rebalance workers, allowing at most 10 restarts per 3600 seconds.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    Workers = [
        emqx_node_rebalance_evacuation,
        emqx_node_rebalance_agent,
        emqx_node_rebalance
    ],
    ChildSpecs = [child_spec(Worker, []) || Worker <- Workers],
    {ok, {SupFlags, ChildSpecs}}.
|
||||
|
||||
%% Build a permanent worker child spec for Mod, started through
%% Mod:start_link with Args and given 5000 ms to shut down.
child_spec(Mod, Args) ->
    StartMFA = {Mod, start_link, Args},
    #{
        id => Mod,
        start => StartMFA,
        restart => permanent,
        shutdown => 5000,
        type => worker,
        modules => [Mod]
    }.
|
|
@ -0,0 +1,43 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_api_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
node_rebalance_evacuation_start/2,
|
||||
node_rebalance_evacuation_stop/1,
|
||||
|
||||
node_rebalance_start/2,
|
||||
node_rebalance_stop/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% emqx_bpapi callback. Returns the version string "5.0.22"
%% (presumably the release that introduced this protocol version).
introduced_in() ->
    "5.0.22".
|
||||
|
||||
%% Call emqx_node_rebalance_evacuation:start/1 on Node with Opts.
%% Opts must be a map; anything else fails with function_clause.
-spec node_rebalance_evacuation_start(node(), emqx_node_rebalance_evacuation:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance_evacuation:start_error()).
node_rebalance_evacuation_start(Node, Opts) when is_map(Opts) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, start, [Opts]).
|
||||
|
||||
%% Call emqx_node_rebalance_evacuation:stop/0 on Node.
-spec node_rebalance_evacuation_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_evacuation_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, stop, []).
|
||||
|
||||
%% Call emqx_node_rebalance:start/1 on Node with Opts.
-spec node_rebalance_start(node(), emqx_node_rebalance:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance:start_error()).
node_rebalance_start(Node, Opts) ->
    rpc:call(Node, emqx_node_rebalance, start, [Opts]).
|
||||
|
||||
%% Call emqx_node_rebalance:stop/0 on Node.
-spec node_rebalance_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance, stop, []).
|
|
@ -0,0 +1,22 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
available_nodes/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
|
||||
%% emqx_bpapi callback. Returns the version string "5.0.22"
%% (presumably the release that introduced this protocol version).
introduced_in() ->
    "5.0.22".
|
||||
|
||||
%% Multicall emqx_node_rebalance_evacuation:is_node_available/0 on every node
%% in Nodes and return the raw rpc:multicall/4 result.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance_evacuation, is_node_available, []).
|
|
@ -0,0 +1,62 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
available_nodes/1,
|
||||
evict_connections/2,
|
||||
evict_sessions/4,
|
||||
connection_counts/1,
|
||||
session_counts/1,
|
||||
enable_rebalance_agent/2,
|
||||
disable_rebalance_agent/2,
|
||||
disconnected_session_counts/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% emqx_bpapi callback. Returns the version string "5.0.22"
%% (presumably the release that introduced this protocol version).
introduced_in() ->
    "5.0.22".
|
||||
|
||||
%% Multicall emqx_node_rebalance:is_node_available/0 on every node in Nodes
%% and return the raw rpc:multicall/4 result.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, is_node_available, []).
|
||||
|
||||
%% Multicall emqx_eviction_agent:evict_connections/1 with Count on Nodes.
-spec evict_connections([node()], non_neg_integer()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_connections(Nodes, Count) ->
    rpc:multicall(Nodes, emqx_eviction_agent, evict_connections, [Count]).
|
||||
|
||||
%% Multicall emqx_eviction_agent:evict_sessions/3 on Nodes, forwarding
%% Count, RecipientNodes and ConnState unchanged.
-spec evict_sessions([node()], non_neg_integer(), [node()], emqx_channel:conn_state()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_sessions(Nodes, Count, RecipientNodes, ConnState) ->
    rpc:multicall(Nodes, emqx_eviction_agent, evict_sessions, [Count, RecipientNodes, ConnState]).
|
||||
|
||||
%% Multicall emqx_node_rebalance:connection_count/0 on Nodes.
-spec connection_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
connection_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, connection_count, []).
|
||||
|
||||
%% Multicall emqx_node_rebalance:session_count/0 on Nodes.
-spec session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
session_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, session_count, []).
|
||||
|
||||
%% Multicall emqx_node_rebalance_agent:enable/1 with OwnerPid on Nodes.
-spec enable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_enabled | eviction_agent_busy)).
enable_rebalance_agent(Nodes, OwnerPid) ->
    rpc:multicall(Nodes, emqx_node_rebalance_agent, enable, [OwnerPid]).
|
||||
|
||||
%% Multicall emqx_node_rebalance_agent:disable/1 with OwnerPid on Nodes.
-spec disable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_disabled | invalid_coordinator)).
disable_rebalance_agent(Nodes, OwnerPid) ->
    rpc:multicall(Nodes, emqx_node_rebalance_agent, disable, [OwnerPid]).
|
||||
|
||||
%% Multicall emqx_node_rebalance:disconnected_session_count/0 on Nodes.
-spec disconnected_session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
disconnected_session_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, disconnected_session_count, []).
|
|
@ -0,0 +1,36 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_status_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
local_status/1,
|
||||
rebalance_status/1,
|
||||
evacuation_status/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% emqx_bpapi callback. Returns the version string "5.0.22"
%% (presumably the release that introduced this protocol version).
introduced_in() ->
    "5.0.22".
|
||||
|
||||
%% Call emqx_node_rebalance_status:local_status/0 on Node.
-spec local_status(node()) ->
    emqx_rpc:badrpc() | disabled | {evacuation, map()} | {rebalance, map()}.
local_status(Node) ->
    rpc:call(Node, emqx_node_rebalance_status, local_status, []).
|
||||
|
||||
%% Multicall emqx_node_rebalance_status:rebalance_status/0 on Nodes.
-spec rebalance_status([node()]) ->
    emqx_rpc:multicall_result({node(), map()}).
rebalance_status(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance_status, rebalance_status, []).
|
||||
|
||||
%% Multicall emqx_node_rebalance_status:evacuation_status/0 on Nodes.
-spec evacuation_status([node()]) ->
    emqx_rpc:multicall_result({node(), map()}).
evacuation_status(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance_status, evacuation_status, []).
|
|
@ -0,0 +1,229 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/1, emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
%% Common Test callback: the test case list is computed by
%% emqx_common_test_helpers:all/1 from this module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Suite setup: start the common test helper apps with no extra applications
%% and pass Config through unchanged.
init_per_suite(Config) ->
    ExtraApps = [],
    ok = emqx_common_test_helpers:start_apps(ExtraApps),
    Config.
|
||||
|
||||
%% Suite teardown: mirror of init_per_suite/1, stopping with an empty app list.
end_per_suite(_Config) ->
    ExtraApps = [],
    ok = emqx_common_test_helpers:stop_apps(ExtraApps),
    ok.
|
||||
|
||||
%% Per-case setup: start a donor node (port 2883) and a recipient node
%% (port 3883) with case-specific names, start snabbkaffe tracing, and store
%% the started cluster under cluster_nodes in Config.
init_per_testcase(Case, Config) ->
    NodeSpecs = [
        {case_specific_node_name(?MODULE, Case, Suffix), Port}
     || {Suffix, Port} <- [{'_donor', 2883}, {'_recipient', 3883}]
    ],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, ?START_APPS),
    ok = snabbkaffe:start_trace(),
    [{cluster_nodes, ClusterNodes} | Config].
|
||||
|
||||
%% Per-case teardown: stop snabbkaffe, then stop the cluster started in
%% init_per_testcase/2.
end_per_testcase(_Case, Config) ->
    ok = snabbkaffe:stop(),
    ClusterNodes = ?config(cluster_nodes, Config),
    ok = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Connect 500 clients to the donor node, start a rebalance, wait for the
%% emqx_node_rebalance_evict_sess_over trace event, then check that donor and
%% recipient counts differ by no more than the absolute threshold (50).
t_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),

    Conns = emqtt_connect_many(DonorPort, 500),

    Opts = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01,
        nodes => [DonorNode, RecipientNode]
    },

    ?assertWaitEvent(
        ok = rpc:call(DonorNode, emqx_node_rebalance, start, [Opts]),
        #{?snk_kind := emqx_node_rebalance_evict_sess_over},
        10000
    ),

    %% Query connection, session and disconnected-session counts of one node.
    NodeCounts = fun(Node) ->
        Conn = rpc:call(Node, emqx_eviction_agent, connection_count, []),
        Sess = rpc:call(Node, emqx_eviction_agent, session_count, []),
        DSess = rpc:call(Node, emqx_eviction_agent, session_count, [disconnected]),
        {Conn, Sess, DSess}
    end,

    {DonorConnCount, DonorSessCount, DonorDSessCount} = NodeCounts(DonorNode),
    {RecipientConnCount, RecipientSessCount, RecipientDSessCount} = NodeCounts(RecipientNode),

    ct:pal(
        "Donor: conn=~p, sess=~p, dsess=~p",
        [DonorConnCount, DonorSessCount, DonorDSessCount]
    ),
    ct:pal(
        "Recipient: conn=~p, sess=~p, dsess=~p",
        [RecipientConnCount, RecipientSessCount, RecipientDSessCount]
    ),

    ?assert(DonorConnCount - 50 =< RecipientConnCount),
    ?assert(DonorDSessCount - 50 =< RecipientDSessCount),

    ok = stop_many(Conns).
|
||||
|
||||
%% Start a rebalance and immediately stop the recipient node; after the
%% emqx_node_rebalance_started event the donor must report `disabled`.
t_rebalance_node_crash(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),

    Conns = emqtt_connect_many(DonorPort, 500),

    Opts = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01,
        nodes => [DonorNode, RecipientNode]
    },

    ?assertWaitEvent(
        begin
            ok = rpc:call(DonorNode, emqx_node_rebalance, start, [Opts]),
            emqx_common_test_helpers:stop_slave(RecipientNode)
        end,
        #{?snk_kind := emqx_node_rebalance_started},
        1000
    ),

    DonorStatus = rpc:call(DonorNode, emqx_node_rebalance, status, []),
    ?assertEqual(disabled, DonorStatus),

    ok = stop_many(Conns).
|
||||
|
||||
%% Starting a rebalance must return {error, nothing_to_balance} both with no
%% clients and with 50 clients connected to the donor (the absolute threshold).
t_no_need_to_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),

    Opts = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01,
        nodes => [DonorNode, RecipientNode]
    },
    StartRebalance = fun() -> rpc:call(DonorNode, emqx_node_rebalance, start, [Opts]) end,

    ?assertEqual({error, nothing_to_balance}, StartRebalance()),

    Conns = emqtt_connect_many(DonorPort, 50),

    ?assertEqual({error, nothing_to_balance}, StartRebalance()),

    ok = stop_many(Conns).
|
||||
|
||||
%% The emqx_node_rebalance server must tolerate unknown info messages and
%% casts, and answer `ignored` to unknown calls, both before and after a
%% rebalance has been started.
t_unknown_mesages(Config) ->
    process_flag(trap_exit, true),
    [{DonorNode, DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),

    Conns = emqtt_connect_many(DonorPort, 500),

    Opts = #{
        wait_health_check => 100,
        abs_conn_threshold => 50,
        nodes => [DonorNode, RecipientNode]
    },

    Pid = rpc:call(DonorNode, erlang, whereis, [emqx_node_rebalance]),

    %% Send one unknown message of each kind: info, cast and call.
    SendUnknown = fun() ->
        Pid ! unknown,
        ok = gen_server:cast(Pid, unknown),
        ?assertEqual(ignored, gen_server:call(Pid, unknown))
    end,

    SendUnknown(),

    ok = rpc:call(DonorNode, emqx_node_rebalance, start, [Opts]),

    SendUnknown(),

    ok = stop_many(Conns).
|
||||
|
||||
%% A node whose eviction agent is already enabled must not be listed by
%% emqx_node_rebalance:available_nodes/1.
t_available_nodes(Config) ->
    [{DonorNode, _DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),

    %% Enable the eviction agent on RecipientNode so that it is "occupied"
    %% and not available for rebalance.
    ok = rpc:call(RecipientNode, emqx_eviction_agent, enable, [test_rebalance, undefined]),

    %% Only DonorNode should be available for rebalance.
    Candidates = [DonorNode, RecipientNode],
    ?assertEqual(
        [DonorNode],
        rpc:call(DonorNode, emqx_node_rebalance, available_nodes, [Candidates])
    ).
|
|
@ -0,0 +1,214 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_agent_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[case_specific_node_name/2]
|
||||
).
|
||||
|
||||
%% Common Test callback: run the `local` group, then the `cluster` group.
all() ->
    [{group, Group} || Group <- [local, cluster]].
|
||||
|
||||
%% Common Test callback: `local` cases run against the local node, `cluster`
%% cases get a started peer node (see init_per_testcase/2).
groups() ->
    LocalCases = [
        t_enable_disable,
        t_enable_egent_busy,
        t_unknown_messages
    ],
    ClusterCases = [
        t_rebalance_agent_coordinator_fail,
        t_rebalance_agent_fail
    ],
    [
        {local, [], LocalCases},
        {cluster, [], ClusterCases}
    ].
|
||||
|
||||
%% Suite setup: start emqx_eviction_agent and emqx_node_rebalance locally.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Suite teardown: stop the apps started in init_per_suite/1.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:stop_apps(Apps),
    ok.
|
||||
|
||||
%% Record in Config whether the group's test cases need a peer node:
%% {cluster, true} for the `cluster` group, {cluster, false} for `local`.
init_per_group(Group, Config) when Group =:= local; Group =:= cluster ->
    NeedsCluster = Group =:= cluster,
    [{cluster, NeedsCluster} | Config].
|
||||
|
||||
%% No per-group teardown is needed.
end_per_group(_Group, _Config) -> ok.
|
||||
|
||||
%% For cases in the `cluster` group, start one peer node (port 2883) with the
%% eviction agent and node rebalance apps and store it under cluster_nodes.
%% Cases in the `local` group get Config unchanged.
init_per_testcase(Case, Config) ->
    NeedsCluster = ?config(cluster, Config),
    case NeedsCluster of
        false ->
            Config;
        true ->
            NodeSpecs = [{case_specific_node_name(?MODULE, Case), 2883}],
            Apps = [emqx_eviction_agent, emqx_node_rebalance],
            ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, Apps),
            [{cluster_nodes, ClusterNodes} | Config]
    end.
|
||||
|
||||
%% Stop the peer node started by init_per_testcase/2 for `cluster` cases;
%% nothing to do for `local` cases.
end_per_testcase(_Case, Config) ->
    NeedsCluster = ?config(cluster, Config),
    case NeedsCluster of
        false ->
            ok;
        true ->
            ClusterNodes = ?config(cluster_nodes, Config),
            emqx_eviction_agent_test_helpers:stop_cluster(
                ClusterNodes,
                [emqx_eviction_agent, emqx_node_rebalance]
            )
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Local tests
|
||||
|
||||
%% Walk the agent through its enable/disable life cycle and check every
%% transition, including the rejected ones (double enable, disabling from a
%% pid that is not the coordinator, double disable).
t_enable_disable(_Config) ->
    Status = fun() -> emqx_node_rebalance_agent:status() end,

    ?assertEqual(disabled, Status()),

    ?assertEqual(ok, emqx_node_rebalance_agent:enable(self())),

    ?assertEqual({error, already_enabled}, emqx_node_rebalance_agent:enable(self())),

    ?assertEqual({enabled, self()}, Status()),

    ?assertEqual(
        {error, invalid_coordinator},
        emqx_node_rebalance_agent:disable(spawn_link(fun() -> ok end))
    ),

    ?assertEqual(ok, emqx_node_rebalance_agent:disable(self())),

    ?assertEqual({error, already_disabled}, emqx_node_rebalance_agent:disable(self())),

    ?assertEqual(disabled, Status()).
|
||||
|
||||
%% While the eviction agent is enabled under another kind (rebalance_test),
%% enabling the rebalance agent must fail with eviction_agent_busy.
t_enable_egent_busy(_Config) ->
    ok = emqx_eviction_agent:enable(rebalance_test, undefined),

    EnableResult = emqx_node_rebalance_agent:enable(self()),
    ?assertEqual({error, eviction_agent_busy}, EnableResult),

    ok = emqx_eviction_agent:disable(rebalance_test).
|
||||
|
||||
%% The agent must survive an unknown cast and info message and answer
%% `ignored` to an unknown call.
t_unknown_messages(_Config) ->
    AgentPid = whereis(emqx_node_rebalance_agent),
    ok = gen_server:cast(AgentPid, unknown),
    AgentPid ! unknown,
    ignored = gen_server:call(AgentPid, unknown).
|
||||
|
||||
%% Cluster tests
|
||||
|
||||
% The following tests verify that emqx_node_rebalance_agent correctly links
|
||||
% coordinator process with emqx_eviction_agent-s.
|
||||
|
||||
%% Killing the coordinator process must take down the remote eviction agent
%% that was enabled on its behalf.
t_rebalance_agent_coordinator_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),
    EvictionAgentStatus = fun() -> rpc:call(Node, emqx_eviction_agent, status, []) end,

    %% A coordinator that just waits until told to finish (or killed).
    CoordinatorPid = spawn_link(fun() ->
        receive
            done -> ok
        end
    end),

    ?assertEqual(disabled, EvictionAgentStatus()),

    ?assertEqual(
        ok,
        rpc:call(Node, emqx_node_rebalance_agent, enable, [CoordinatorPid])
    ),

    ?assertMatch({enabled, _}, EvictionAgentStatus()),

    %% Link to the remote eviction agent so its exit is delivered here as an
    %% 'EXIT' message (this process traps exits).
    EvictionAgentPid = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = link(EvictionAgentPid),

    true = exit(CoordinatorPid, kill),

    receive
        {'EXIT', EvictionAgentPid, _} -> true
    after 1000 ->
        ct:fail("emqx_eviction_agent did not exit")
    end.
|
||||
|
||||
%% Killing the remote eviction agent must take down the coordinator process
%% for which the rebalance agent was enabled.
t_rebalance_agent_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),

    %% A coordinator that just waits until told to finish (or killed). It is
    %% linked to this process, which traps exits, so its termination arrives
    %% here as an 'EXIT' message.
    CoordinatorPid = spawn_link(
        fun() ->
            receive
                done -> ok
            end
        end
    ),

    ?assertEqual(
        ok,
        rpc:call(Node, emqx_node_rebalance_agent, enable, [CoordinatorPid])
    ),

    EvictionAgentPid = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = exit(EvictionAgentPid, kill),

    receive
        {'EXIT', CoordinatorPid, _} -> true
    after 1000 ->
        %% The awaited exit is the coordinator's, so report that on timeout.
        ct:fail("coordinator process did not exit")
    end.
|
|
@ -0,0 +1,444 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_api_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_mgmt_api_test_util,
|
||||
[
|
||||
request/2,
|
||||
request/3,
|
||||
uri/1
|
||||
]
|
||||
).
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
%% Common Test callback: the test case list is computed by
%% emqx_common_test_helpers:all/1 from this module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Suite setup: start the apps listed in ?START_APPS on the local node.
init_per_suite(Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Suite teardown: stop the apps listed in ?START_APPS.
end_per_suite(_Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:stop_apps(Apps),
    ok.
|
||||
|
||||
%% Per-case setup: start a donor (port 2883) and a recipient (port 3883) node
%% with a case-specific data dir, initialise the management API test helper
%% on the donor, and reuse the donor's auth header for local HTTP requests.
init_per_testcase(Case, Config) ->
    NodeSpecs = [
        {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
        {case_specific_node_name(?MODULE, Case, '_recipient'), 3883}
    ],
    Env = [{emqx, data_dir, case_specific_data_dir(Case, Config)}],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, ?START_APPS, Env),
    [{DonorNode, _} | _] = ClusterNodes,

    ok = rpc:call(DonorNode, emqx_mgmt_api_test_util, init_suite, []),
    ok = take_auth_header_from(DonorNode),

    [{cluster_nodes, ClusterNodes} | Config].
|
||||
%% Per-case teardown: stop the cluster started in init_per_testcase/2.
end_per_testcase(_Case, Config) ->
    ClusterNodes = ?config(cluster_nodes, Config),
    _ = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% The evacuation start endpoint must answer 400 to each invalid body,
%% 404 for an unknown node, and 200 for a valid request, after which the
%% donor shows up under "evacuations" in the global status.
t_start_evacuation_validation(Config) ->
    [{DonorNode, _}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    StartPath = ["load_rebalance", atom_to_list(DonorNode), "evacuation", "start"],

    InvalidBodies = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{redirect_to => 123},
        #{wait_takeover => <<"wait">>},
        #{migrate_to => []},
        #{migrate_to => <<"migrate_to">>},
        #{migrate_to => [<<"bad_node">>]},
        #{migrate_to => [<<"bad_node">>, DonorNodeBin]},
        #{unknown => <<"Value">>}
    ],
    lists:foreach(
        fun(Body) ->
            ?assertMatch({ok, 400, #{}}, api_post(StartPath, Body))
        end,
        InvalidBodies
    ),

    ?assertMatch(
        {ok, 404, #{}},
        api_post(["load_rebalance", "bad@node", "evacuation", "start"], #{})
    ),

    ValidBody = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        redirect_to => <<"srv">>,
        migrate_to => [atom_to_binary(RecipientNode)]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidBody)),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [#{<<"node">> := DonorNodeBin}]}},
        api_get(["load_rebalance", "global_status"])
    ).
|
||||
|
||||
%% The rebalance start endpoint must answer 400 to each invalid body, 404 for
%% an unknown node, and 200 for a valid request once 50 clients are connected
%% to the donor, after which the donor shows up under "rebalances" in the
%% global status.
t_start_rebalance_validation(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    StartPath = ["load_rebalance", atom_to_list(DonorNode), "start"],

    InvalidBodies = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{abs_conn_threshold => <<"act">>},
        #{rel_conn_threshold => <<"rct">>},
        #{abs_sess_threshold => <<"act">>},
        #{rel_sess_threshold => <<"rct">>},
        #{wait_takeover => <<"wait">>},
        #{wait_health_check => <<"wait">>},
        #{nodes => <<"nodes">>},
        #{nodes => []},
        #{nodes => [<<"bad_node">>]},
        #{nodes => [<<"bad_node">>, DonorNodeBin]},
        #{unknown => <<"Value">>}
    ],
    lists:foreach(
        fun(Body) ->
            ?assertMatch({ok, 400, #{}}, api_post(StartPath, Body))
        end,
        InvalidBodies
    ),

    ?assertMatch(
        {ok, 404, #{}},
        api_post(["load_rebalance", "bad@node", "start"], #{})
    ),

    Conns = emqtt_connect_many(DonorPort, 50),

    ValidBody = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        wait_health_check => 10,
        abs_conn_threshold => 10,
        rel_conn_threshold => 1.001,
        abs_sess_threshold => 10,
        rel_sess_threshold => 1.001,
        nodes => [
            atom_to_binary(DonorNode),
            atom_to_binary(RecipientNode)
        ]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidBody)),

    ?assertMatch(
        {ok, 200, #{<<"rebalances">> := [#{<<"node">> := DonorNodeBin}]}},
        api_get(["load_rebalance", "global_status"])
    ),

    ok = stop_many(Conns).
|
||||
|
||||
%% Start an evacuation over the HTTP API, check the local and global status
%% responses (after schema translation), stop it, and check that both
%% status endpoints report nothing running.
t_start_stop_evacuation(Config) ->
    [{DonorNode, _}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeStr = atom_to_list(DonorNode),
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    StartOpts = maps:merge(
        emqx_node_rebalance_api:rebalance_evacuation_example(),
        #{migrate_to => [atom_to_binary(RecipientNode)]}
    ),

    ?assertMatch(
        {ok, 200, #{}},
        api_post(["load_rebalance", DonorNodeStr, "evacuation", "start"], StartOpts)
    ),

    StatusResponse = api_get(StatusPath),
    ?assertMatch({ok, 200, _}, StatusResponse),
    {ok, 200, Status} = StatusResponse,

    ?assertMatch(
        #{
            process := evacuation,
            connection_eviction_rate := 100,
            session_eviction_rate := 100,
            connection_goal := 0,
            session_goal := 0,
            stats := #{
                initial_connected := _,
                current_connected := _,
                initial_sessions := _,
                current_sessions := _
            }
        },
        emqx_node_rebalance_api:translate(local_status_enabled, Status)
    ),

    DonorNodeBin = atom_to_binary(DonorNode),

    GlobalStatusResponse = api_get(GlobalStatusPath),
    ?assertMatch({ok, 200, _}, GlobalStatusResponse),
    {ok, 200, GlobalStatus} = GlobalStatusResponse,

    ?assertMatch(
        #{
            rebalances := [],
            evacuations := [
                #{
                    node := DonorNodeBin,
                    connection_eviction_rate := 100,
                    session_eviction_rate := 100,
                    connection_goal := 0,
                    session_goal := 0,
                    stats := #{
                        initial_connected := _,
                        current_connected := _,
                        initial_sessions := _,
                        current_sessions := _
                    }
                }
            ]
        },
        emqx_node_rebalance_api:translate(global_status, GlobalStatus)
    ),

    ?assertMatch(
        {ok, 200, #{}},
        api_post(["load_rebalance", DonorNodeStr, "evacuation", "stop"], #{})
    ),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(GlobalStatusPath)
    ).
|
||||
|
||||
%% Start a rebalance over the HTTP API with 100 clients on the donor, check
%% the local and global status responses (raw and after schema translation),
%% stop it, and check that both status endpoints report nothing running.
t_start_stop_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeStr = atom_to_list(DonorNode),
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    Conns = emqtt_connect_many(DonorPort, 100),

    %% Use the API example body, minus its node list.
    StartOpts = maps:without(
        [nodes],
        emqx_node_rebalance_api:rebalance_example()
    ),

    ?assertMatch(
        {ok, 200, #{}},
        api_post(["load_rebalance", DonorNodeStr, "start"], StartOpts)
    ),

    StatusResponse = api_get(StatusPath),
    ?assertMatch({ok, 200, _}, StatusResponse),
    {ok, 200, Status} = StatusResponse,

    ?assertMatch(
        #{process := rebalance, connection_eviction_rate := 10, session_eviction_rate := 20},
        emqx_node_rebalance_api:translate(local_status_enabled, Status)
    ),

    DonorNodeBin = atom_to_binary(DonorNode),
    RecipientNodeBin = atom_to_binary(RecipientNode),

    GlobalStatusResponse = api_get(GlobalStatusPath),
    ?assertMatch({ok, 200, _}, GlobalStatusResponse),
    {ok, 200, GlobalStatus} = GlobalStatusResponse,

    ?assertMatch(
        {ok, 200, #{
            <<"evacuations">> := [],
            <<"rebalances">> :=
                [
                    #{
                        <<"state">> := _,
                        <<"node">> := DonorNodeBin,
                        <<"coordinator_node">> := _,
                        <<"connection_eviction_rate">> := 10,
                        <<"session_eviction_rate">> := 20,
                        <<"donors">> := [DonorNodeBin],
                        <<"recipients">> := [RecipientNodeBin]
                    }
                ]
        }},
        GlobalStatusResponse
    ),

    ?assertMatch(
        #{
            evacuations := [],
            rebalances := [
                #{
                    state := _,
                    node := DonorNodeBin,
                    coordinator_node := _,
                    connection_eviction_rate := 10,
                    session_eviction_rate := 20,
                    donors := [DonorNodeBin],
                    recipients := [RecipientNodeBin]
                }
            ]
        },
        emqx_node_rebalance_api:translate(global_status, GlobalStatus)
    ),

    ?assertMatch(
        {ok, 200, #{}},
        api_post(["load_rebalance", DonorNodeStr, "stop"], #{})
    ),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(GlobalStatusPath)
    ),

    ok = stop_many(Conns).
|
||||
|
||||
%% The availability check endpoint must answer 200 normally, 503 while an
%% evacuation is running on the donor, and 200 again once it is stopped.
t_availability_check(Config) ->
    [{DonorNode, _} | _] = ?config(cluster_nodes, Config),
    CheckAvailability = fun() -> api_get(["load_rebalance", "availability_check"]) end,

    ?assertMatch({ok, 200, #{}}, CheckAvailability()),

    ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [#{}]),

    ?assertMatch({ok, 503, _}, CheckAvailability()),

    ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, stop, []),

    ?assertMatch({ok, 200, #{}}, CheckAvailability()).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Issue a GET request for Path and decode the response body into maps.
%% Returns {ok, Code, DecodedBody}, or the {error, _} tuple from request/2.
api_get(Path) ->
    Response = request(get, uri(Path)),
    case Response of
        {error, _} ->
            Response;
        {ok, Code, ResponseBody} ->
            {ok, Code, jiffy:decode(ResponseBody, [return_maps])}
    end.
|
||||
|
||||
%% Issue a POST request for Path with Data and decode the response body into
%% maps. Returns {ok, Code, DecodedBody}, or the {error, _} tuple from
%% request/3.
api_post(Path, Data) ->
    Response = request(post, uri(Path), Data),
    case Response of
        {error, _} ->
            Response;
        {ok, Code, ResponseBody} ->
            {ok, Code, jiffy:decode(ResponseBody, [return_maps])}
    end.
|
||||
|
||||
%% Mock emqx_common_test_http:default_auth_header/0 locally so that it
%% returns the value obtained from the same function on Node.
take_auth_header_from(Node) ->
    RemoteAuthHeader = fun() ->
        rpc:call(Node, emqx_common_test_http, default_auth_header, [])
    end,
    meck:new(emqx_common_test_http, [passthrough]),
    meck:expect(emqx_common_test_http, default_auth_header, RemoteAuthHeader),
    ok.
|
||||
|
||||
%% Return a per-test-case directory under the suite's priv_dir, or
%% `undefined` when Config has no priv_dir entry.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    if
        PrivDir =:= undefined -> undefined;
        true -> filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,291 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_cli_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
all() ->
|
||||
emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
init_per_suite(Config) ->
|
||||
emqx_common_test_helpers:start_apps(?START_APPS),
|
||||
Config.
|
||||
|
||||
end_per_suite(Config) ->
|
||||
emqx_common_test_helpers:stop_apps(lists:reverse(?START_APPS)),
|
||||
Config.
|
||||
|
||||
init_per_testcase(Case = t_rebalance, Config) ->
|
||||
_ = emqx_node_rebalance_evacuation:stop(),
|
||||
ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
|
||||
[
|
||||
{case_specific_node_name(?MODULE, Case, '_donor'), 2883},
|
||||
{case_specific_node_name(?MODULE, Case, '_recipient'), 3883}
|
||||
],
|
||||
?START_APPS
|
||||
),
|
||||
[{cluster_nodes, ClusterNodes} | Config];
|
||||
init_per_testcase(_Case, Config) ->
|
||||
_ = emqx_node_rebalance_evacuation:stop(),
|
||||
_ = emqx_node_rebalance:stop(),
|
||||
Config.
|
||||
|
||||
end_per_testcase(t_rebalance, Config) ->
|
||||
_ = emqx_node_rebalance_evacuation:stop(),
|
||||
_ = emqx_node_rebalance:stop(),
|
||||
_ = emqx_eviction_agent_test_helpers:stop_cluster(
|
||||
?config(cluster_nodes, Config),
|
||||
?START_APPS
|
||||
);
|
||||
end_per_testcase(_Case, _Config) ->
|
||||
_ = emqx_node_rebalance_evacuation:stop(),
|
||||
_ = emqx_node_rebalance:stop().
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
t_evacuation(_Config) ->
|
||||
%% usage
|
||||
ok = emqx_node_rebalance_cli:cli(["foobar"]),
|
||||
|
||||
%% status
|
||||
ok = emqx_node_rebalance_cli:cli(["status"]),
|
||||
ok = emqx_node_rebalance_cli:cli(["node-status"]),
|
||||
ok = emqx_node_rebalance_cli:cli(["node-status", atom_to_list(node())]),
|
||||
|
||||
%% start with invalid args
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli(["start", "--evacuation", "--foo-bar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli(["start", "--evacuation", "--conn-evict-rate", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli(["start", "--evacuation", "--sess-evict-rate", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli(["start", "--evacuation", "--wait-takeover", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli([
|
||||
"start",
|
||||
"--evacuation",
|
||||
"--migrate-to",
|
||||
"nonexistent@node"
|
||||
])
|
||||
),
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli([
|
||||
"start",
|
||||
"--evacuation",
|
||||
"--migrate-to",
|
||||
""
|
||||
])
|
||||
),
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli([
|
||||
"start",
|
||||
"--evacuation",
|
||||
"--unknown-arg"
|
||||
])
|
||||
),
|
||||
?assert(
|
||||
emqx_node_rebalance_cli:cli([
|
||||
"start",
|
||||
"--evacuation",
|
||||
"--conn-evict-rate",
|
||||
"10",
|
||||
"--sess-evict-rate",
|
||||
"10",
|
||||
"--wait-takeover",
|
||||
"10",
|
||||
"--migrate-to",
|
||||
atom_to_list(node()),
|
||||
"--redirect-to",
|
||||
"srv"
|
||||
])
|
||||
),
|
||||
|
||||
%% status
|
||||
ok = emqx_node_rebalance_cli:cli(["status"]),
|
||||
ok = emqx_node_rebalance_cli:cli(["node-status"]),
|
||||
ok = emqx_node_rebalance_cli:cli(["node-status", atom_to_list(node())]),
|
||||
|
||||
?assertMatch(
|
||||
{enabled, #{}},
|
||||
emqx_node_rebalance_evacuation:status()
|
||||
),
|
||||
|
||||
%% already enabled
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli:cli([
|
||||
"start",
|
||||
"--evacuation",
|
||||
"--conn-evict-rate",
|
||||
"10",
|
||||
"--redirect-to",
|
||||
"srv"
|
||||
])
|
||||
),
|
||||
|
||||
%% stop
|
||||
true = emqx_node_rebalance_cli:cli(["stop"]),
|
||||
|
||||
false = emqx_node_rebalance_cli:cli(["stop"]),
|
||||
|
||||
?assertEqual(
|
||||
disabled,
|
||||
emqx_node_rebalance_evacuation:status()
|
||||
).
|
||||
|
||||
t_rebalance(Config) ->
|
||||
process_flag(trap_exit, true),
|
||||
|
||||
[{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
|
||||
|
||||
%% start with invalid args
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--foo-bar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--conn-evict-rate", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--abs-conn-threshold", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--rel-conn-threshold", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--sess-evict-rate", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--abs-sess-threshold", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--rel-sess-threshold", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--wait-takeover", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start", "--wait-health-check", "foobar"])
|
||||
),
|
||||
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, [
|
||||
"start",
|
||||
"--nodes",
|
||||
"nonexistent@node"
|
||||
])
|
||||
),
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, [
|
||||
"start",
|
||||
"--nodes",
|
||||
""
|
||||
])
|
||||
),
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, [
|
||||
"start",
|
||||
"--nodes",
|
||||
atom_to_list(RecipientNode)
|
||||
])
|
||||
),
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, [
|
||||
"start",
|
||||
"--unknown-arg"
|
||||
])
|
||||
),
|
||||
|
||||
Conns = emqtt_connect_many(DonorPort, 20),
|
||||
|
||||
?assert(
|
||||
emqx_node_rebalance_cli(DonorNode, [
|
||||
"start",
|
||||
"--conn-evict-rate",
|
||||
"10",
|
||||
"--abs-conn-threshold",
|
||||
"10",
|
||||
"--rel-conn-threshold",
|
||||
"1.1",
|
||||
"--sess-evict-rate",
|
||||
"10",
|
||||
"--abs-sess-threshold",
|
||||
"10",
|
||||
"--rel-sess-threshold",
|
||||
"1.1",
|
||||
"--wait-takeover",
|
||||
"10",
|
||||
"--nodes",
|
||||
atom_to_list(DonorNode) ++ "," ++
|
||||
atom_to_list(RecipientNode)
|
||||
])
|
||||
),
|
||||
|
||||
%% status
|
||||
ok = emqx_node_rebalance_cli(DonorNode, ["status"]),
|
||||
ok = emqx_node_rebalance_cli(DonorNode, ["node-status"]),
|
||||
ok = emqx_node_rebalance_cli(DonorNode, ["node-status", atom_to_list(DonorNode)]),
|
||||
|
||||
?assertMatch(
|
||||
{enabled, #{}},
|
||||
rpc:call(DonorNode, emqx_node_rebalance, status, [])
|
||||
),
|
||||
|
||||
%% already enabled
|
||||
?assertNot(
|
||||
emqx_node_rebalance_cli(DonorNode, ["start"])
|
||||
),
|
||||
|
||||
%% stop
|
||||
true = emqx_node_rebalance_cli(DonorNode, ["stop"]),
|
||||
|
||||
false = emqx_node_rebalance_cli(DonorNode, ["stop"]),
|
||||
|
||||
?assertEqual(
|
||||
disabled,
|
||||
rpc:call(DonorNode, emqx_node_rebalance, status, [])
|
||||
),
|
||||
|
||||
ok = stop_many(Conns).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Run `emqx_node_rebalance_cli:cli(Args)` on `Node` via RPC and return its
%% result. A `{badrpc, Reason}` reply is turned into `error(Reason)` so that
%% RPC failures crash the test case instead of being compared as a result.
emqx_node_rebalance_cli(Node, Args) ->
    Outcome = rpc:call(Node, emqx_node_rebalance_cli, cli, [Args]),
    case Outcome of
        {badrpc, Why} -> error(Why);
        _ -> Outcome
    end.
|
|
@ -0,0 +1,270 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/1, emqtt_try_connect/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
all() -> [{group, one_node}, {group, two_node}].
|
||||
|
||||
groups() ->
|
||||
[
|
||||
{one_node, [], one_node_cases()},
|
||||
{two_node, [], two_node_cases()}
|
||||
].
|
||||
|
||||
two_node_cases() ->
|
||||
[
|
||||
t_conn_evicted,
|
||||
t_migrate_to,
|
||||
t_session_evicted
|
||||
].
|
||||
|
||||
one_node_cases() ->
|
||||
emqx_common_test_helpers:all(?MODULE) -- two_node_cases().
|
||||
|
||||
init_per_suite(Config) ->
|
||||
ok = emqx_common_test_helpers:start_apps([]),
|
||||
Config.
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok = emqx_common_test_helpers:stop_apps([]),
|
||||
ok.
|
||||
|
||||
init_per_group(one_node, Config) ->
|
||||
[{cluster_type, one_node} | Config];
|
||||
init_per_group(two_node, Config) ->
|
||||
[{cluster_type, two_node} | Config].
|
||||
|
||||
end_per_group(_Group, _Config) ->
|
||||
ok.
|
||||
|
||||
init_per_testcase(Case, Config) ->
|
||||
NodesWithPorts =
|
||||
case ?config(cluster_type, Config) of
|
||||
one_node ->
|
||||
[{case_specific_node_name(?MODULE, Case, '_evacuated'), 2883}];
|
||||
two_node ->
|
||||
[
|
||||
{case_specific_node_name(?MODULE, Case, '_evacuated'), 2883},
|
||||
{case_specific_node_name(?MODULE, Case, '_recipient'), 3883}
|
||||
]
|
||||
end,
|
||||
ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
|
||||
NodesWithPorts,
|
||||
[emqx_eviction_agent, emqx_node_rebalance],
|
||||
[{emqx, data_dir, case_specific_data_dir(Case, Config)}]
|
||||
),
|
||||
ok = snabbkaffe:start_trace(),
|
||||
[{cluster_nodes, ClusterNodes} | Config].
|
||||
|
||||
end_per_testcase(_Case, Config) ->
|
||||
ok = snabbkaffe:stop(),
|
||||
ok = emqx_eviction_agent_test_helpers:stop_cluster(
|
||||
?config(cluster_nodes, Config),
|
||||
[emqx_eviction_agent, emqx_node_rebalance]
|
||||
).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% One node tests
|
||||
|
||||
t_agent_busy(Config) ->
|
||||
[{DonorNode, _DonorPort}] = ?config(cluster_nodes, Config),
|
||||
ok = rpc:call(DonorNode, emqx_eviction_agent, enable, [other_rebalance, undefined]),
|
||||
|
||||
?assertEqual(
|
||||
{error, eviction_agent_busy},
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)])
|
||||
).
|
||||
|
||||
t_already_started(Config) ->
|
||||
[{DonorNode, _DonorPort}] = ?config(cluster_nodes, Config),
|
||||
ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
|
||||
|
||||
?assertEqual(
|
||||
{error, already_started},
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)])
|
||||
).
|
||||
|
||||
t_not_started(Config) ->
|
||||
[{DonorNode, _DonorPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
?assertEqual(
|
||||
{error, not_started},
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, stop, [])
|
||||
).
|
||||
|
||||
t_start(Config) ->
|
||||
process_flag(trap_exit, true),
|
||||
|
||||
[{DonorNode, DonorPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
|
||||
?assertMatch(
|
||||
{error, {use_another_server, #{}}},
|
||||
emqtt_try_connect([{port, DonorPort}])
|
||||
).
|
||||
|
||||
t_persistence(Config) ->
|
||||
process_flag(trap_exit, true),
|
||||
|
||||
[{DonorNode, DonorPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
|
||||
|
||||
?assertMatch(
|
||||
{error, {use_another_server, #{}}},
|
||||
emqtt_try_connect([{port, DonorPort}])
|
||||
),
|
||||
|
||||
ok = rpc:call(DonorNode, supervisor, terminate_child, [
|
||||
emqx_node_rebalance_sup, emqx_node_rebalance_evacuation
|
||||
]),
|
||||
{ok, _} = rpc:call(DonorNode, supervisor, restart_child, [
|
||||
emqx_node_rebalance_sup, emqx_node_rebalance_evacuation
|
||||
]),
|
||||
|
||||
?assertMatch(
|
||||
{error, {use_another_server, #{}}},
|
||||
emqtt_try_connect([{port, DonorPort}])
|
||||
),
|
||||
?assertMatch(
|
||||
{enabled, #{conn_evict_rate := 10}},
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, status, [])
|
||||
).
|
||||
|
||||
t_unknown_messages(Config) ->
|
||||
process_flag(trap_exit, true),
|
||||
|
||||
[{DonorNode, _DonorPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
|
||||
|
||||
Pid = rpc:call(DonorNode, erlang, whereis, [emqx_node_rebalance_evacuation]),
|
||||
|
||||
Pid ! unknown,
|
||||
|
||||
ok = gen_server:cast(Pid, unknown),
|
||||
|
||||
?assertEqual(
|
||||
ignored,
|
||||
gen_server:call(Pid, unknown)
|
||||
).
|
||||
|
||||
%% Two node tests
|
||||
|
||||
%% Starting an evacuation on the donor node must evict an already connected
%% client and make new connection attempts fail with `use_another_server`.
t_conn_evicted(Config) ->
    %% Trap exits so the client's termination is delivered as an 'EXIT'
    %% message, which is awaited at the end of the test.
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, _] = ?config(cluster_nodes, Config),

    {ok, C} = emqtt_connect([{clientid, <<"evacuated">>}, {port, DonorPort}]),

    %% Start the evacuation and wait for the connection eviction trace event.
    ?assertWaitEvent(
        ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
        #{?snk_kind := node_evacuation_evict_conn},
        1000
    ),

    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect([{clientid, <<"connecting">>}, {port, DonorPort}])
    ),

    %% Use the named reason code (as t_session_evicted does) rather than the
    %% bare number 156.
    receive
        {'EXIT', C, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
    after 1000 ->
        ct:fail("Connection not evicted")
    end.
|
||||
|
||||
t_migrate_to(Config) ->
|
||||
[{DonorNode, _DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
?assertEqual(
|
||||
[RecipientNode],
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, migrate_to, [undefined])
|
||||
),
|
||||
|
||||
?assertEqual(
|
||||
[],
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, migrate_to, [['unknown@node']])
|
||||
),
|
||||
|
||||
ok = rpc:call(RecipientNode, emqx_eviction_agent, enable, [test_rebalance, undefined]),
|
||||
|
||||
?assertEqual(
|
||||
[],
|
||||
rpc:call(DonorNode, emqx_node_rebalance_evacuation, migrate_to, [undefined])
|
||||
).
|
||||
|
||||
t_session_evicted(Config) ->
|
||||
process_flag(trap_exit, true),
|
||||
|
||||
[{DonorNode, DonorPort}, {RecipientNode, _RecipientPort}] = ?config(cluster_nodes, Config),
|
||||
|
||||
{ok, C} = emqtt_connect([
|
||||
{port, DonorPort}, {clientid, <<"client_with_sess">>}, {clean_start, false}
|
||||
]),
|
||||
|
||||
?assertWaitEvent(
|
||||
ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
|
||||
#{?snk_kind := node_evacuation_evict_sess_over},
|
||||
5000
|
||||
),
|
||||
|
||||
receive
|
||||
{'EXIT', C, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
|
||||
after 1000 ->
|
||||
ct:fail("Connection not evicted")
|
||||
end,
|
||||
|
||||
[ChannelPid] = rpc:call(DonorNode, emqx_cm_registry, lookup_channels, [<<"client_with_sess">>]),
|
||||
|
||||
?assertEqual(
|
||||
RecipientNode,
|
||||
node(ChannelPid)
|
||||
).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Evacuation options used by the test cases in this suite. The `migrate_to`
%% entry is computed from `Config` by `migrate_to/1`.
opts(Config) ->
    Recipients = migrate_to(Config),
    #{
        migrate_to => Recipients,
        wait_takeover => 1,
        sess_evict_rate => 10,
        conn_evict_rate => 10,
        server_reference => <<"srv">>
    }.
|
||||
|
||||
%% Nodes that sessions should be migrated to, chosen by the `cluster_type`
%% entry of `Config`: none for `one_node`, and the second node listed under
%% `cluster_nodes` for `two_node`.
migrate_to(Config) ->
    ClusterType = ?config(cluster_type, Config),
    case ClusterType of
        two_node ->
            [_Donor, {Recipient, _Port}] = ?config(cluster_nodes, Config),
            [Recipient];
        one_node ->
            []
    end.
|
||||
|
||||
%% Build a per-testcase directory path: the `priv_dir` entry of `Config`
%% joined with the test case name. Returns `undefined` when the `priv_dir`
%% lookup yields `undefined`.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    if
        PrivDir =:= undefined -> undefined;
        true -> filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,108 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_persist_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
all() ->
|
||||
emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
init_per_suite(Config) ->
|
||||
Config.
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok.
|
||||
|
||||
init_per_testcase(_Case, Config) ->
|
||||
_ = emqx_node_rebalance_evacuation_persist:clear(),
|
||||
Config.
|
||||
|
||||
end_per_testcase(_Case, _Config) ->
|
||||
_ = emqx_node_rebalance_evacuation_persist:clear().
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
t_save_read(_Config) ->
|
||||
DefaultOpts = #{
|
||||
server_reference => <<"default_ref">>,
|
||||
conn_evict_rate => 2001,
|
||||
sess_evict_rate => 2002,
|
||||
wait_takeover => 2003
|
||||
},
|
||||
|
||||
Opts0 = #{
|
||||
server_reference => <<"ref">>,
|
||||
conn_evict_rate => 1001,
|
||||
sess_evict_rate => 1002,
|
||||
wait_takeover => 1003
|
||||
},
|
||||
ok = emqx_node_rebalance_evacuation_persist:save(Opts0),
|
||||
|
||||
{ok, ReadOpts0} = emqx_node_rebalance_evacuation_persist:read(DefaultOpts),
|
||||
?assertEqual(Opts0, ReadOpts0),
|
||||
|
||||
Opts1 = Opts0#{server_reference => undefined},
|
||||
ok = emqx_node_rebalance_evacuation_persist:save(Opts1),
|
||||
|
||||
{ok, ReadOpts1} = emqx_node_rebalance_evacuation_persist:read(DefaultOpts),
|
||||
?assertEqual(Opts1, ReadOpts1).
|
||||
|
||||
t_read_default(_Config) ->
|
||||
ok = write_evacuation_file(<<"{}">>),
|
||||
|
||||
DefaultOpts = #{
|
||||
server_reference => <<"ref">>,
|
||||
conn_evict_rate => 1001,
|
||||
sess_evict_rate => 1002,
|
||||
wait_takeover => 1003
|
||||
},
|
||||
|
||||
{ok, ReadOpts} = emqx_node_rebalance_evacuation_persist:read(DefaultOpts),
|
||||
?assertEqual(DefaultOpts, ReadOpts).
|
||||
|
||||
t_read_bad_data(_Config) ->
|
||||
ok = write_evacuation_file(<<"{bad json">>),
|
||||
|
||||
DefaultOpts = #{
|
||||
server_reference => <<"ref">>,
|
||||
conn_evict_rate => 1001,
|
||||
sess_evict_rate => 1002,
|
||||
wait_takeover => 1003
|
||||
},
|
||||
|
||||
{ok, ReadOpts} = emqx_node_rebalance_evacuation_persist:read(DefaultOpts),
|
||||
?assertEqual(DefaultOpts, ReadOpts).
|
||||
|
||||
t_clear(_Config) ->
|
||||
ok = write_evacuation_file(<<"{}">>),
|
||||
|
||||
?assertMatch(
|
||||
{ok, _},
|
||||
emqx_node_rebalance_evacuation_persist:read(#{})
|
||||
),
|
||||
|
||||
ok = emqx_node_rebalance_evacuation_persist:clear(),
|
||||
|
||||
?assertEqual(
|
||||
none,
|
||||
emqx_node_rebalance_evacuation_persist:read(#{})
|
||||
).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Write `Json` verbatim to the evacuation state file, creating the parent
%% directory first if needed. Crashes (badmatch) if either step fails.
write_evacuation_file(Json) ->
    Path = emqx_node_rebalance_evacuation_persist:evacuation_filepath(),
    ok = filelib:ensure_dir(Path),
    ok = file:write_file(Path, Json).
|
|
@ -9,18 +9,7 @@
|
|||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-define(assertWaitEvent(Code, EventMatch, Timeout),
|
||||
?assertMatch(
|
||||
{_, {ok, EventMatch}},
|
||||
?wait_async_action(
|
||||
Code,
|
||||
EventMatch,
|
||||
Timeout
|
||||
)
|
||||
)
|
||||
).
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
|
||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
|
|
|
@ -72,4 +72,6 @@ is_running_node(Node) ->
|
|||
handle_result({ok, Result}) ->
|
||||
?OK(Result);
|
||||
handle_result({error, Reason}) ->
|
||||
?BAD_REQUEST(Reason).
|
||||
?BAD_REQUEST(Reason);
|
||||
handle_result({HTTPCode, Content}) when is_integer(HTTPCode) ->
|
||||
{HTTPCode, Content}.
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add node rebalance/node evacuation functionality.
|
||||
See also: [design doc](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md)
|
2
mix.exs
2
mix.exs
|
@ -412,6 +412,8 @@ defmodule EMQXUmbrella.MixProject do
|
|||
emqx_bridge_oracle: :permanent,
|
||||
emqx_bridge_rabbitmq: :permanent,
|
||||
emqx_ee_schema_registry: :permanent,
|
||||
emqx_eviction_agent: :permanent,
|
||||
emqx_node_rebalance: :permanent,
|
||||
emqx_ft: :permanent
|
||||
],
|
||||
else: []
|
||||
|
|
|
@ -481,6 +481,8 @@ relx_apps_per_edition(ee) ->
|
|||
emqx_bridge_oracle,
|
||||
emqx_bridge_rabbitmq,
|
||||
emqx_ee_schema_registry,
|
||||
emqx_eviction_agent,
|
||||
emqx_node_rebalance,
|
||||
emqx_ft
|
||||
];
|
||||
relx_apps_per_edition(ce) ->
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
emqx_eviction_agent_api {
|
||||
|
||||
node_eviction_status_get.desc:
|
||||
"""Get the node eviction status"""
|
||||
|
||||
node_eviction_status_get.label:
|
||||
"""Node Eviction Status"""
|
||||
|
||||
}
|
|
@ -0,0 +1,267 @@
|
|||
emqx_node_rebalance_api {
|
||||
|
||||
load_rebalance_status.desc:
|
||||
"""Get rebalance status of the current node"""
|
||||
|
||||
load_rebalance_status.label:
|
||||
"""Get rebalance status"""
|
||||
|
||||
load_rebalance_global_status.desc:
|
||||
"""Get status of all rebalance/evacuation processes across the cluster"""
|
||||
|
||||
load_rebalance_global_status.label:
|
||||
"""Get global rebalance status"""
|
||||
|
||||
load_rebalance_availability_check.desc:
|
||||
"""Check if the node is being evacuated or rebalanced"""
|
||||
|
||||
load_rebalance_availability_check.label:
|
||||
"""Availability check"""
|
||||
|
||||
load_rebalance_start.desc:
|
||||
"""Start rebalance process"""
|
||||
|
||||
load_rebalance_start.label:
|
||||
"""Start rebalance"""
|
||||
|
||||
load_rebalance_stop.desc:
|
||||
"""Stop rebalance process"""
|
||||
|
||||
load_rebalance_stop.label:
|
||||
"""Stop rebalance"""
|
||||
|
||||
load_rebalance_evacuation_start.desc:
|
||||
"""Start evacuation process"""
|
||||
|
||||
load_rebalance_evacuation_start.label:
|
||||
"""Start evacuation"""
|
||||
|
||||
load_rebalance_evacuation_stop.desc:
|
||||
"""Stop evacuation process"""
|
||||
|
||||
load_rebalance_evacuation_stop.label:
|
||||
"""Stop evacuation"""
|
||||
|
||||
param_node.desc:
|
||||
"""Node name"""
|
||||
|
||||
param_node.label:
|
||||
"""Node name"""
|
||||
|
||||
wait_health_check.desc:
|
||||
"""Time to wait before starting the rebalance process, in seconds"""
|
||||
|
||||
wait_health_check.label:
|
||||
"""Wait health check"""
|
||||
|
||||
conn_evict_rate.desc:
|
||||
"""The rate of evicting connections, in connections per second"""
|
||||
|
||||
conn_evict_rate.label:
|
||||
"""Connection eviction rate"""
|
||||
|
||||
sess_evict_rate.desc:
|
||||
"""The rate of evicting sessions, in sessions per second"""
|
||||
|
||||
sess_evict_rate.label:
|
||||
"""Session eviction rate"""
|
||||
|
||||
abs_conn_threshold.desc:
|
||||
"""Maximum desired difference between the number of connections on the node and the average number of connections on the recipient nodes. Difference lower than this is the goal of the rebalance process."""
|
||||
|
||||
abs_conn_threshold.label:
|
||||
"""Absolute connection threshold"""
|
||||
|
||||
rel_conn_threshold.desc:
|
||||
"""Maximum desired fraction between the number of connections on the node and the average number of connections on the recipient nodes. Fraction lower than this is the goal of the rebalance process."""
|
||||
|
||||
rel_conn_threshold.label:
|
||||
"""Relative connection threshold"""
|
||||
|
||||
abs_sess_threshold.desc:
|
||||
"""Maximum desired difference between the number of sessions on the node and the average number of sessions on the recipient nodes. Difference lower than this is the goal of the rebalance process."""
|
||||
|
||||
abs_sess_threshold.label:
|
||||
"""Absolute session threshold"""
|
||||
|
||||
rel_sess_threshold.desc:
|
||||
"""Maximum desired fraction between the number of sessions on the node and the average number of sessions on the recipient nodes. Fraction lower than this is the goal of the rebalance process."""
|
||||
|
||||
rel_sess_threshold.label:
|
||||
"""Relative session threshold"""
|
||||
|
||||
wait_takeover.desc:
|
||||
"""Time to wait before starting session evacuation process, in seconds"""
|
||||
|
||||
wait_takeover.label:
|
||||
"""Wait takeover"""
|
||||
|
||||
redirect_to.desc:
|
||||
"""Server reference to redirect clients to (MQTTv5 Server redirection)"""
|
||||
|
||||
redirect_to.label:
|
||||
"""Redirect to"""
|
||||
|
||||
migrate_to.desc:
|
||||
"""Nodes to migrate sessions to"""
|
||||
|
||||
migrate_to.label:
|
||||
"""Migrate to"""
|
||||
|
||||
rebalance_nodes.desc:
|
||||
"""Nodes to participate in rebalance"""
|
||||
|
||||
rebalance_nodes.label:
|
||||
"""Rebalance nodes"""
|
||||
|
||||
local_status_enabled.desc:
|
||||
"""Whether the node is being evacuated"""
|
||||
|
||||
local_status_enabled.label:
|
||||
"""Local evacuation status"""
|
||||
|
||||
local_status_process.desc:
|
||||
"""The type of the task that is being performed on the node: 'evacuation' or 'rebalance'"""
|
||||
|
||||
local_status_process.label:
|
||||
"""Task Type"""
|
||||
|
||||
local_status_state.desc:
|
||||
"""The state of the process that is being performed on the node"""
|
||||
|
||||
local_status_state.label:
|
||||
"""Rebalance/evacuation current state"""
|
||||
|
||||
local_status_coordinator_node.desc:
|
||||
"""The node that is coordinating rebalance process"""
|
||||
|
||||
local_status_coordinator_node.label:
|
||||
"""Coordinator node"""
|
||||
|
||||
local_status_connection_eviction_rate.desc:
|
||||
"""The rate of evicting connections, in connections per second"""
|
||||
|
||||
local_status_connection_eviction_rate.label:
|
||||
"""Connection eviction rate"""
|
||||
|
||||
local_status_session_eviction_rate.desc:
|
||||
"""The rate of evicting sessions, in sessions per second"""
|
||||
|
||||
local_status_session_eviction_rate.label:
|
||||
"""Session eviction rate"""
|
||||
|
||||
local_status_connection_goal.desc:
|
||||
"""The number of connections that the node should have after the rebalance/evacuation process"""
|
||||
|
||||
local_status_connection_goal.label:
|
||||
"""Connection goal"""
|
||||
|
||||
local_status_session_goal.desc:
|
||||
"""The number of sessions that the node should have after the evacuation process"""
|
||||
|
||||
local_status_session_goal.label:
|
||||
"""Session goal"""
|
||||
|
||||
local_status_disconnected_session_goal.desc:
|
||||
"""The number of disconnected sessions that the node should have after the rebalance process"""
|
||||
|
||||
local_status_disconnected_session_goal.label:
|
||||
"""Disconnected session goal"""
|
||||
|
||||
local_status_session_recipients.desc:
|
||||
"""List of nodes to which sessions are being evacuated"""
|
||||
|
||||
local_status_session_recipients.label:
|
||||
"""Session recipients"""
|
||||
|
||||
local_status_recipients.desc:
|
||||
"""List of nodes to which connections/sessions are being evacuated during rebalance"""
|
||||
|
||||
local_status_recipients.label:
|
||||
"""Recipients"""
|
||||
|
||||
local_status_stats.desc:
|
||||
"""Statistics of the evacuation/rebalance process"""
|
||||
|
||||
local_status_stats.label:
|
||||
"""Statistics"""
|
||||
|
||||
status_stats_initial_connected.desc:
|
||||
"""The number of connections on the node before the evacuation/rebalance process"""
|
||||
|
||||
status_stats_initial_connected.label:
|
||||
"""Initial connected"""
|
||||
|
||||
status_stats_current_connected.desc:
|
||||
"""Current number of connections on the node"""
|
||||
|
||||
status_stats_current_connected.label:
|
||||
"""Current connections"""
|
||||
|
||||
status_stats_initial_sessions.desc:
|
||||
"""The number of sessions on the node before the evacuation/rebalance process"""
|
||||
|
||||
status_stats_initial_sessions.label:
|
||||
"""Initial sessions"""
|
||||
|
||||
status_stats_current_sessions.desc:
|
||||
"""Current number of sessions on the node"""
|
||||
|
||||
status_stats_current_sessions.label:
|
||||
"""Current sessions"""
|
||||
|
||||
status_stats_current_disconnected_sessions.desc:
|
||||
"""Current number of disconnected sessions on the node"""
|
||||
|
||||
status_stats_current_disconnected_sessions.label:
|
||||
"""Current disconnected sessions"""
|
||||
|
||||
coordinator_status_donors.desc:
|
||||
"""List of nodes from which connections/sessions are being evacuated"""
|
||||
|
||||
coordinator_status_donors.label:
|
||||
"""Donors"""
|
||||
|
||||
coordinator_status_donor_conn_avg.desc:
|
||||
"""Average number of connections per donor node"""
|
||||
|
||||
coordinator_status_donor_conn_avg.label:
|
||||
"""Donor connections average"""
|
||||
|
||||
coordinator_status_donor_sess_avg.desc:
|
||||
"""Average number of sessions per donor node"""
|
||||
|
||||
coordinator_status_donor_sess_avg.label:
|
||||
"""Donor sessions average"""
|
||||
|
||||
coordinator_status_node.desc:
|
||||
"""The node that is coordinating the evacuation/rebalance process"""
|
||||
|
||||
coordinator_status_node.label:
|
||||
"""Coordinator node"""
|
||||
|
||||
evacuation_status_node.desc:
|
||||
"""The node that is being evacuated"""
|
||||
|
||||
evacuation_status_node.label:
|
||||
"""Evacuated node"""
|
||||
|
||||
global_status_evacuations.desc:
|
||||
"""List of nodes that are being evacuated"""
|
||||
|
||||
global_status_evacuations.label:
|
||||
"""Evacuations"""
|
||||
|
||||
global_status_rebalances.desc:
|
||||
"""List of nodes that coordinate a rebalance"""
|
||||
|
||||
global_status_rebalances.label:
|
||||
"""Rebalances"""
|
||||
|
||||
empty_response.desc:
|
||||
"""The response is empty"""
|
||||
|
||||
empty_response.label:
|
||||
"""Empty response"""
|
||||
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
emqx_eviction_agent_api {
|
||||
|
||||
node_eviction_status_get.desc:
|
||||
"""获取节点驱逐状态"""
|
||||
|
||||
node_eviction_status_get.label:
|
||||
"""节点驱逐状态"""
|
||||
|
||||
}
|
|
@ -0,0 +1,266 @@
|
|||
emqx_node_rebalance_api {
|
||||
|
||||
load_rebalance_status.desc:
|
||||
"""获取当前节点的重平衡状态"""
|
||||
|
||||
load_rebalance_status.label:
|
||||
"""获取重平衡状态"""
|
||||
|
||||
load_rebalance_global_status.desc:
|
||||
"""获取集群中所有重平衡/疏散任务的状态"""
|
||||
|
||||
load_rebalance_global_status.label:
|
||||
"""获取全局重平衡状态"""
|
||||
|
||||
load_rebalance_availability_check.desc:
|
||||
"""检查节点是否正在被执行重平衡或疏散"""
|
||||
|
||||
load_rebalance_availability_check.label:
|
||||
"""可用性检查"""
|
||||
|
||||
load_rebalance_start.desc:
|
||||
"""启动重平衡任务"""
|
||||
|
||||
load_rebalance_start.label:
|
||||
"""启动重平衡"""
|
||||
|
||||
load_rebalance_stop.desc:
|
||||
"""停止重平衡任务"""
|
||||
|
||||
load_rebalance_stop.label:
|
||||
"""停止重平衡"""
|
||||
|
||||
load_rebalance_evacuation_start.desc:
|
||||
"""启动疏散任务"""
|
||||
|
||||
load_rebalance_evacuation_start.label:
|
||||
"""启动疏散"""
|
||||
|
||||
load_rebalance_evacuation_stop.desc:
|
||||
"""停止疏散任务"""
|
||||
|
||||
load_rebalance_evacuation_stop.label:
|
||||
"""停止疏散"""
|
||||
|
||||
param_node.desc:
|
||||
"""节点名称"""
|
||||
|
||||
param_node.label:
|
||||
"""节点名称"""
|
||||
|
||||
wait_health_check.desc:
|
||||
"""启动重平衡任务前等待的时间,单位为秒"""
|
||||
|
||||
wait_health_check.label:
|
||||
"""等待健康检查"""
|
||||
|
||||
conn_evict_rate.desc:
|
||||
"""每秒迁出连接数"""
|
||||
|
||||
conn_evict_rate.label:
|
||||
"""迁出速率"""
|
||||
|
||||
sess_evict_rate.desc:
|
||||
"""每秒迁出会话数"""
|
||||
|
||||
sess_evict_rate.label:
|
||||
"""会话迁出速率"""
|
||||
|
||||
abs_conn_threshold.desc:
|
||||
"""当前节点上的连接数与迁入节点上的平均连接数的差值(绝对值)上限,低于该差值时停止迁移连接。"""
|
||||
|
||||
abs_conn_threshold.label:
|
||||
"""连接数差值"""
|
||||
|
||||
rel_conn_threshold.desc:
|
||||
"""当前节点上的连接数与迁入节点上的平均连接数的比值上限,低于该比值时停止迁移连接。"""
|
||||
|
||||
rel_conn_threshold.label:
|
||||
"""连接数比值"""
|
||||
|
||||
abs_sess_threshold.desc:
|
||||
"""当前节点上的会话数与迁入节点上的平均会话数之间的差值(绝对值)上限,低于该差值时停止迁移会话。"""
|
||||
|
||||
abs_sess_threshold.label:
|
||||
"""会话数差值"""
|
||||
|
||||
rel_sess_threshold.desc:
|
||||
"""当前节点上的会话数与迁入节点上的平均会话数的比值上限,低于该比值时停止迁移会话。"""
|
||||
|
||||
rel_sess_threshold.label:
|
||||
"""会话数比值"""
|
||||
|
||||
wait_takeover.desc:
|
||||
"""开始会话疏散任务之前的等待时间,以秒为单位"""
|
||||
|
||||
wait_takeover.label:
|
||||
"""等待接管"""
|
||||
|
||||
redirect_to.desc:
|
||||
"""将客户端重定向到的服务器参考(MQTTv5 服务器重定向)"""
|
||||
|
||||
redirect_to.label:
|
||||
"""重定向至"""
|
||||
|
||||
migrate_to.desc:
|
||||
"""接受会话迁入的节点"""
|
||||
|
||||
migrate_to.label:
|
||||
"""迁入节点"""
|
||||
|
||||
rebalance_nodes.desc:
|
||||
"""参与重平衡的节点"""
|
||||
|
||||
rebalance_nodes.label:
|
||||
"""重新平衡节点"""
|
||||
|
||||
local_status_enabled.desc:
|
||||
"""节点是否正在执行重平衡疏散任务"""
|
||||
|
||||
local_status_enabled.label:
|
||||
"""运行状态"""
|
||||
|
||||
local_status_process.desc:
|
||||
"""正在节点上执行的任务:'evacuation' 或 'rebalance'"""
|
||||
|
||||
local_status_process.label:
|
||||
"""节点任务"""
|
||||
|
||||
local_status_state.desc:
|
||||
"""正在节点上执行的任务的状态"""
|
||||
|
||||
local_status_state.label:
|
||||
"""重新平衡/疏散当前状态"""
|
||||
|
||||
local_status_coordinator_node.desc:
|
||||
"""协调分配重平衡任务的节点"""
|
||||
|
||||
local_status_coordinator_node.label:
|
||||
"""协调节点"""
|
||||
|
||||
local_status_connection_eviction_rate.desc:
|
||||
"""每秒迁出的连接数"""
|
||||
|
||||
local_status_connection_eviction_rate.label:
|
||||
"""连接迁出速率"""
|
||||
|
||||
local_status_session_eviction_rate.desc:
|
||||
"""每秒迁出的会话数"""
|
||||
|
||||
local_status_session_eviction_rate.label:
|
||||
"""会话迁出速率"""
|
||||
|
||||
local_status_connection_goal.desc:
|
||||
"""节点在重新平衡/疏散任务完成后预期拥有的连接数"""
|
||||
|
||||
local_status_connection_goal.label:
|
||||
"""连接数目标"""
|
||||
|
||||
local_status_session_goal.desc:
|
||||
"""疏散任务完成后节点预期的会话数"""
|
||||
|
||||
local_status_session_goal.label:
|
||||
"""会话数目标"""
|
||||
|
||||
local_status_disconnected_session_goal.desc:
|
||||
"""重新平衡任务完成后节点预期的无连接的会话数"""
|
||||
|
||||
local_status_disconnected_session_goal.label:
|
||||
"""预期无连接会话数"""
|
||||
|
||||
local_status_session_recipients.desc:
|
||||
"""会话被迁入的节点列表"""
|
||||
|
||||
local_status_session_recipients.label:
|
||||
"""会话迁入节点"""
|
||||
|
||||
local_status_recipients.desc:
|
||||
"""在重新平衡期间接受连接/会话迁入的节点列表"""
|
||||
|
||||
local_status_recipients.label:
|
||||
"""接受迁入节点"""
|
||||
|
||||
local_status_stats.desc:
|
||||
"""疏散/重平衡的统计"""
|
||||
|
||||
local_status_stats.label:
|
||||
"""统计数据"""
|
||||
|
||||
status_stats_initial_connected.desc:
|
||||
"""疏散/重新平衡任务开始之前节点上的连接数"""
|
||||
|
||||
status_stats_initial_connected.label:
|
||||
"""初始连接"""
|
||||
|
||||
status_stats_current_connected.desc:
|
||||
"""节点上的当前连接数"""
|
||||
|
||||
status_stats_current_connected.label:
|
||||
"""当前连接"""
|
||||
|
||||
status_stats_initial_sessions.desc:
|
||||
"""疏散/重新平衡任务开始之前节点上的会话数"""
|
||||
|
||||
status_stats_initial_sessions.label:
|
||||
"""初始会话"""
|
||||
|
||||
status_stats_current_sessions.desc:
|
||||
"""节点上的当前会话数"""
|
||||
|
||||
status_stats_current_sessions.label:
|
||||
"""当前会话"""
|
||||
|
||||
status_stats_current_disconnected_sessions.desc:
|
||||
"""节点上当前无连接的会话数"""
|
||||
|
||||
status_stats_current_disconnected_sessions.label:
|
||||
"""当前无连接会话"""
|
||||
|
||||
coordinator_status_donors.desc:
|
||||
"""正在迁出连接/会话的节点列表"""
|
||||
|
||||
coordinator_status_donors.label:
|
||||
"""迁出节点"""
|
||||
|
||||
coordinator_status_donor_conn_avg.desc:
|
||||
"""每个迁出节点的平均连接数"""
|
||||
|
||||
coordinator_status_donor_conn_avg.label:
|
||||
"""迁出节点连接平均值"""
|
||||
|
||||
coordinator_status_donor_sess_avg.desc:
|
||||
"""每个迁出节点的平均会话数"""
|
||||
|
||||
coordinator_status_donor_sess_avg.label:
|
||||
"""迁出节点会话平均数"""
|
||||
|
||||
coordinator_status_node.desc:
|
||||
"""协调分配疏散/重平衡任务的节点"""
|
||||
|
||||
coordinator_status_node.label:
|
||||
"""协调节点"""
|
||||
|
||||
evacuation_status_node.desc:
|
||||
"""正在迁出的节点"""
|
||||
|
||||
evacuation_status_node.label:
|
||||
"""疏散节点"""
|
||||
|
||||
global_status_evacuations.desc:
|
||||
"""正在迁出的节点列表"""
|
||||
|
||||
global_status_evacuations.label:
|
||||
"""疏散"""
|
||||
|
||||
global_status_rebalances.desc:
|
||||
"""协调重平衡的节点列表"""
|
||||
|
||||
global_status_rebalances.label:
|
||||
"""重平衡"""
|
||||
|
||||
empty_response.desc:
|
||||
"""响应为空"""
|
||||
|
||||
empty_response.label:
|
||||
"""空响应"""
|
||||
}
|
Loading…
Reference in New Issue