Merge branch 'release-50' into file-transfer
* release-50: chore(rebalance): fix app metadata chore(rebalance): move apps from lib-ee, add READMEs docs: refine zh tr docs: delete APL header from ee file docs: delete zh changelog chore(rebalance): review fixes chore(rebalance): rebase and review fixes feat(rebalance): port apps from 4.x
This commit is contained in:
commit
8d9b785bd7
1
Makefile
1
Makefile
|
@ -179,6 +179,7 @@ clean-all:
|
||||||
@rm -f rebar.lock
|
@rm -f rebar.lock
|
||||||
@rm -rf deps
|
@rm -rf deps
|
||||||
@rm -rf _build
|
@rm -rf _build
|
||||||
|
@rm -f emqx_dialyzer_*_plt
|
||||||
|
|
||||||
.PHONY: deps-all
|
.PHONY: deps-all
|
||||||
deps-all: $(REBAR) $(PROFILES:%=deps-%)
|
deps-all: $(REBAR) $(PROFILES:%=deps-%)
|
||||||
|
|
|
@ -14,26 +14,19 @@
|
||||||
%% limitations under the License.
|
%% limitations under the License.
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% This file contains common macros for testing.
|
||||||
|
%% It must not be used anywhere except in test suites.
|
||||||
|
|
||||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
-define(assertWaitEvent(Code, EventMatch, Timeout),
|
-define(assertWaitEvent(Code, EventMatch, Timeout),
|
||||||
?check_trace(
|
?assertMatch(
|
||||||
|
{_, {ok, EventMatch}},
|
||||||
?wait_async_action(
|
?wait_async_action(
|
||||||
Code,
|
Code,
|
||||||
EventMatch,
|
EventMatch,
|
||||||
Timeout
|
Timeout
|
||||||
),
|
)
|
||||||
fun(Trace) ->
|
|
||||||
?assert(
|
|
||||||
lists:any(
|
|
||||||
fun
|
|
||||||
(EventMatch) -> true;
|
|
||||||
(_) -> false
|
|
||||||
end,
|
|
||||||
Trace
|
|
||||||
)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
)
|
)
|
||||||
).
|
).
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-define(CHANNEL_METRICS, [
|
||||||
|
recv_pkt,
|
||||||
|
recv_msg,
|
||||||
|
'recv_msg.qos0',
|
||||||
|
'recv_msg.qos1',
|
||||||
|
'recv_msg.qos2',
|
||||||
|
'recv_msg.dropped',
|
||||||
|
'recv_msg.dropped.await_pubrel_timeout',
|
||||||
|
send_pkt,
|
||||||
|
send_msg,
|
||||||
|
'send_msg.qos0',
|
||||||
|
'send_msg.qos1',
|
||||||
|
'send_msg.qos2',
|
||||||
|
'send_msg.dropped',
|
||||||
|
'send_msg.dropped.expired',
|
||||||
|
'send_msg.dropped.queue_full',
|
||||||
|
'send_msg.dropped.too_large'
|
||||||
|
]).
|
||||||
|
|
||||||
|
-define(INFO_KEYS, [
|
||||||
|
conninfo,
|
||||||
|
conn_state,
|
||||||
|
clientinfo,
|
||||||
|
session,
|
||||||
|
will_msg
|
||||||
|
]).
|
|
@ -34,6 +34,7 @@
|
||||||
-define(HP_BRIDGE, 870).
|
-define(HP_BRIDGE, 870).
|
||||||
-define(HP_DELAY_PUB, 860).
|
-define(HP_DELAY_PUB, 860).
|
||||||
%% apps that can stop the hooks chain from continuing
|
%% apps that can stop the hooks chain from continuing
|
||||||
|
-define(HP_NODE_REBALANCE, 110).
|
||||||
-define(HP_EXHOOK, 100).
|
-define(HP_EXHOOK, 100).
|
||||||
|
|
||||||
%% == Lowest Priority = 0, don't change this value as the plugins may depend on it.
|
%% == Lowest Priority = 0, don't change this value as the plugins may depend on it.
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
{emqx_conf,2}.
|
{emqx_conf,2}.
|
||||||
{emqx_dashboard,1}.
|
{emqx_dashboard,1}.
|
||||||
{emqx_delayed,1}.
|
{emqx_delayed,1}.
|
||||||
|
{emqx_eviction_agent,1}.
|
||||||
{emqx_exhook,1}.
|
{emqx_exhook,1}.
|
||||||
{emqx_ft_storage_exporter_fs,1}.
|
{emqx_ft_storage_exporter_fs,1}.
|
||||||
{emqx_ft_storage_fs,1}.
|
{emqx_ft_storage_fs,1}.
|
||||||
|
@ -30,6 +31,10 @@
|
||||||
{emqx_mgmt_cluster,1}.
|
{emqx_mgmt_cluster,1}.
|
||||||
{emqx_mgmt_trace,1}.
|
{emqx_mgmt_trace,1}.
|
||||||
{emqx_mgmt_trace,2}.
|
{emqx_mgmt_trace,2}.
|
||||||
|
{emqx_node_rebalance,1}.
|
||||||
|
{emqx_node_rebalance_api,1}.
|
||||||
|
{emqx_node_rebalance_evacuation,1}.
|
||||||
|
{emqx_node_rebalance_status,1}.
|
||||||
{emqx_persistent_session,1}.
|
{emqx_persistent_session,1}.
|
||||||
{emqx_plugin_libs,1}.
|
{emqx_plugin_libs,1}.
|
||||||
{emqx_plugins,1}.
|
{emqx_plugins,1}.
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
-module(emqx_channel).
|
-module(emqx_channel).
|
||||||
|
|
||||||
-include("emqx.hrl").
|
-include("emqx.hrl").
|
||||||
|
-include("emqx_channel.hrl").
|
||||||
-include("emqx_mqtt.hrl").
|
-include("emqx_mqtt.hrl").
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
-include("types.hrl").
|
-include("types.hrl").
|
||||||
|
@ -57,6 +58,12 @@
|
||||||
clear_keepalive/1
|
clear_keepalive/1
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
%% Export for emqx_channel implementations
|
||||||
|
-export([
|
||||||
|
maybe_nack/1,
|
||||||
|
maybe_mark_as_delivered/2
|
||||||
|
]).
|
||||||
|
|
||||||
%% Exports for CT
|
%% Exports for CT
|
||||||
-export([set_field/3]).
|
-export([set_field/3]).
|
||||||
|
|
||||||
|
@ -69,7 +76,7 @@
|
||||||
]
|
]
|
||||||
).
|
).
|
||||||
|
|
||||||
-export_type([channel/0, opts/0]).
|
-export_type([channel/0, opts/0, conn_state/0]).
|
||||||
|
|
||||||
-record(channel, {
|
-record(channel, {
|
||||||
%% MQTT ConnInfo
|
%% MQTT ConnInfo
|
||||||
|
@ -131,33 +138,6 @@
|
||||||
quota_timer => expire_quota_limit
|
quota_timer => expire_quota_limit
|
||||||
}).
|
}).
|
||||||
|
|
||||||
-define(CHANNEL_METRICS, [
|
|
||||||
recv_pkt,
|
|
||||||
recv_msg,
|
|
||||||
'recv_msg.qos0',
|
|
||||||
'recv_msg.qos1',
|
|
||||||
'recv_msg.qos2',
|
|
||||||
'recv_msg.dropped',
|
|
||||||
'recv_msg.dropped.await_pubrel_timeout',
|
|
||||||
send_pkt,
|
|
||||||
send_msg,
|
|
||||||
'send_msg.qos0',
|
|
||||||
'send_msg.qos1',
|
|
||||||
'send_msg.qos2',
|
|
||||||
'send_msg.dropped',
|
|
||||||
'send_msg.dropped.expired',
|
|
||||||
'send_msg.dropped.queue_full',
|
|
||||||
'send_msg.dropped.too_large'
|
|
||||||
]).
|
|
||||||
|
|
||||||
-define(INFO_KEYS, [
|
|
||||||
conninfo,
|
|
||||||
conn_state,
|
|
||||||
clientinfo,
|
|
||||||
session,
|
|
||||||
will_msg
|
|
||||||
]).
|
|
||||||
|
|
||||||
-define(LIMITER_ROUTING, message_routing).
|
-define(LIMITER_ROUTING, message_routing).
|
||||||
|
|
||||||
-dialyzer({no_match, [shutdown/4, ensure_timer/2, interval/2]}).
|
-dialyzer({no_match, [shutdown/4, ensure_timer/2, interval/2]}).
|
||||||
|
@ -1091,10 +1071,12 @@ handle_out(unsuback, {PacketId, _ReasonCodes}, Channel) ->
|
||||||
handle_out(disconnect, ReasonCode, Channel) when is_integer(ReasonCode) ->
|
handle_out(disconnect, ReasonCode, Channel) when is_integer(ReasonCode) ->
|
||||||
ReasonName = disconnect_reason(ReasonCode),
|
ReasonName = disconnect_reason(ReasonCode),
|
||||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel);
|
handle_out(disconnect, {ReasonCode, ReasonName}, Channel);
|
||||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel = ?IS_MQTT_V5) ->
|
handle_out(disconnect, {ReasonCode, ReasonName}, Channel) ->
|
||||||
Packet = ?DISCONNECT_PACKET(ReasonCode),
|
handle_out(disconnect, {ReasonCode, ReasonName, #{}}, Channel);
|
||||||
|
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel = ?IS_MQTT_V5) ->
|
||||||
|
Packet = ?DISCONNECT_PACKET(ReasonCode, Props),
|
||||||
{ok, [{outgoing, Packet}, {close, ReasonName}], Channel};
|
{ok, [{outgoing, Packet}, {close, ReasonName}], Channel};
|
||||||
handle_out(disconnect, {_ReasonCode, ReasonName}, Channel) ->
|
handle_out(disconnect, {_ReasonCode, ReasonName, _Props}, Channel) ->
|
||||||
{ok, {close, ReasonName}, Channel};
|
{ok, {close, ReasonName}, Channel};
|
||||||
handle_out(auth, {ReasonCode, Properties}, Channel) ->
|
handle_out(auth, {ReasonCode, Properties}, Channel) ->
|
||||||
{ok, ?AUTH_PACKET(ReasonCode, Properties), Channel};
|
{ok, ?AUTH_PACKET(ReasonCode, Properties), Channel};
|
||||||
|
@ -1211,13 +1193,19 @@ handle_call(
|
||||||
{takeover, 'end'},
|
{takeover, 'end'},
|
||||||
Channel = #channel{
|
Channel = #channel{
|
||||||
session = Session,
|
session = Session,
|
||||||
pendings = Pendings
|
pendings = Pendings,
|
||||||
|
conninfo = #{clientid := ClientId}
|
||||||
}
|
}
|
||||||
) ->
|
) ->
|
||||||
ok = emqx_session:takeover(Session),
|
ok = emqx_session:takeover(Session),
|
||||||
%% TODO: Should not drain deliver here (side effect)
|
%% TODO: Should not drain deliver here (side effect)
|
||||||
Delivers = emqx_utils:drain_deliver(),
|
Delivers = emqx_utils:drain_deliver(),
|
||||||
AllPendings = lists:append(Delivers, Pendings),
|
AllPendings = lists:append(Delivers, Pendings),
|
||||||
|
?tp(
|
||||||
|
debug,
|
||||||
|
emqx_channel_takeover_end,
|
||||||
|
#{clientid => ClientId}
|
||||||
|
),
|
||||||
disconnect_and_shutdown(takenover, AllPendings, Channel);
|
disconnect_and_shutdown(takenover, AllPendings, Channel);
|
||||||
handle_call(list_authz_cache, Channel) ->
|
handle_call(list_authz_cache, Channel) ->
|
||||||
{reply, emqx_authz_cache:list_authz_cache(), Channel};
|
{reply, emqx_authz_cache:list_authz_cache(), Channel};
|
||||||
|
@ -1289,6 +1277,8 @@ handle_info(die_if_test = Info, Channel) ->
|
||||||
die_if_test_compiled(),
|
die_if_test_compiled(),
|
||||||
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
||||||
{ok, Channel};
|
{ok, Channel};
|
||||||
|
handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) ->
|
||||||
|
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel);
|
||||||
handle_info({puback, PacketId, PubRes, RC}, Channel) ->
|
handle_info({puback, PacketId, PubRes, RC}, Channel) ->
|
||||||
do_finish_publish(PacketId, PubRes, RC, Channel);
|
do_finish_publish(PacketId, PubRes, RC, Channel);
|
||||||
handle_info(Info, Channel) ->
|
handle_info(Info, Channel) ->
|
||||||
|
|
|
@ -23,6 +23,8 @@
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
-include("types.hrl").
|
-include("types.hrl").
|
||||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
-include_lib("stdlib/include/qlc.hrl").
|
||||||
|
-include_lib("stdlib/include/ms_transform.hrl").
|
||||||
|
|
||||||
-export([start_link/0]).
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
@ -72,6 +74,12 @@
|
||||||
get_session_confs/2
|
get_session_confs/2
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
%% Client management
|
||||||
|
-export([
|
||||||
|
channel_with_session_table/1,
|
||||||
|
live_connection_table/1
|
||||||
|
]).
|
||||||
|
|
||||||
%% gen_server callbacks
|
%% gen_server callbacks
|
||||||
-export([
|
-export([
|
||||||
init/1,
|
init/1,
|
||||||
|
@ -597,6 +605,40 @@ all_channels() ->
|
||||||
Pat = [{{'_', '$1'}, [], ['$1']}],
|
Pat = [{{'_', '$1'}, [], ['$1']}],
|
||||||
ets:select(?CHAN_TAB, Pat).
|
ets:select(?CHAN_TAB, Pat).
|
||||||
|
|
||||||
|
%% @doc Build a QLC query handle over the channel info table that yields
%% `{ClientId, ConnState, ConnInfo, ClientInfo}' for every channel whose
%% conninfo has `clean_start = false' and whose `conn_mod' is a member of
%% `ConnModuleList'.
channel_with_session_table(ConnModuleList) ->
    %% Membership of a row's conn_mod is tested against this set below.
    AllowedMods = sets:from_list(ConnModuleList, [{version, 2}]),
    %% Project each table row down to {ClientId, Info}; the channel pid and
    %% the stats element are not carried into the query.
    MatchSpec = ets:fun2ms(
        fun({{Id, _Pid}, ChanInfo, _ChanStats}) ->
            {Id, ChanInfo}
        end
    ),
    InfoTable = ets:table(?CHAN_INFO_TAB, [{traverse, {select, MatchSpec}}]),
    qlc:q([
        {Id, State, ConnInfo, ClientInfo}
     || {Id, #{
            conn_state := State,
            clientinfo := ClientInfo,
            conninfo := #{clean_start := false, conn_mod := Mod} = ConnInfo
        }} <-
            InfoTable,
        sets:is_element(Mod, AllowedMods)
    ]).
|
||||||
|
|
||||||
|
%% @doc Build a QLC query handle over the channel connection table that
%% yields `{ClientId, ChanPid}' for channels using one of `ConnModules' and
%% for which `is_channel_connected/1' returns `true'.
live_connection_table(ConnModules) ->
    %% One match-spec clause per accepted connection module.
    MatchSpecs = [live_connection_ms(Mod) || Mod <- ConnModules],
    ConnTable = ets:table(?CHAN_CONN_TAB, [{traverse, {select, MatchSpecs}}]),
    qlc:q([
        {Id, Pid}
     || {Id, Pid} <- ConnTable,
        is_channel_connected(Pid)
    ]).
|
||||||
|
|
||||||
|
%% Match-spec clause that selects `{ClientId, ChanPid}' from rows shaped as
%% `{{ClientId, ChanPid}, ConnModule}' for the given connection module.
live_connection_ms(ConnModule) ->
    RowPattern = {{'$1', '$2'}, ConnModule},
    Selected = {{'$1', '$2'}},
    {RowPattern, [], [Selected]}.
|
||||||
|
|
||||||
|
%% A channel counts as connected only when its pid belongs to the local node
%% and is present in the live-channel table. The node test is a guard, so an
%% argument for which `node/1' fails simply yields `false'.
is_channel_connected(ChanPid) ->
    if
        node(ChanPid) =:= node() ->
            ets:member(?CHAN_LIVE_TAB, ChanPid);
        true ->
            false
    end.
|
||||||
|
|
||||||
%% @doc Get all registered clientIDs. Debug/test interface
|
%% @doc Get all registered clientIDs. Debug/test interface
|
||||||
all_client_ids() ->
|
all_client_ids() ->
|
||||||
Pat = [{{'$1', '_'}, [], ['$1']}],
|
Pat = [{{'$1', '_'}, [], ['$1']}],
|
||||||
|
@ -697,7 +739,8 @@ code_change(_OldVsn, State, _Extra) ->
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
clean_down({ChanPid, ClientId}) ->
|
clean_down({ChanPid, ClientId}) ->
|
||||||
do_unregister_channel({ClientId, ChanPid}).
|
do_unregister_channel({ClientId, ChanPid}),
|
||||||
|
ok = ?tp(debug, emqx_cm_clean_down, #{client_id => ClientId}).
|
||||||
|
|
||||||
stats_fun() ->
|
stats_fun() ->
|
||||||
lists:foreach(fun update_stats/1, ?CHAN_STATS).
|
lists:foreach(fun update_stats/1, ?CHAN_STATS).
|
||||||
|
@ -723,12 +766,12 @@ get_chann_conn_mod(ClientId, ChanPid) ->
|
||||||
wrap_rpc(emqx_cm_proto_v2:get_chann_conn_mod(ClientId, ChanPid)).
|
wrap_rpc(emqx_cm_proto_v2:get_chann_conn_mod(ClientId, ChanPid)).
|
||||||
|
|
||||||
mark_channel_connected(ChanPid) ->
|
mark_channel_connected(ChanPid) ->
|
||||||
?tp(emqx_cm_connected_client_count_inc, #{}),
|
?tp(emqx_cm_connected_client_count_inc, #{chan_pid => ChanPid}),
|
||||||
ets:insert_new(?CHAN_LIVE_TAB, {ChanPid, true}),
|
ets:insert_new(?CHAN_LIVE_TAB, {ChanPid, true}),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
mark_channel_disconnected(ChanPid) ->
|
mark_channel_disconnected(ChanPid) ->
|
||||||
?tp(emqx_cm_connected_client_count_dec, #{}),
|
?tp(emqx_cm_connected_client_count_dec, #{chan_pid => ChanPid}),
|
||||||
ets:delete(?CHAN_LIVE_TAB, ChanPid),
|
ets:delete(?CHAN_LIVE_TAB, ChanPid),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
|
|
@ -167,9 +167,15 @@ handle_info(Info, State) ->
|
||||||
{noreply, State}.
|
{noreply, State}.
|
||||||
|
|
||||||
terminate(_Reason, _State) ->
|
terminate(_Reason, _State) ->
|
||||||
ok = ekka:unmonitor(membership),
|
try
|
||||||
emqx_stats:cancel_update(route_stats),
|
ok = ekka:unmonitor(membership),
|
||||||
mnesia:unsubscribe({table, ?ROUTING_NODE, simple}).
|
emqx_stats:cancel_update(route_stats),
|
||||||
|
mnesia:unsubscribe({table, ?ROUTING_NODE, simple})
|
||||||
|
catch
|
||||||
|
exit:{noproc, {gen_server, call, [mria_membership, _]}} ->
|
||||||
|
?SLOG(warning, #{msg => "mria_membership_down"}),
|
||||||
|
ok
|
||||||
|
end.
|
||||||
|
|
||||||
code_change(_OldVsn, State, _Extra) ->
|
code_change(_OldVsn, State, _Extra) ->
|
||||||
{ok, State}.
|
{ok, State}.
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
set_default_config/0,
|
set_default_config/0,
|
||||||
set_default_config/1,
|
set_default_config/1,
|
||||||
set_default_config/2,
|
set_default_config/2,
|
||||||
|
set_default_config/3,
|
||||||
request/2,
|
request/2,
|
||||||
request/3,
|
request/3,
|
||||||
request/4,
|
request/4,
|
||||||
|
@ -41,11 +42,14 @@ set_default_config(DefaultUsername) ->
|
||||||
set_default_config(DefaultUsername, false).
|
set_default_config(DefaultUsername, false).
|
||||||
|
|
||||||
set_default_config(DefaultUsername, HAProxyEnabled) ->
|
set_default_config(DefaultUsername, HAProxyEnabled) ->
|
||||||
|
set_default_config(DefaultUsername, HAProxyEnabled, #{}).
|
||||||
|
|
||||||
|
set_default_config(DefaultUsername, HAProxyEnabled, Opts) ->
|
||||||
Config = #{
|
Config = #{
|
||||||
listeners => #{
|
listeners => #{
|
||||||
http => #{
|
http => #{
|
||||||
enable => true,
|
enable => true,
|
||||||
bind => 18083,
|
bind => maps:get(bind, Opts, 18083),
|
||||||
inet6 => false,
|
inet6 => false,
|
||||||
ipv6_v6only => false,
|
ipv6_v6only => false,
|
||||||
max_connections => 512,
|
max_connections => 512,
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
Business Source License 1.1
|
||||||
|
|
||||||
|
Licensor: Hangzhou EMQ Technologies Co., Ltd.
|
||||||
|
Licensed Work: EMQX Enterprise Edition
|
||||||
|
The Licensed Work is (c) 2023
|
||||||
|
Hangzhou EMQ Technologies Co., Ltd.
|
||||||
|
Additional Use Grant: Students and educators are granted right to copy,
|
||||||
|
modify, and create derivative work for research
|
||||||
|
or education.
|
||||||
|
Change Date: 2027-02-01
|
||||||
|
Change License: Apache License, Version 2.0
|
||||||
|
|
||||||
|
For information about alternative licensing arrangements for the Software,
|
||||||
|
please contact Licensor: https://www.emqx.com/en/contact
|
||||||
|
|
||||||
|
Notice
|
||||||
|
|
||||||
|
The Business Source License (this document, or the “License”) is not an Open
|
||||||
|
Source license. However, the Licensed Work will eventually be made available
|
||||||
|
under an Open Source License, as stated in this License.
|
||||||
|
|
||||||
|
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
||||||
|
“Business Source License” is a trademark of MariaDB Corporation Ab.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Business Source License 1.1
|
||||||
|
|
||||||
|
Terms
|
||||||
|
|
||||||
|
The Licensor hereby grants you the right to copy, modify, create derivative
|
||||||
|
works, redistribute, and make non-production use of the Licensed Work. The
|
||||||
|
Licensor may make an Additional Use Grant, above, permitting limited
|
||||||
|
production use.
|
||||||
|
|
||||||
|
Effective on the Change Date, or the fourth anniversary of the first publicly
|
||||||
|
available distribution of a specific version of the Licensed Work under this
|
||||||
|
License, whichever comes first, the Licensor hereby grants you rights under
|
||||||
|
the terms of the Change License, and the rights granted in the paragraph
|
||||||
|
above terminate.
|
||||||
|
|
||||||
|
If your use of the Licensed Work does not comply with the requirements
|
||||||
|
currently in effect as described in this License, you must purchase a
|
||||||
|
commercial license from the Licensor, its affiliated entities, or authorized
|
||||||
|
resellers, or you must refrain from using the Licensed Work.
|
||||||
|
|
||||||
|
All copies of the original and modified Licensed Work, and derivative works
|
||||||
|
of the Licensed Work, are subject to this License. This License applies
|
||||||
|
separately for each version of the Licensed Work and the Change Date may vary
|
||||||
|
for each version of the Licensed Work released by Licensor.
|
||||||
|
|
||||||
|
You must conspicuously display this License on each original or modified copy
|
||||||
|
of the Licensed Work. If you receive the Licensed Work in original or
|
||||||
|
modified form from a third party, the terms and conditions set forth in this
|
||||||
|
License apply to your use of that work.
|
||||||
|
|
||||||
|
Any use of the Licensed Work in violation of this License will automatically
|
||||||
|
terminate your rights under this License for the current and all other
|
||||||
|
versions of the Licensed Work.
|
||||||
|
|
||||||
|
This License does not grant you any right in any trademark or logo of
|
||||||
|
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||||
|
Licensor as expressly required by this License).
|
||||||
|
|
||||||
|
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||||
|
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
||||||
|
TITLE.
|
||||||
|
|
||||||
|
MariaDB hereby grants you permission to use this License’s text to license
|
||||||
|
your works, and to refer to it using the trademark “Business Source License”,
|
||||||
|
as long as you comply with the Covenants of Licensor below.
|
||||||
|
|
||||||
|
Covenants of Licensor
|
||||||
|
|
||||||
|
In consideration of the right to use this License’s text and the “Business
|
||||||
|
Source License” name and trademark, Licensor covenants to MariaDB, and to all
|
||||||
|
other recipients of the licensed work to be provided by Licensor:
|
||||||
|
|
||||||
|
1. To specify as the Change License the GPL Version 2.0 or any later version,
|
||||||
|
or a license that is compatible with GPL Version 2.0 or a later version,
|
||||||
|
where “compatible” means that software provided under the Change License can
|
||||||
|
be included in a program with software provided under GPL Version 2.0 or a
|
||||||
|
later version. Licensor may specify additional Change Licenses without
|
||||||
|
limitation.
|
||||||
|
|
||||||
|
2. To either: (a) specify an additional grant of rights to use that does not
|
||||||
|
impose any additional restriction on the right granted in this License, as
|
||||||
|
the Additional Use Grant; or (b) insert the text “None”.
|
||||||
|
|
||||||
|
3. To specify a Change Date.
|
||||||
|
|
||||||
|
4. Not to modify this License in any other way.
|
|
@ -0,0 +1,35 @@
|
||||||
|
# EMQX Eviction Agent
|
||||||
|
|
||||||
|
`emqx_eviction_agent` is a part of the node evacuation/node rebalance feature in EMQX.
|
||||||
|
It is a low-level application that encapsulates working with actual MQTT connections.
|
||||||
|
|
||||||
|
## Application Responsibilities
|
||||||
|
|
||||||
|
`emqx_eviction_agent` application:
|
||||||
|
|
||||||
|
* Blocks incoming connections to the node it is running on.
|
||||||
|
* Serves as a facade for connection/session eviction operations.
|
||||||
|
* Reports blocking status via HTTP API.
|
||||||
|
|
||||||
|
The `emqx_eviction_agent` is relatively passive and has no eviction/rebalancing logic. It allows
|
||||||
|
`emqx_node_rebalance` to perform eviction/rebalancing operations using a high-level API, without having to deal with
|
||||||
|
MQTT connections directly.
|
||||||
|
|
||||||
|
## EMQX Integration
|
||||||
|
|
||||||
|
`emqx_eviction_agent` interacts with the following EMQX components:
|
||||||
|
* `emqx_cm` - to get the list of active MQTT connections;
|
||||||
|
* `emqx_hooks` subsystem - to block/unblock incoming connections;
|
||||||
|
* `emqx_channel` and the corresponding connection modules to perform the eviction.
|
||||||
|
|
||||||
|
## User Facing API
|
||||||
|
|
||||||
|
The application provides a very simple API (CLI and HTTP) to inspect the current blocking status.
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
|
||||||
|
The rebalancing concept is described in the corresponding [EIP](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md).
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Please see our [contributing.md](../../CONTRIBUTING.md).
|
|
@ -0,0 +1,3 @@
|
||||||
|
##--------------------------------------------------------------------
|
||||||
|
## EMQX Eviction Agent Plugin
|
||||||
|
##--------------------------------------------------------------------
|
|
@ -0,0 +1,2 @@
|
||||||
|
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||||
|
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,21 @@
|
||||||
|
{application, emqx_eviction_agent, [
|
||||||
|
{description, "EMQX Eviction Agent"},
|
||||||
|
{vsn, "5.0.0"},
|
||||||
|
{registered, [
|
||||||
|
emqx_eviction_agent_sup,
|
||||||
|
emqx_eviction_agent,
|
||||||
|
emqx_eviction_agent_conn_sup
|
||||||
|
]},
|
||||||
|
{applications, [
|
||||||
|
kernel,
|
||||||
|
stdlib,
|
||||||
|
emqx_ctl
|
||||||
|
]},
|
||||||
|
{mod, {emqx_eviction_agent_app, []}},
|
||||||
|
{env, []},
|
||||||
|
{modules, []},
|
||||||
|
{links, [
|
||||||
|
{"Homepage", "https://www.emqx.com/"},
|
||||||
|
{"Github", "https://github.com/emqx"}
|
||||||
|
]}
|
||||||
|
]}.
|
|
@ -0,0 +1,3 @@
|
||||||
|
%% -*- mode: erlang -*-
|
||||||
|
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||||
|
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,348 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_hooks.hrl").
|
||||||
|
|
||||||
|
-include_lib("stdlib/include/qlc.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start_link/0,
|
||||||
|
enable/2,
|
||||||
|
disable/1,
|
||||||
|
status/0,
|
||||||
|
connection_count/0,
|
||||||
|
session_count/0,
|
||||||
|
session_count/1,
|
||||||
|
evict_connections/1,
|
||||||
|
evict_sessions/2,
|
||||||
|
evict_sessions/3,
|
||||||
|
evict_session_channel/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
-behaviour(gen_server).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
handle_call/3,
|
||||||
|
handle_info/2,
|
||||||
|
handle_cast/2,
|
||||||
|
code_change/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
on_connect/2,
|
||||||
|
on_connack/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
hook/0,
|
||||||
|
unhook/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export_type([server_reference/0]).
|
||||||
|
|
||||||
|
-define(CONN_MODULES, [
|
||||||
|
emqx_connection, emqx_ws_connection, emqx_quic_connection, emqx_eviction_agent_channel
|
||||||
|
]).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-type server_reference() :: binary() | undefined.
|
||||||
|
-type status() :: {enabled, conn_stats()} | disabled.
|
||||||
|
-type conn_stats() :: #{
|
||||||
|
connections := non_neg_integer(),
|
||||||
|
sessions := non_neg_integer()
|
||||||
|
}.
|
||||||
|
-type kind() :: atom().
|
||||||
|
|
||||||
|
%% @doc Start the eviction agent as a gen_server registered locally under the
%% module name, with an empty init argument list and no extra options.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||||
|
|
||||||
|
%% @doc Synchronously ask the agent process to enable eviction for `Kind',
%% recording `ServerReference'. The reply of the agent is returned as is.
-spec enable(kind(), server_reference()) -> ok_or_error(eviction_agent_busy).
enable(Kind, ServerReference) ->
    Request = {enable, Kind, ServerReference},
    gen_server:call(?MODULE, Request).
|
||||||
|
|
||||||
|
%% @doc Synchronously ask the agent process to disable eviction for `Kind'.
%%
%% The reply of the agent is returned as is. The `{disable, Kind}' call
%% handler replies `ok' when `Kind' is the currently enabled kind,
%% `{error, disabled}' when nothing is enabled, and
%% `{error, eviction_agent_busy}' when a different kind is enabled, so the
%% spec lists all three outcomes rather than only `ok'.
-spec disable(kind()) -> ok_or_error(disabled | eviction_agent_busy).
disable(Kind) ->
    gen_server:call(?MODULE, {disable, Kind}).
|
||||||
|
|
||||||
|
%% @doc Report the agent status: `disabled', or `{enabled, Stats}' where
%% `Stats' is the map built by `stats/0'.
-spec status() -> status().
status() ->
    Current = enable_status(),
    case Current of
        disabled ->
            disabled;
        {enabled, _Kind, _ServerReference} ->
            {enabled, stats()}
    end.
|
||||||
|
|
||||||
|
%% @doc Evict up to `N' connections, passing the stored server reference to
%% `do_evict_connections/2'. Returns `{error, disabled}' when the agent is
%% not enabled.
-spec evict_connections(pos_integer()) -> ok_or_error(disabled).
evict_connections(N) ->
    case enable_status() of
        disabled ->
            {error, disabled};
        {enabled, _Kind, ServerRef} ->
            %% Assert success of the eviction; anything but `ok' crashes here.
            ok = do_evict_connections(N, ServerRef),
            ok
    end.
|
||||||
|
|
||||||
|
%% @doc Evict up to `N' sessions towards the given node(s), regardless of
%% connection state (`any').
%%
%% A single node atom is wrapped into a one-element list. A node list must be
%% non-empty; this is checked by matching on `[_ | _]' instead of calling
%% `length/1' in a guard, which would walk the whole list just to test for
%% emptiness.
-spec evict_sessions(pos_integer(), node() | [node()]) -> ok_or_error(disabled).
evict_sessions(N, Node) when is_atom(Node) ->
    evict_sessions(N, [Node]);
evict_sessions(N, [_ | _] = Nodes) ->
    evict_sessions(N, Nodes, any).
|
||||||
|
|
||||||
|
%% @doc Evict up to `N' sessions towards the given node(s), restricted to
%% channels in connection state `ConnState'.
%%
%% A single node atom is wrapped into a one-element list. A node list must be
%% non-empty; this is checked by matching on `[_ | _]' instead of calling
%% `length/1' in a guard. Returns `{error, disabled}' when the agent is not
%% enabled; otherwise the result of `do_evict_sessions/3' is asserted to be
%% `ok' and returned.
-spec evict_sessions(pos_integer(), node() | [node()], atom()) -> ok_or_error(disabled).
evict_sessions(N, Node, ConnState) when is_atom(Node) ->
    evict_sessions(N, [Node], ConnState);
evict_sessions(N, [_ | _] = Nodes, ConnState) ->
    case enable_status() of
        {enabled, _Kind, _ServerReference} ->
            ok = do_evict_sessions(N, Nodes, ConnState);
        disabled ->
            {error, disabled}
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% gen_server callbacks
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% gen_server init callback. Erases whatever is stored in persistent_term
%% under this module's key and starts with an empty map as server state.
init([]) ->
    InitialState = #{},
    _WasPresent = persistent_term:erase(?MODULE),
    {ok, InitialState}.
|
||||||
|
|
||||||
|
%% gen_server call handler.
%%
%% `{enable, Kind, ServerReference}': stores `{enabled, Kind, ServerReference}'
%% in persistent_term when the agent is disabled or already enabled for the
%% same `Kind'; replies `{error, eviction_agent_busy}' when it is enabled for
%% a different kind.
%%
%% `{disable, Kind}': erases the stored status when the agent is enabled for
%% `Kind'; replies `{error, disabled}' when nothing is enabled and
%% `{error, eviction_agent_busy}' when a different kind is enabled.
%%
%% Any other request is logged and answered with `{error, unknown_call}'.
handle_call({enable, Kind, ServerReference}, _From, State) ->
    NewStatus = {enabled, Kind, ServerReference},
    Result =
        case enable_status() of
            {enabled, Kind, _PrevReference} ->
                %% Same kind already enabled: overwrite the server reference.
                ok = persistent_term:put(?MODULE, NewStatus);
            {enabled, _OtherKind, _PrevReference} ->
                {error, eviction_agent_busy};
            disabled ->
                ok = persistent_term:put(?MODULE, NewStatus)
        end,
    {reply, Result, State};
handle_call({disable, Kind}, _From, State) ->
    Result =
        case enable_status() of
            {enabled, Kind, _ServerReference} ->
                _ = persistent_term:erase(?MODULE),
                ok;
            {enabled, _OtherKind, _ServerReference} ->
                {error, eviction_agent_busy};
            disabled ->
                {error, disabled}
        end,
    {reply, Result, State};
handle_call(Request, _From, State) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Request, state => State}),
    {reply, {error, unknown_call}, State}.
|
||||||
|
|
||||||
|
%% gen_server info handler: no raw message is expected, so every message is
%% logged at warning level and the state is left untouched.
handle_info(Info, State) ->
    ?SLOG(warning, #{msg => "unknown_msg", info => Info, state => State}),
    {noreply, State}.
|
||||||
|
|
||||||
|
%% gen_server cast handler: no cast is expected, so every cast is logged at
%% warning level and the state is left untouched.
handle_cast(Cast, State) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast => Cast, state => State}),
    {noreply, State}.
|
||||||
|
|
||||||
|
%% gen_server code-change callback: the state is carried over unchanged.
code_change(_OldVsn, St, _Extra) ->
    {ok, St}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Hook callbacks
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% 'client.connect' hook callback. While the agent is enabled the hook chain
%% is stopped with `?RC_USE_ANOTHER_SERVER' as the error; otherwise the
%% callback returns `ignore'.
on_connect(_ConnInfo, _Props) ->
    case enable_status() of
        disabled ->
            ignore;
        {enabled, _Kind, _ServerReference} ->
            {stop, {error, ?RC_USE_ANOTHER_SERVER}}
    end.
|
||||||
|
|
||||||
|
%% 'client.connack' hook callback. For an MQTT v5 client being answered with
%% `use_another_server' while the agent is enabled, the stored server
%% reference is put into the properties under 'Server-Reference'. In every
%% other case the properties are returned unchanged.
on_connack(#{proto_name := <<"MQTT">>, proto_ver := ?MQTT_PROTO_V5}, use_another_server, Props) ->
    case enable_status() of
        disabled ->
            {ok, Props};
        {enabled, _Kind, ServerRef} ->
            WithReference = Props#{'Server-Reference' => ServerRef},
            {ok, WithReference}
    end;
on_connack(_ClientInfo, _Reason, Props) ->
    {ok, Props}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Hook funcs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Register the connack and connect hook callbacks (in that order) with
%% priority ?HP_NODE_REBALANCE. Emits the `eviction_agent_hook' trace point
%% first. Returns `ok'; a failed registration crashes with badmatch.
hook() ->
    ?tp(debug, eviction_agent_hook, #{}),
    lists:foreach(
        fun({HookPoint, Callback}) ->
            ok = emqx_hooks:put(HookPoint, {?MODULE, Callback, []}, ?HP_NODE_REBALANCE)
        end,
        [
            {'client.connack', on_connack},
            {'client.connect', on_connect}
        ]
    ).
|
||||||
|
|
||||||
|
%% Remove the connect and connack hook callbacks (in that order). Emits the
%% `eviction_agent_unhook' trace point first. Returns `ok'; a failed removal
%% crashes with badmatch.
unhook() ->
    ?tp(debug, eviction_agent_unhook, #{}),
    lists:foreach(
        fun({HookPoint, Callback}) ->
            ok = emqx_hooks:del(HookPoint, {?MODULE, Callback})
        end,
        [
            {'client.connect', on_connect},
            {'client.connack', on_connack}
        ]
    ).
|
||||||
|
|
||||||
|
%% Current agent status as stored in persistent_term under the module name;
%% `disabled' when nothing is stored.
enable_status() ->
    Default = disabled,
    persistent_term:get(?MODULE, Default).
|
||||||
|
|
||||||
|
% connection management
|
||||||
|
%% Snapshot of the node's load as a map with the current number of
%% connections and sessions.
stats() ->
    Connections = connection_count(),
    Sessions = session_count(),
    #{connections => Connections, sessions => Sessions}.
|
||||||
|
|
||||||
|
%% Query handle over the live connections of the modules in ?CONN_MODULES.
connection_table() ->
    ConnModules = ?CONN_MODULES,
    emqx_cm:live_connection_table(ConnModules).
|
||||||
|
|
||||||
|
%% Number of entries currently produced by connection_table/0.
connection_count() ->
    Table = connection_table(),
    table_count(Table).
|
||||||
|
|
||||||
|
%% Query handle yielding `{ClientId, ConnInfo, ClientInfo}' for channels that
%% hold a session. With `any' every row is returned; otherwise only rows
%% whose connection state is exactly the requested one.
channel_with_session_table(any) ->
    qlc:q([
        {Id, CInfo, ClInfo}
     || {Id, _AnyState, CInfo, ClInfo} <-
            emqx_cm:channel_with_session_table(?CONN_MODULES)
    ]);
channel_with_session_table(RequiredConnState) ->
    qlc:q([
        {Id, CInfo, ClInfo}
     || {Id, State, CInfo, ClInfo} <-
            emqx_cm:channel_with_session_table(?CONN_MODULES),
        State =:= RequiredConnState
    ]).
|
||||||
|
|
||||||
|
%% Number of sessions regardless of the connection state of their channel.
session_count() ->
    ConnState = any,
    session_count(ConnState).
|
||||||
|
|
||||||
|
%% Number of sessions whose channel is in the given connection state
%% (`any' counts them all).
session_count(ConnState) ->
    Table = channel_with_session_table(ConnState),
    table_count(Table).
|
||||||
|
|
||||||
|
%% Count the answers of a query handle by folding over it.
table_count(QH) ->
    CountOne = fun(_Answer, Count) -> Count + 1 end,
    qlc:fold(CountOne, 0, QH).
|
||||||
|
|
||||||
|
%% Return the channel pids of up to N live connections.
%% A qlc cursor is used so that only the first N answers are fetched; the
%% cursor is deleted before returning.
take_connections(N) ->
    PidsQH = qlc:q([Pid || {_ClientId, Pid} <- connection_table()]),
    Cursor = qlc:cursor(PidsQH),
    Pids = qlc:next_answers(Cursor, N),
    ok = qlc:delete_cursor(Cursor),
    Pids.
|
||||||
|
|
||||||
|
%% Return up to N `{ClientId, ConnInfo, ClientInfo}' entries for channels
%% with a session in the given connection state. The qlc cursor is deleted
%% before returning.
take_channel_with_sessions(N, ConnState) ->
    QH = channel_with_session_table(ConnState),
    Cursor = qlc:cursor(QH),
    Taken = qlc:next_answers(Cursor, N),
    ok = qlc:delete_cursor(Cursor),
    Taken.
|
||||||
|
|
||||||
|
%% Ask up to N live connections to disconnect, passing the server reference
%% along. Only defined for N > 0. Returns `ok'.
do_evict_connections(N, ServerReference) when N > 0 ->
    _ = [disconnect_channel(Pid, ServerReference) || Pid <- take_connections(N)],
    ok.
|
||||||
|
|
||||||
|
%% Evict up to N sessions whose channel is in ConnState, each to a node
%% picked from Nodes. Only defined for N > 0. Individual eviction results are
%% not inspected here; returns `ok'.
do_evict_sessions(N, Nodes, ConnState) when N > 0 ->
    _ = [
        evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo)
     || {ClientId, ConnInfo, ClientInfo} <- take_channel_with_sessions(N, ConnState)
    ],
    ok.
|
||||||
|
|
||||||
|
%% Evict one session to a randomly selected node from Nodes via the
%% emqx_eviction_agent_proto_v1 RPC.
%% Returns the RPC result, except that `{badrpc, Reason}' is turned into
%% `{error, Reason}'. Both failure shapes are logged at error level.
evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo) ->
    TargetNode = select_random(Nodes),
    ?SLOG(
        info,
        #{
            msg => "evict_session_channel",
            client_id => ClientId,
            node => TargetNode,
            conn_info => ConnInfo,
            client_info => ClientInfo
        }
    ),
    RpcResult = emqx_eviction_agent_proto_v1:evict_session_channel(
        TargetNode, ClientId, ConnInfo, ClientInfo
    ),
    case RpcResult of
        {badrpc, RpcReason} ->
            ?SLOG(
                error,
                #{
                    msg => "evict_session_channel_rpc_error",
                    client_id => ClientId,
                    node => TargetNode,
                    reason => RpcReason
                }
            ),
            {error, RpcReason};
        {error, Reason} ->
            ?SLOG(
                error,
                #{
                    msg => "evict_session_channel_error",
                    client_id => ClientId,
                    node => TargetNode,
                    reason => Reason
                }
            ),
            RpcResult;
        _Other ->
            RpcResult
    end.
|
||||||
|
|
||||||
|
%% Receiving side of a session eviction: start a supervised
%% emqx_eviction_agent_channel for the given client on this node.
%% The request and the start result are logged at info level; the result of
%% start_supervised/1 is returned unchanged.
-spec evict_session_channel(
    emqx_types:clientid(),
    emqx_types:conninfo(),
    emqx_types:clientinfo()
) -> supervisor:startchild_ret().
evict_session_channel(ClientId, ConnInfo, ClientInfo) ->
    ?SLOG(info, #{
        msg => "evict_session_channel",
        client_id => ClientId,
        conn_info => ConnInfo,
        client_info => ClientInfo
    }),
    ChannelOpts = #{conninfo => ConnInfo, clientinfo => ClientInfo},
    StartResult = emqx_eviction_agent_channel:start_supervised(ChannelOpts),
    ?SLOG(
        info,
        #{
            msg => "evict_session_channel_result",
            client_id => ClientId,
            result => StartResult
        }
    ),
    StartResult.
|
||||||
|
|
||||||
|
%% Send a disconnect request with reason code ?RC_USE_ANOTHER_SERVER and the
%% server reference as a property to the channel process. Returns the
%% message that was sent.
disconnect_channel(ChanPid, ServerReference) ->
    Props = #{'Server-Reference' => ServerReference},
    Request = {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props},
    erlang:send(ChanPid, Request).
|
||||||
|
|
||||||
|
%% Pick one element of a non-empty list uniformly at random.
%%
%% Non-emptiness is checked by matching on the list head instead of calling
%% length/1 in a guard, so the list is only traversed once (for
%% rand:uniform/1) rather than once for the guard and again in the body.
%% An empty list matches no clause and fails with function_clause, as before.
select_random([_ | _] = List) ->
    lists:nth(rand:uniform(length(List)), List).
|
|
@ -0,0 +1,85 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_api).
|
||||||
|
|
||||||
|
-behaviour(minirest_api).
|
||||||
|
|
||||||
|
-include_lib("typerefl/include/types.hrl").
|
||||||
|
-include_lib("hocon/include/hoconsc.hrl").
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
|
||||||
|
%% Swagger specs from hocon schema
|
||||||
|
-export([
|
||||||
|
api_spec/0,
|
||||||
|
paths/0,
|
||||||
|
schema/1,
|
||||||
|
namespace/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
fields/1,
|
||||||
|
roots/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% API callbacks
|
||||||
|
-export([
|
||||||
|
'/node_eviction/status'/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||||
|
|
||||||
|
%% Namespace under which this module's schema types are published.
namespace() ->
    "node_eviction".
|
||||||
|
|
||||||
|
%% Swagger spec for this module, generated with schema checking enabled.
api_spec() ->
    Options = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, Options).
|
||||||
|
|
||||||
|
%% HTTP paths served by this API module.
paths() ->
    StatusPath = "/node_eviction/status",
    [StatusPath].
|
||||||
|
|
||||||
|
%% Operation schema for the status endpoint: a single GET whose 200 response
%% is described by schema_status/0.
schema("/node_eviction/status") ->
    GetOperation = #{
        tags => [<<"node_eviction">>],
        summary => <<"Get node eviction status">>,
        description => ?DESC("node_eviction_status_get"),
        responses => #{200 => schema_status()}
    },
    #{
        'operationId' => '/node_eviction/status',
        get => GetOperation
    }.
|
||||||
|
|
||||||
|
%% Handler for the status endpoint. Always answers 200; the body carries
%% `status' and, when the agent is enabled, its `stats'.
'/node_eviction/status'(_Bindings, _Params) ->
    Body =
        case emqx_eviction_agent:status() of
            {enabled, Stats} ->
                #{status => enabled, stats => Stats};
            disabled ->
                #{status => disabled}
        end,
    {200, Body}.
|
||||||
|
|
||||||
|
%% Response schema: a union of the `status_enabled' and `status_disabled'
%% structures defined in fields/1.
schema_status() ->
    Alternatives = [hoconsc:ref(status_enabled), hoconsc:ref(status_disabled)],
    hoconsc:mk(hoconsc:union(Alternatives), #{}).
|
||||||
|
|
||||||
|
%% This schema module declares no root fields.
roots() ->
    [].
|
||||||
|
|
||||||
|
%% Field definitions of the structures referenced by schema_status/0:
%%   status_enabled  - `status' (enabled) plus a reference to `stats'
%%   status_disabled - `status' (disabled) only
%%   stats           - integer `connections' and `sessions'
fields(status_enabled) ->
    StatusField = {status, hoconsc:mk(enabled, #{default => enabled})},
    StatsField = {stats, hoconsc:ref(stats)},
    [StatusField, StatsField];
fields(status_disabled) ->
    StatusField = {status, hoconsc:mk(disabled, #{default => disabled})},
    [StatusField];
fields(stats) ->
    [
        {Name, hoconsc:mk(integer(), #{})}
     || Name <- [connections, sessions]
    ].
|
|
@ -0,0 +1,22 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_app).
|
||||||
|
|
||||||
|
-behaviour(application).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start/2,
|
||||||
|
stop/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% Application start: install the agent hooks, start the top supervisor,
%% then register the CLI command. Returns the supervisor start result.
start(_Type, _Args) ->
    ok = emqx_eviction_agent:hook(),
    {ok, _SupPid} = Started = emqx_eviction_agent_sup:start_link(),
    ok = emqx_eviction_agent_cli:load(),
    Started.
|
||||||
|
|
||||||
|
%% Application stop: remove the agent hooks, then unregister the CLI command.
stop(_State) ->
    ok = emqx_eviction_agent:unhook(),
    ok = emqx_eviction_agent_cli:unload(),
    ok.
|
|
@ -0,0 +1,358 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% MQTT Channel
|
||||||
|
-module(emqx_eviction_agent_channel).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_channel.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start_link/1,
|
||||||
|
start_supervised/1,
|
||||||
|
call/2,
|
||||||
|
call/3,
|
||||||
|
cast/2,
|
||||||
|
stop/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
handle_call/3,
|
||||||
|
handle_cast/2,
|
||||||
|
handle_info/2,
|
||||||
|
terminate/2,
|
||||||
|
code_change/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
-type opts() :: #{
|
||||||
|
conninfo := emqx_types:conninfo(),
|
||||||
|
clientinfo := emqx_types:clientinfo()
|
||||||
|
}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% API
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Start a channel process as a temporary worker under
%% emqx_eviction_agent_conn_sup. The child id is the client id followed by
%% "-" and a unique positive integer.
-spec start_supervised(opts()) -> supervisor:startchild_ret().
start_supervised(#{clientinfo := #{clientid := ClientId}} = Opts) ->
    Suffix = integer_to_binary(erlang:unique_integer([positive])),
    ChildId = iolist_to_binary([bin_clientid(ClientId), "-", Suffix]),
    supervisor:start_child(
        emqx_eviction_agent_conn_sup,
        #{
            id => ChildId,
            start => {?MODULE, start_link, [Opts]},
            restart => temporary,
            shutdown => 5000,
            type => worker,
            modules => [?MODULE]
        }
    ).
|
||||||
|
|
||||||
|
%% Start a linked, unregistered channel gen_server; init/1 receives the
%% options wrapped in a one-element list.
-spec start_link(opts()) -> startlink_ret().
start_link(Opts) ->
    InitArgs = [Opts],
    gen_server:start_link(?MODULE, InitArgs, []).
|
||||||
|
|
||||||
|
%% Asynchronous request to a channel process.
-spec cast(pid(), term()) -> ok.
cast(ChannelPid, Request) ->
    gen_server:cast(ChannelPid, Request).
|
||||||
|
|
||||||
|
%% Synchronous request to a channel process without a timeout.
-spec call(pid(), term()) -> term().
call(ChannelPid, Request) ->
    call(ChannelPid, Request, infinity).
|
||||||
|
|
||||||
|
%% Synchronous request to a channel process with an explicit timeout.
-spec call(pid(), term(), timeout()) -> term().
call(ChannelPid, Request, Timeout) ->
    gen_server:call(ChannelPid, Request, Timeout).
|
||||||
|
|
||||||
|
%% Stop a channel process via gen_server:stop/1.
-spec stop(pid()) -> ok.
stop(ChannelPid) ->
    gen_server:stop(ChannelPid).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% gen_server API
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% gen_server init: build the channel for an evicted session.
%% Traps exits, normalises the client and connection info, opens the
%% session and arms the expiry timer. On success the channel is marked as
%% disconnected in emqx_cm and the process hibernates; a failure of either
%% step stops the process with the reported reason.
init([#{conninfo := ConnInfo0, clientinfo := ClientInfo0 = #{clientid := ClientId}}]) ->
    process_flag(trap_exit, true),
    ClientInfo = clientinfo(ClientInfo0),
    ConnInfo = conninfo(ConnInfo0),
    case open_session(ConnInfo, ClientInfo) of
        {error, OpenError} ->
            {stop, OpenError};
        {ok, Opened} ->
            case set_expiry_timer(Opened) of
                {error, TimerError} ->
                    {stop, TimerError};
                {ok, Armed} ->
                    ?SLOG(
                        info,
                        #{
                            msg => "channel_initialized",
                            clientid => ClientId,
                            node => node()
                        }
                    ),
                    ok = emqx_cm:mark_channel_disconnected(self()),
                    {ok, Armed, hibernate}
            end
    end.
|
||||||
|
|
||||||
|
%% Synchronous requests handled by the channel:
%%   kick / discard      - reply `ok' and stop (reason kicked / discarded)
%%   {takeover, 'begin'} - reply with the session and flag the takeover
%%   {takeover, 'end'}   - hand over drained + pending deliveries and stop
%%   list_acl_cache      - always an empty list
%%   {quota, _}          - always `ok'
%% Anything else is logged and answered with `ignored'.
handle_call(kick, _From, State) ->
    {stop, kicked, ok, State};
handle_call(discard, _From, State) ->
    {stop, discarded, ok, State};
handle_call({takeover, 'begin'}, _From, State = #{session := Session}) ->
    {reply, Session, State#{takeover => true}};
handle_call(
    {takeover, 'end'},
    _From,
    State = #{
        session := Session,
        clientinfo := #{clientid := ClientId},
        pendings := Queued
    }
) ->
    ok = emqx_session:takeover(Session),
    %% TODO: Should not drain deliver here (side effect)
    Drained = emqx_utils:drain_deliver(),
    Reply = Drained ++ Queued,
    ?tp(
        debug,
        emqx_channel_takeover_end,
        #{clientid => ClientId}
    ),
    {stop, normal, Reply, State};
handle_call(list_acl_cache, _From, State) ->
    {reply, [], State};
handle_call({quota, _Policy}, _From, State) ->
    {reply, ok, State};
handle_call(Unexpected, _From, State) ->
    ?SLOG(
        error,
        #{
            msg => "unexpected_call",
            req => Unexpected
        }
    ),
    {reply, ignored, State}.
|
||||||
|
|
||||||
|
%% Raw messages handled by the channel:
%%   {deliver, _, _} - processed in one batch together with every other
%%                     deliver message drained from the mailbox
%%   expire_session  - stop with reason `expired'
%% Anything else is logged and ignored.
handle_info({deliver, _Topic, _Msg} = First, State) ->
    Rest = emqx_utils:drain_deliver(),
    {noreply, handle_deliver([First | Rest], State)};
handle_info(expire_session, State) ->
    {stop, expired, State};
handle_info(Unexpected, State) ->
    ?SLOG(
        error,
        #{
            msg => "unexpected_info",
            info => Unexpected
        }
    ),
    {noreply, State}.
|
||||||
|
|
||||||
|
%% No casts are expected by the channel: log and ignore.
handle_cast(Unexpected, State) ->
    ?SLOG(error, #{
        msg => "unexpected_cast",
        cast => Unexpected
    }),
    {noreply, State}.
|
||||||
|
|
||||||
|
%% gen_server terminate: cancel the expiry timer, persist the session when
%% the channel stops because it expired, then terminate the session with the
%% stop reason.
terminate(Reason, Channel = #{conninfo := ConnInfo, clientinfo := ClientInfo, session := Session}) ->
    ok = cancel_expiry_timer(Channel),
    case Reason of
        expired ->
            _ = emqx_persistent_session:persist(ClientInfo, ConnInfo, Session),
            ok;
        _ ->
            ok
    end,
    emqx_session:terminate(ClientInfo, Reason, Session).
|
||||||
|
|
||||||
|
%% Code upgrade callback: the channel state is carried over as-is.
code_change(_Vsn, State, _Extra) ->
    {ok, State}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Internal functions
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Process a batch of deliver messages for the channel.
%%
%% Both clauses first pass the batch through emqx_channel:maybe_nack/1 and
%% emqx_session:ignore_local/4. What happens next depends on the `takeover'
%% flag of the channel:
%%   true  - the filtered deliveries are appended to `pendings' (they are
%%           handed over in the reply to `{takeover, 'end'}');
%%   false - the filtered deliveries are enqueued into the session, which is
%%           then persisted and stored back into the channel.
%% Returns the updated channel map.
handle_deliver(
    Delivers,
    #{
        takeover := true,
        pendings := Pendings,
        session := Session,
        clientinfo := #{clientid := ClientId} = ClientInfo
    } = Channel
) ->
    %% NOTE: Order is important here. While the takeover is in
    %% progress, the session cannot enqueue messages, since it already
    %% passed on the queue to the new connection in the session state.
    NPendings = lists:append(
        Pendings,
        emqx_session:ignore_local(ClientInfo, emqx_channel:maybe_nack(Delivers), ClientId, Session)
    ),
    Channel#{pendings => NPendings};
handle_deliver(
    Delivers,
    #{
        takeover := false,
        session := Session,
        clientinfo := #{clientid := ClientId} = ClientInfo
    } = Channel
) ->
    Delivers1 = emqx_channel:maybe_nack(Delivers),
    Delivers2 = emqx_session:ignore_local(ClientInfo, Delivers1, ClientId, Session),
    NSession = emqx_session:enqueue(ClientInfo, Delivers2, Session),
    NChannel = persist(NSession, Channel),
    %% We consider queued/dropped messages as delivered since they are now in the session state.
    %% NOTE(review): this is called with the pre-enqueue Session and the
    %% unfiltered Delivers rather than NSession / Delivers2 - confirm that
    %% this is intended.
    emqx_channel:maybe_mark_as_delivered(Session, Delivers),
    NChannel.
|
||||||
|
|
||||||
|
%% Cancel the channel's expiry timer when one is set (i.e. `expiry_timer'
%% holds a reference); otherwise do nothing. Always returns `ok'.
cancel_expiry_timer(Channel) ->
    case Channel of
        #{expiry_timer := TimerRef} when is_reference(TimerRef) ->
            _ = erlang:cancel_timer(TimerRef),
            ok;
        _NoTimer ->
            ok
    end.
|
||||||
|
|
||||||
|
%% Arm the session expiry timer from the `expiry_interval' in the conninfo:
%%   ?UINT_MAX       - no timer is started
%%   any value > 0   - `expire_session' is sent to self() after the interval
%%   anything else   - `{error, should_be_expired}'
%% NOTE(review): the interval goes through timer:seconds/1, i.e. it is
%% treated as seconds - confirm this matches the unit stored in conninfo.
set_expiry_timer(#{conninfo := ConnInfo} = Channel) ->
    Interval = maps:get(expiry_interval, ConnInfo),
    if
        Interval =:= ?UINT_MAX ->
            {ok, Channel};
        Interval > 0 ->
            TimerRef = erlang:send_after(timer:seconds(Interval), self(), expire_session),
            {ok, Channel#{expiry_timer => TimerRef}};
        true ->
            {error, should_be_expired}
    end.
|
||||||
|
|
||||||
|
%% Open the existing session of the client on this node (clean session is
%% always `false') and wrap it into a fresh channel map.
%%
%% Returns:
%%   {ok, Channel}       - a session was present; pending and already
%%                         drained deliveries have been enqueued into it and
%%                         the channel info/stats registered in emqx_cm
%%   {error, no_session} - emqx_cm reported that no session was present
%%   {error, Reason}     - emqx_cm:open_session/3 failed
%% Every outcome is logged.
open_session(ConnInfo, #{clientid := ClientId} = ClientInfo) ->
    Channel = channel(ConnInfo, ClientInfo),
    case emqx_cm:open_session(_CleanSession = false, ClientInfo, ConnInfo) of
        {ok, #{present := false}} ->
            ?SLOG(
                info,
                #{
                    msg => "no_session",
                    clientid => ClientId,
                    node => node()
                }
            ),
            {error, no_session};
        {ok, #{session := Session, present := true, pendings := Pendings0}} ->
            ?SLOG(
                info,
                #{
                    msg => "session_opened",
                    clientid => ClientId,
                    node => node()
                }
            ),
            %% Merge the pendings reported by emqx_cm with whatever deliver
            %% messages already sit in this process' mailbox; usort/1 sorts
            %% the combined list and removes duplicates.
            Pendings1 = lists:usort(lists:append(Pendings0, emqx_utils:drain_deliver())),
            NSession = emqx_session:enqueue(
                ClientInfo,
                emqx_session:ignore_local(
                    ClientInfo,
                    emqx_channel:maybe_nack(Pendings1),
                    ClientId,
                    Session
                ),
                Session
            ),
            NChannel = Channel#{session => NSession},
            ok = emqx_cm:insert_channel_info(ClientId, info(NChannel), stats(NChannel)),
            ?SLOG(
                info,
                #{
                    msg => "channel_info_updated",
                    clientid => ClientId,
                    node => node()
                }
            ),
            {ok, NChannel};
        {error, Reason} = Error ->
            ?SLOG(
                error,
                #{
                    msg => "session_open_failed",
                    clientid => ClientId,
                    node => node(),
                    reason => Reason
                }
            ),
            Error
    end.
|
||||||
|
|
||||||
|
%% Derive the conninfo of the stand-in channel from the original one.
%% Only a fixed set of keys is kept; the result is always flagged as not
%% connected, names this module as `conn_mod', and carries a
%% `disconnected_at' timestamp (the original one, or the current system time
%% in milliseconds when absent).
conninfo(OldConnInfo) ->
    Now = erlang:system_time(millisecond),
    KeptKeys = [
        socktype,
        sockname,
        peername,
        peercert,
        clientid,
        clean_start,
        receive_maximum,
        expiry_interval,
        connected_at,
        disconnected_at,
        keepalive
    ],
    Overrides = #{
        conn_mod => ?MODULE,
        connected => false,
        disconnected_at => maps:get(disconnected_at, OldConnInfo, Now)
    },
    maps:merge(maps:with(KeptKeys, OldConnInfo), Overrides).
|
||||||
|
|
||||||
|
%% Keep only the clientinfo keys that the stand-in channel carries over.
clientinfo(OldClientInfo) ->
    KeptKeys = [
        zone,
        protocol,
        peerhost,
        sockport,
        clientid,
        username,
        is_bridge,
        is_superuser,
        mountpoint
    ],
    maps:with(KeptKeys, OldClientInfo).
|
||||||
|
|
||||||
|
%% Initial channel state: the given conn/client info plus defaults (no
%% expiry timer, no takeover or resume in progress, no pending deliveries).
channel(ConnInfo, ClientInfo) ->
    Defaults = #{
        expiry_timer => undefined,
        takeover => false,
        resuming => false,
        pendings => []
    },
    Defaults#{conninfo => ConnInfo, clientinfo => ClientInfo}.
|
||||||
|
|
||||||
|
%% Persist the session and store the session returned by
%% emqx_persistent_session:persist/3 in the channel.
persist(Session, Channel = #{clientinfo := ClientInfo, conninfo := ConnInfo}) ->
    Persisted = emqx_persistent_session:persist(ClientInfo, ConnInfo, Session),
    maps:put(session, Persisted, Channel).
|
||||||
|
|
||||||
|
%% Channel info map as registered in emqx_cm. Missing conninfo / clientinfo /
%% session entries are reported as `undefined'; the connection state is
%% always `disconnected'.
info(Channel) ->
    Lookup = fun(Key) -> maps:get(Key, Channel, undefined) end,
    SessionInfo = emqx_utils:maybe_apply(fun emqx_session:info/1, Lookup(session)),
    #{
        conninfo => Lookup(conninfo),
        clientinfo => Lookup(clientinfo),
        session => SessionInfo,
        conn_state => disconnected
    }.
|
||||||
|
|
||||||
|
%% Channel stats: the session stats followed by this process' counters for
%% ?CHANNEL_METRICS.
stats(#{session := Session}) ->
    SessionStats = emqx_session:stats(Session),
    Counters = emqx_pd:get_counters(?CHANNEL_METRICS),
    SessionStats ++ Counters.
|
||||||
|
|
||||||
|
%% Client id as a binary; accepts binaries (returned as-is) and atoms.
bin_clientid(<<_/binary>> = ClientId) ->
    ClientId;
bin_clientid(ClientId) when is_atom(ClientId) ->
    atom_to_binary(ClientId).
|
|
@ -0,0 +1,30 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_cli).
|
||||||
|
|
||||||
|
%% APIs
|
||||||
|
-export([
|
||||||
|
load/0,
|
||||||
|
unload/0,
|
||||||
|
cli/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% Register the `eviction' command with emqx_ctl, handled by cli/1.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(eviction, Handler, []).
|
||||||
|
|
||||||
|
%% Unregister the `eviction' command from emqx_ctl.
unload() ->
    Command = eviction,
    emqx_ctl:unregister_command(Command).
|
||||||
|
|
||||||
|
%% Handler of the `eviction' command.
%% `eviction status' prints whether eviction is enabled on this node; any
%% other argument list prints the usage.
cli(["status"]) ->
    Line =
        case emqx_eviction_agent:status() of
            {enabled, _Stats} -> "Eviction status: enabled~n";
            disabled -> "Eviction status: disabled~n"
        end,
    emqx_ctl:print(Line);
cli(_) ->
    Usage = [{"eviction status", "Get current node eviction status"}],
    emqx_ctl:usage(Usage).
|
|
@ -0,0 +1,21 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_conn_sup).
|
||||||
|
|
||||||
|
-behaviour(supervisor).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-export([init/1]).
|
||||||
|
|
||||||
|
%% Start the supervisor, locally registered under the module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||||
|
|
||||||
|
%% Supervisor callback: one_for_one, at most 10 restarts per 3600 seconds,
%% and no static children.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    ChildSpecs = [],
    {ok, {SupFlags, ChildSpecs}}.
|
|
@ -0,0 +1,34 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_sup).
|
||||||
|
|
||||||
|
-behaviour(supervisor).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-export([init/1]).
|
||||||
|
|
||||||
|
%% Start the top-level supervisor, locally registered under the module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||||
|
|
||||||
|
%% Supervisor callback: one_for_one, at most 10 restarts per 3600 seconds.
%% Children are the eviction agent worker followed by the supervisor of the
%% stand-in channel processes.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    AgentSpec = child_spec(worker, emqx_eviction_agent, []),
    ConnSupSpec = child_spec(supervisor, emqx_eviction_agent_conn_sup, []),
    {ok, {SupFlags, [AgentSpec, ConnSupSpec]}}.
|
||||||
|
|
||||||
|
%% Permanent child spec for Mod, started through Mod:start_link with Args,
%% with a 5000 ms shutdown timeout. The module name doubles as the child id.
child_spec(Type, Mod, Args) ->
    StartMFA = {Mod, start_link, Args},
    #{
        id => Mod,
        type => Type,
        start => StartMFA,
        modules => [Mod],
        restart => permanent,
        shutdown => 5000
    }.
|
|
@ -0,0 +1,27 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_proto_v1).
|
||||||
|
|
||||||
|
-behaviour(emqx_bpapi).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
introduced_in/0,
|
||||||
|
|
||||||
|
evict_session_channel/4
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/bpapi.hrl").
|
||||||
|
|
||||||
|
%% Release in which this version of the RPC API was introduced.
introduced_in() ->
    Release = "5.0.22",
    Release.
|
||||||
|
|
||||||
|
%% Ask Node to take over the session of ClientId by calling
%% emqx_eviction_agent:evict_session_channel/3 there through rpc:call/4.
%% Returns whatever the remote call returns, or `{badrpc, Reason}' when the
%% RPC itself fails.
%% NOTE(review): the spec lists supervisor:startchild_err(), while the spec
%% of the remote function in emqx_eviction_agent returns
%% supervisor:startchild_ret() - confirm which one is intended before
%% changing the signature of this API version.
-spec evict_session_channel(
    node(),
    emqx_types:clientid(),
    emqx_types:conninfo(),
    emqx_types:clientinfo()
) -> supervisor:startchild_err() | emqx_rpc:badrpc().
evict_session_channel(Node, ClientId, ConnInfo, ClientInfo) ->
    rpc:call(Node, emqx_eviction_agent, evict_session_channel, [ClientId, ConnInfo, ClientInfo]).
|
|
@ -0,0 +1,467 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/asserts.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect/0, emqtt_connect/1, emqtt_connect/2]
|
||||||
|
).
|
||||||
|
|
||||||
|
%% Assert that the value of `Code' matches the regular expression `Printed'
%% (checked with re:run/2).
-define(assertPrinted(Printed, Code),
    ?assertMatch(
        {match, _},
        re:run(Code, Printed)
    )
).
|
||||||
|
|
||||||
|
%% Test cases of this suite, as collected by the common test helpers.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||||
|
|
||||||
|
%% Start the eviction agent application for the whole suite.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Stop the eviction agent application started in init_per_suite/1.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||||
|
|
||||||
|
%% Per-test setup: make sure `test_eviction' is not left enabled (the result
%% is ignored), start snabbkaffe tracing, then start the extra cluster nodes
%% for the test cases that need them.
init_per_testcase(TestCase, Config0) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:start_trace(),
    start_slave(TestCase, Config0).
|
||||||
|
|
||||||
|
%% Only t_explicit_session_takeover needs a cluster: two nodes running the
%% eviction agent are started and stored under `evacuate_nodes' in the
%% config. Every other test case gets the config back unchanged.
start_slave(TestCase, Config) ->
    case TestCase of
        t_explicit_session_takeover ->
            NodeSpecs = [{evacuate_test1, 2883}, {evacuate_test2, 3883}],
            Nodes = emqx_eviction_agent_test_helpers:start_cluster(
                NodeSpecs,
                [emqx_eviction_agent]
            ),
            [{evacuate_nodes, Nodes} | Config];
        _Other ->
            Config
    end.
|
||||||
|
|
||||||
|
%% Per-test teardown: disable `test_eviction' (the result is ignored), stop
%% snabbkaffe, then stop the cluster nodes of the test cases that started
%% them.
end_per_testcase(Case, Cfg) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:stop(),
    stop_slave(Case, Cfg).
|
||||||
|
|
||||||
|
%% Counterpart of start_slave/2: stop the cluster nodes stored under
%% `evacuate_nodes' for t_explicit_session_takeover; nothing to do for any
%% other test case.
stop_slave(TestCase, Config) ->
    case TestCase of
        t_explicit_session_takeover ->
            Nodes = ?config(evacuate_nodes, Config),
            emqx_eviction_agent_test_helpers:stop_cluster(Nodes, [emqx_eviction_agent]);
        _Other ->
            ok
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Walk through the enable/disable life cycle of the agent and check how it
%% affects new client connections.
t_enable_disable(_Config) ->
    %% Trap exits so that the linked emqtt clients cannot kill the test
    %% process when a connection is refused.
    erlang:process_flag(trap_exit, true),

    %% Initially disabled: clients can connect.
    ?assertMatch(
        disabled,
        emqx_eviction_agent:status()
    ),

    {ok, C0} = emqtt_connect(),
    ok = emqtt:disconnect(C0),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    %% While enabled for `test_eviction', another kind cannot enable it...
    ?assertMatch(
        {error, eviction_agent_busy},
        emqx_eviction_agent:enable(bar, undefined)
    ),

    %% ...but the same kind may enable it again with a new server reference.
    ?assertMatch(
        ok,
        emqx_eviction_agent:enable(test_eviction, <<"srv">>)
    ),

    ?assertMatch(
        {enabled, #{}},
        emqx_eviction_agent:status()
    ),

    %% New connections are refused while enabled.
    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_connect()
    ),

    %% Only the kind that enabled the agent can disable it.
    ?assertMatch(
        {error, eviction_agent_busy},
        emqx_eviction_agent:disable(bar)
    ),

    ?assertMatch(
        ok,
        emqx_eviction_agent:disable(test_eviction)
    ),

    %% Disabling twice reports that the agent is already disabled.
    ?assertMatch(
        {error, disabled},
        emqx_eviction_agent:disable(test_eviction)
    ),

    ?assertMatch(
        disabled,
        emqx_eviction_agent:status()
    ),

    %% Disabled again: clients can connect.
    {ok, C1} = emqtt_connect(),
    ok = emqtt:disconnect(C1).
|
||||||
|
|
||||||
|
%% Check that evict_connections/1 is refused while the agent is disabled and
%% that the connection count reported by status/0 drops after an eviction.
t_evict_connections_status(_Config) ->
    erlang:process_flag(trap_exit, true),

    {ok, _C} = emqtt_connect(),

    %% Eviction is only possible while the agent is enabled.
    {error, disabled} = emqx_eviction_agent:evict_connections(1),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    ?assertMatch(
        {enabled, #{connections := 1, sessions := _}},
        emqx_eviction_agent:status()
    ),

    ok = emqx_eviction_agent:evict_connections(1),

    %% Give the evicted connection some time to go away before re-checking.
    ct:sleep(100),

    ?assertMatch(
        {enabled, #{connections := 0, sessions := _}},
        emqx_eviction_agent:status()
    ),

    ok = emqx_eviction_agent:disable(test_eviction).
|
||||||
|
|
||||||
|
t_explicit_session_takeover(Config) ->
|
||||||
|
_ = erlang:process_flag(trap_exit, true),
|
||||||
|
ok = restart_emqx(),
|
||||||
|
|
||||||
|
[{Node1, Port1}, {Node2, _Port2}] = ?config(evacuate_nodes, Config),
|
||||||
|
|
||||||
|
{ok, C0} = emqtt_connect([
|
||||||
|
{clientid, <<"client_with_session">>},
|
||||||
|
{clean_start, false},
|
||||||
|
{port, Port1}
|
||||||
|
]),
|
||||||
|
{ok, _, _} = emqtt:subscribe(C0, <<"t1">>),
|
||||||
|
|
||||||
|
ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),
|
||||||
|
|
||||||
|
?assertEqual(
|
||||||
|
1,
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, connection_count, [])
|
||||||
|
),
|
||||||
|
|
||||||
|
[ChanPid] = rpc:call(Node1, emqx_cm, lookup_channels, [<<"client_with_session">>]),
|
||||||
|
|
||||||
|
?assertWaitEvent(
|
||||||
|
begin
|
||||||
|
ok = rpc:call(Node1, emqx_eviction_agent, evict_connections, [1]),
|
||||||
|
receive
|
||||||
|
{'EXIT', C0, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
|
||||||
|
after 1000 ->
|
||||||
|
?assert(false, "Connection not evicted")
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
#{?snk_kind := emqx_cm_connected_client_count_dec, chan_pid := ChanPid},
|
||||||
|
2000
|
||||||
|
),
|
||||||
|
|
||||||
|
?assertEqual(
|
||||||
|
0,
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, connection_count, [])
|
||||||
|
),
|
||||||
|
|
||||||
|
?assertEqual(
|
||||||
|
1,
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, session_count, [])
|
||||||
|
),
|
||||||
|
|
||||||
|
%% First, evacuate to the same node
|
||||||
|
|
||||||
|
?assertWaitEvent(
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node1]),
|
||||||
|
#{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
|
||||||
|
1000
|
||||||
|
),
|
||||||
|
|
||||||
|
ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),
|
||||||
|
|
||||||
|
{ok, C1} = emqtt_connect([{port, Port1}]),
|
||||||
|
emqtt:publish(C1, <<"t1">>, <<"MessageToEvictedSession1">>),
|
||||||
|
ok = emqtt:disconnect(C1),
|
||||||
|
|
||||||
|
ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),
|
||||||
|
|
||||||
|
%% Evacuate to another node
|
||||||
|
|
||||||
|
?assertWaitEvent(
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node2]),
|
||||||
|
#{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
|
||||||
|
1000
|
||||||
|
),
|
||||||
|
|
||||||
|
?assertEqual(
|
||||||
|
0,
|
||||||
|
rpc:call(Node1, emqx_eviction_agent, session_count, [])
|
||||||
|
),
|
||||||
|
|
||||||
|
?assertEqual(
|
||||||
|
1,
|
||||||
|
rpc:call(Node2, emqx_eviction_agent, session_count, [])
|
||||||
|
),
|
||||||
|
|
||||||
|
ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),
|
||||||
|
|
||||||
|
%% Session is on Node2, but we connect to Node1
|
||||||
|
{ok, C2} = emqtt_connect([{port, Port1}]),
|
||||||
|
emqtt:publish(C2, <<"t1">>, <<"MessageToEvictedSession2">>),
|
||||||
|
ok = emqtt:disconnect(C2),
|
||||||
|
|
||||||
|
ct:sleep(100),
|
||||||
|
|
||||||
|
%% Session is on Node2, but we connect the subscribed client to Node1
|
||||||
|
%% It should take over the session for the third time and receive
|
||||||
|
%% previously published messages
|
||||||
|
{ok, C3} = emqtt_connect([
|
||||||
|
{clientid, <<"client_with_session">>},
|
||||||
|
{clean_start, false},
|
||||||
|
{port, Port1}
|
||||||
|
]),
|
||||||
|
|
||||||
|
ok = assert_receive_publish(
|
||||||
|
[
|
||||||
|
#{payload => <<"MessageToEvictedSession1">>, topic => <<"t1">>},
|
||||||
|
#{payload => <<"MessageToEvictedSession2">>, topic => <<"t1">>}
|
||||||
|
]
|
||||||
|
),
|
||||||
|
ok = emqtt:disconnect(C3).
|
||||||
|
|
||||||
|
%% Enables the agent, restarts its process under emqx_eviction_agent_sup
%% and checks that the reported status is `disabled` afterwards.
t_disable_on_restart(_Config) ->
    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    Sup = emqx_eviction_agent_sup,
    Child = emqx_eviction_agent,
    ok = supervisor:terminate_child(Sup, Child),
    {ok, _} = supervisor:restart_child(Sup, Child),

    ?assertEqual(disabled, emqx_eviction_agent:status()).
|
||||||
|
|
||||||
|
%% Evicts a disconnected session to the local node, then checks that the
%% session is still counted, is returned by the management cluster query,
%% is printed by the `clients` CLI commands, and is gone after kick_session.
t_session_serialization(_Config) ->
    _ = erlang:process_flag(trap_exit, true),
    ok = restart_emqx(),

    ClientId = <<"client_with_session">>,

    %% Create a session with a subscription and disconnect the client.
    {ok, Client} = emqtt_connect(ClientId, false),
    {ok, _, _} = emqtt:subscribe(Client, <<"t1">>),
    ok = emqtt:disconnect(Client),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:session_count()),

    %% Evacuate to the same node.
    %% The event patterns below keep the literal client id on purpose.
    ?assertWaitEvent(
        emqx_eviction_agent:evict_sessions(1, node()),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ok = emqx_eviction_agent:disable(test_eviction),
    ?assertEqual(1, emqx_eviction_agent:session_count()),

    ?assertMatch(
        #{data := [#{clientid := <<"client_with_session">>}]},
        emqx_mgmt_api:cluster_query(
            emqx_channel_info,
            #{},
            [],
            fun emqx_mgmt_api_clients:qs2ms/2,
            fun emqx_mgmt_api_clients:format_channel_info/2
        )
    ),

    mock_print(),
    ?assertPrinted("client_with_session", emqx_mgmt_cli:clients(["list"])),
    ?assertPrinted(
        "client_with_session",
        emqx_mgmt_cli:clients(["show", "client_with_session"])
    ),

    ?assertWaitEvent(
        emqx_cm:kick_session(ClientId),
        #{?snk_kind := emqx_cm_clean_down, client_id := <<"client_with_session">>},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:session_count()).
|
||||||
|
|
||||||
|
%% Connects a client with a will message, sends its channel a
%% `disconnect` message with ?RC_USE_ANOTHER_SERVER and expects a
%% subscriber on the will topic to receive the will payload within 1 s.
t_will_msg(_Config) ->
    erlang:process_flag(trap_exit, true),

    Payload = <<"will_msg">>,
    Topic = <<"will_topic">>,
    WillClientId = <<"client_with_will">>,

    WillClientOpts = [
        {clean_start, false},
        {clientid, WillClientId},
        {will_payload, Payload},
        {will_topic, Topic}
    ],
    _ = emqtt_connect(WillClientOpts),

    {ok, Subscriber} = emqtt_connect(),
    {ok, _, _} = emqtt:subscribe(Subscriber, Topic),

    [WillChan] = emqx_cm:lookup_channels(WillClientId),

    Props = #{'Server-Reference' => <<>>},
    WillChan ! {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props},

    receive
        {publish, #{payload := Payload, topic := Topic}} ->
            ok
    after 1000 ->
        ct:fail("Will message not received")
    end,

    ok = emqtt:disconnect(Subscriber).
|
||||||
|
|
||||||
|
%% Connects one client via emqtt:ws_connect/1 (port 8083, path "/mqtt")
%% and checks that evict_connections(1) brings the connection count
%% from 1 to 0.
t_ws_conn(_Config) ->
    erlang:process_flag(trap_exit, true),

    ClientOpts = [
        {proto_ver, v5},
        {clientid, <<"ws_client">>},
        {port, 8083},
        {ws_path, "/mqtt"}
    ],
    {ok, WsClient} = emqtt:start_link(ClientOpts),
    {ok, _} = emqtt:ws_connect(WsClient),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:connection_count()),

    ?assertWaitEvent(
        ok = emqx_eviction_agent:evict_connections(1),
        #{?snk_kind := emqx_cm_connected_client_count_dec},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:connection_count()).
|
||||||
|
|
||||||
|
-ifndef(BUILD_WITHOUT_QUIC).
|
||||||
|
|
||||||
|
%% Same scenario as the WebSocket case, but the client connects via
%% emqtt:quic_connect/1 to a QUIC listener started on a free port.
t_quic_conn(_Config) ->
    erlang:process_flag(trap_exit, true),

    %% Bring up a QUIC listener for this suite.
    Port = emqx_common_test_helpers:select_free_port(quic),
    application:ensure_all_started(quicer),
    emqx_common_test_helpers:ensure_quic_listener(?MODULE, Port),

    ClientOpts = [
        {proto_ver, v5},
        {clientid, <<"quic_client">>},
        {port, Port}
    ],
    {ok, QuicClient} = emqtt:start_link(ClientOpts),
    {ok, _} = emqtt:quic_connect(QuicClient),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertEqual(1, emqx_eviction_agent:connection_count()),

    ?assertWaitEvent(
        ok = emqx_eviction_agent:evict_connections(1),
        #{?snk_kind := emqx_cm_connected_client_count_dec},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:connection_count()).
|
||||||
|
|
||||||
|
-endif.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% For each expected `#{payload := _, topic := _}` map, in list order,
%% waits up to 1 s for a `{publish, Map}` message in the caller's mailbox
%% whose payload and topic match. Fails the assertion on timeout.
%% Returns `ok` once every expected message has been received.
assert_receive_publish(Expected) ->
    lists:foreach(
        fun(#{payload := Payload, topic := Topic}) ->
            receive
                {publish, #{payload := Payload, topic := Topic}} ->
                    ok
            after 1000 ->
                ?assert(false, "Message `" ++ binary_to_list(Payload) ++ "` is lost")
            end
        end,
        Expected
    ).
|
||||||
|
|
||||||
|
%% Opens a default test client, publishes Message to Topic and disconnects.
connect_and_publish(Topic, Message) ->
    {ok, Publisher} = emqtt_connect(),
    emqtt:publish(Publisher, Topic, Message),
    ok = emqtt:disconnect(Publisher).
|
||||||
|
|
||||||
|
%% Stops and starts `emqx`, then `emqx_eviction_agent`, ignoring the
%% results of the individual application calls. Always returns `ok`.
restart_emqx() ->
    lists:foreach(
        fun(App) ->
            _ = application:stop(App),
            _ = application:start(App)
        end,
        [emqx, emqx_eviction_agent]
    ).
|
||||||
|
|
||||||
|
%% (Re)creates a passthrough meck of emqx_ctl in which print/1,2 and
%% usage/1,2 are redirected to the corresponding format functions.
mock_print() ->
    %% Best effort: drop any mock left over from an earlier call.
    catch meck:unload(emqx_ctl),
    meck:new(emqx_ctl, [non_strict, passthrough]),
    Expectations = [
        {print, fun(Arg) -> emqx_ctl:format(Arg, []) end},
        {print, fun(Msg, Arg) -> emqx_ctl:format(Msg, Arg) end},
        {usage, fun(Usages) -> emqx_ctl:format_usage(Usages) end},
        {usage, fun(Cmd, Descr) -> emqx_ctl:format_usage(Cmd, Descr) end}
    ],
    lists:foreach(
        fun({Name, Fun}) -> meck:expect(emqx_ctl, Name, Fun) end,
        Expectations
    ).
|
|
@ -0,0 +1,69 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_api_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_mgmt_api_test_util,
|
||||||
|
[
|
||||||
|
request_api/2,
|
||||||
|
uri/1
|
||||||
|
]
|
||||||
|
).
|
||||||
|
|
||||||
|
%% Test-case list is computed by emqx_common_test_helpers:all/1.
all() ->
    emqx_common_test_helpers:all(?MODULE).
|
||||||
|
|
||||||
|
%% Sets up the management API test environment with emqx_eviction_agent.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:init_suite(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Tears down what init_suite/1 was given in init_per_suite/1.
end_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:end_suite(Apps),
    Config.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% GET node_eviction/status must report `disabled` initially, `enabled`
%% (with a `stats` map) while the agent is enabled, and `disabled` again
%% after it is disabled.
t_status(_Config) ->
    StatusPath = ["node_eviction", "status"],

    ?assertMatch(
        {ok, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    ok = emqx_eviction_agent:enable(apitest, undefined),
    ?assertMatch(
        {ok, #{<<"status">> := <<"enabled">>, <<"stats">> := #{}}},
        api_get(StatusPath)
    ),

    ok = emqx_eviction_agent:disable(apitest),
    ?assertMatch(
        {ok, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Issues a GET request for Path through emqx_mgmt_api_test_util.
%% Returns `{ok, DecodedBody}` with the body decoded by jiffy into maps,
%% or the `{error, _}` tuple from request_api/2 unchanged.
api_get(Path) ->
    Url = emqx_mgmt_api_test_util:uri(Path),
    case emqx_mgmt_api_test_util:request_api(get, Url) of
        {error, _} = Error ->
            Error;
        {ok, ResponseBody} ->
            Body = list_to_binary(ResponseBody),
            {ok, jiffy:decode(Body, [return_maps])}
    end.
|
|
@ -0,0 +1,251 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_channel_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
|
||||||
|
-define(CLIENT_ID, <<"client_with_session">>).
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect/0, emqtt_connect/2]
|
||||||
|
).
|
||||||
|
|
||||||
|
%% Test-case list is computed by emqx_common_test_helpers:all/1.
all() ->
    emqx_common_test_helpers:all(?MODULE).
|
||||||
|
|
||||||
|
%% Starts emqx_conf and emqx_eviction_agent, then sets the
%% `rpc.port_discovery` config value to `manual`.
init_per_suite(Config) ->
    Apps = [emqx_conf, emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    {ok, _} = emqx:update_config([rpc, port_discovery], manual),
    Config.
|
||||||
|
|
||||||
|
%% Stops the applications started in init_per_suite/1, in reverse order.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_conf],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||||
|
|
||||||
|
%% Only t_persistence needs extra setup: it switches the persistent
%% session store on, starts its supervisor and initialises the DB backend.
init_per_testcase(t_persistence, Config) ->
    StoreFlag = [persistent_session_store, enabled],
    emqx_config:put(StoreFlag, true),
    {ok, _} = emqx_persistent_session_sup:start_link(),
    emqx_persistent_session:init_db_backend(),
    ?assert(emqx_persistent_session:is_store_enabled()),
    Config;
init_per_testcase(_TestCase, Config) ->
    Config.
|
||||||
|
|
||||||
|
%% Reverts the persistent session store flag set for t_persistence.
%% Other test cases need no cleanup.
end_per_testcase(t_persistence, Config) ->
    StoreFlag = [persistent_session_store, enabled],
    emqx_config:put(StoreFlag, false),
    emqx_persistent_session:init_db_backend(),
    ?assertNot(emqx_persistent_session:is_store_enabled()),
    Config;
end_per_testcase(_TestCase, _Config) ->
    ok.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Without a prior client connection for ?CLIENT_ID, start_supervised/1
%% is expected to return `{error, {no_session, _}}`.
t_start_no_session(_Config) ->
    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 10000
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {error, {no_session, _}},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||||
|
|
||||||
|
%% With `expiry_interval => 0` in conninfo, start_supervised/1 is expected
%% to return `{error, {should_be_expired, _}}`.
t_start_no_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 0
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {error, {should_be_expired, _}},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||||
|
|
||||||
|
%% With `expiry_interval => ?UINT_MAX` in conninfo, start_supervised/1 is
%% expected to succeed.
t_start_infinite_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => ?UINT_MAX
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {ok, _},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||||
|
|
||||||
|
%% A `kick` call to a started eviction-agent channel must return `ok`.
t_kick(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    ?assertEqual(ok, emqx_eviction_agent_channel:call(Channel, kick)).
|
||||||
|
|
||||||
|
%% A `discard` call to a started eviction-agent channel must return `ok`.
t_discard(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    ?assertEqual(ok, emqx_eviction_agent_channel:call(Channel, discard)).
|
||||||
|
|
||||||
|
%% stop/1 on a started eviction-agent channel must return `ok`.
t_stop(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    ?assertEqual(ok, emqx_eviction_agent_channel:stop(Channel)).
|
||||||
|
|
||||||
|
%% Sends an unknown cast and an unknown plain message to the channel,
%% then checks the replies to `list_acl_cache`, `{quota, quota}` and an
%% unknown call.
t_ignored_calls(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    ok = emqx_eviction_agent_channel:cast(Channel, unknown),
    Channel ! unknown,

    ?assertEqual([], emqx_eviction_agent_channel:call(Channel, list_acl_cache)),
    ?assertEqual(ok, emqx_eviction_agent_channel:call(Channel, {quota, quota})),
    ?assertEqual(ignored, emqx_eviction_agent_channel:call(Channel, unknown)).
|
||||||
|
|
||||||
|
%% Starts an eviction-agent channel whose conninfo carries
%% `expiry_interval => 1` and checks that the channel process terminates.
%%
%% The process is monitored rather than polled after a fixed sleep: the
%% test finishes as soon as the channel exits and keeps the same 1500 ms
%% upper bound. If the process is already dead when the monitor is set up,
%% the 'DOWN' message (reason `noproc`) is delivered immediately.
t_expire(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    #{conninfo := ConnInfo} = Opts0 = evict_session_opts(?CLIENT_ID),
    Opts1 = Opts0#{conninfo => ConnInfo#{expiry_interval => 1}},

    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts1),
    MRef = erlang:monitor(process, Pid),

    receive
        {'DOWN', MRef, process, Pid, _Reason} ->
            ok
    after 1500 ->
        %% Do not leave a stray 'DOWN' message for later test cases.
        erlang:demonitor(MRef, [flush]),
        ct:fail("Channel did not expire")
    end.
|
||||||
|
|
||||||
|
%% The connected-client count must be 1 after the client connects and 0
%% once an eviction-agent channel has been started for the same client id.
t_get_connected_client_count(_Config) ->
    erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    ?assertEqual(1, emqx_cm:get_connected_client_count()),

    {ok, _} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),
    ?assertEqual(0, emqx_cm:get_connected_client_count()).
|
||||||
|
|
||||||
|
%% Publishes a QoS 1 message while the session is held by an
%% eviction-agent channel, kicks that channel, reconnects the original
%% client id and expects the message to be delivered within 1 s.
t_persistence(_Config) ->
    erlang:process_flag(trap_exit, true),

    Topic = <<"t1">>,
    Message = <<"message_to_persist">>,

    {ok, Subscriber} = emqtt_connect(?CLIENT_ID, false),
    {ok, _, _} = emqtt:subscribe(Subscriber, Topic, 0),

    {ok, Channel} = emqx_eviction_agent_channel:start_supervised(
        evict_session_opts(?CLIENT_ID)
    ),

    {ok, Publisher} = emqtt_connect(),
    {ok, _} = emqtt:publish(Publisher, Topic, Message, 1),
    ok = emqtt:disconnect(Publisher),

    %% Kill channel so that the session is only persisted
    ok = emqx_eviction_agent_channel:call(Channel, kick),

    %% Should restore session from persistent storage and receive messages
    {ok, Restored} = emqtt_connect(?CLIENT_ID, false),

    receive
        {publish, #{payload := Message, topic := Topic}} ->
            ok
    after 1000 ->
        ct:fail("message not received")
    end,

    ok = emqtt:disconnect(Restored).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Returns the `conninfo` and `clientinfo` entries of the channel info
%% that emqx_cm holds for ClientId.
evict_session_opts(ClientId) ->
    ChanInfo = emqx_cm:get_chan_info(ClientId),
    maps:with([conninfo, clientinfo], ChanInfo).
|
|
@ -0,0 +1,39 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_cli_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
|
||||||
|
%% Test-case list is computed by emqx_common_test_helpers:all/1.
all() ->
    emqx_common_test_helpers:all(?MODULE).
|
||||||
|
|
||||||
|
%% Starts emqx_eviction_agent through the common test helpers.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Disables the agent under the `foo` kind used by t_status/1 (result
%% ignored), then stops the application.
end_per_suite(Config) ->
    _ = emqx_eviction_agent:disable(foo),
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% The CLI entry point must return `ok` for an unknown command and for
%% `status`, both before and after the agent is enabled.
t_status(_Config) ->
    %% Unknown command: prints usage.
    ok = emqx_eviction_agent_cli:cli(["foobar"]),

    %% Status before the agent is enabled.
    ok = emqx_eviction_agent_cli:cli(["status"]),

    ok = emqx_eviction_agent:enable(foo, undefined),

    %% Status after the agent is enabled.
    ok = emqx_eviction_agent_cli:cli(["status"]).
|
|
@ -0,0 +1,134 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_eviction_agent_test_helpers).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
emqtt_connect/0,
|
||||||
|
emqtt_connect/1,
|
||||||
|
emqtt_connect/2,
|
||||||
|
emqtt_connect_many/2,
|
||||||
|
stop_many/1,
|
||||||
|
|
||||||
|
emqtt_try_connect/1,
|
||||||
|
|
||||||
|
start_cluster/2,
|
||||||
|
start_cluster/3,
|
||||||
|
stop_cluster/2,
|
||||||
|
|
||||||
|
case_specific_node_name/2,
|
||||||
|
case_specific_node_name/3,
|
||||||
|
concat_atoms/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% Connects with client id <<"client1">> and clean_start = true.
emqtt_connect() ->
    emqtt_connect(<<"client1">>, true).

%% Connects with the given client id and clean_start flag.
emqtt_connect(ClientId, CleanStart) ->
    emqtt_connect([{clientid, ClientId}, {clean_start, CleanStart}]).

%% Starts a linked emqtt client with Opts followed by the suite defaults
%% (MQTT v5, 'Session-Expiry-Interval' of 600) and connects it.
%% Returns `{ok, Client}` or the `{error, _}` tuple from emqtt:connect/1.
emqtt_connect(Opts) ->
    Defaults = [
        {proto_ver, v5},
        {properties, #{'Session-Expiry-Interval' => 600}}
    ],
    {ok, Client} = emqtt:start_link(Opts ++ Defaults),
    case emqtt:connect(Client) of
        {error, _} = Error -> Error;
        {ok, _} -> {ok, Client}
    end.
|
||||||
|
|
||||||
|
%% Connects Count clients named <<"client-1">> .. <<"client-Count">> to
%% Port with clean_start = false and returns the client pids in that order.
emqtt_connect_many(Port, Count) ->
    [
        begin
            ClientId = <<"client-", (integer_to_binary(N))/binary>>,
            {ok, Client} = emqtt_connect([
                {clientid, ClientId}, {clean_start, false}, {port, Port}
            ]),
            Client
        end
     || N <- lists:seq(1, Count)
    ].
|
||||||
|
|
||||||
|
%% Disconnects every client, ignoring failures of individual
%% disconnects, then sleeps for 100 ms.
stop_many(Clients) ->
    _ = [(catch emqtt:disconnect(Client)) || Client <- Clients],
    ct:sleep(100).
|
||||||
|
|
||||||
|
%% Attempts a connection with Opts. On success the client is disconnected
%% again and `ok` is returned. Otherwise the `{error, _}` tuple is returned.
emqtt_try_connect(Opts) ->
    case emqtt_connect(Opts) of
        {error, _} = Error ->
            Error;
        {ok, Client} ->
            emqtt:disconnect(Client),
            ok
    end.
|
||||||
|
|
||||||
|
%% Same as start_cluster/3 with no extra application environment.
start_cluster(NamesWithPorts, Apps) ->
    start_cluster(NamesWithPorts, Apps, []).

%% Starts one `core` node per `{ShortName, Port}` pair, each with a TCP
%% listener on Port, the ssl/ws/wss default listeners disabled and
%% `rpc.mode` set to `async`.
%% Returns a list of `{Node, Port}` tuples.
start_cluster(NamesWithPorts, Apps, Env) ->
    Specs = lists:map(
        fun({ShortName, Port}) ->
            {core, ShortName, #{listener_ports => [{tcp, Port}]}}
        end,
        NamesWithPorts
    ),
    DisabledListeners = [
        {[listeners, Proto, default, enabled], false}
     || Proto <- [ssl, ws, wss]
    ],
    ClusterOpts = [
        {env, [{emqx, boot_modules, [broker, listeners]}] ++ Env},
        {apps, Apps},
        {conf, DisabledListeners ++ [{[rpc, mode], async}]}
    ],
    Cluster = emqx_common_test_helpers:emqx_cluster(Specs, ClusterOpts),
    [
        {
            emqx_common_test_helpers:start_slave(Name, NodeOpts),
            proplists:get_value(Name, NamesWithPorts)
        }
     || {Name, NodeOpts} <- Cluster
    ].
|
||||||
|
|
||||||
|
%% For every `{Node, Port}` pair: stops Apps on the node over RPC, stops
%% the remaining test apps, and finally stops the slave node itself.
stop_cluster(NodesWithPorts, Apps) ->
    lists:foreach(
        fun({Node, _Port}) ->
            _ = [rpc:call(Node, application, stop, [App]) || App <- Apps],
            %% This sleep is just to make logs cleaner
            ct:sleep(100),
            _ = rpc:call(Node, emqx_common_test_helpers, stop_apps, []),
            emqx_common_test_helpers:stop_slave(Node)
        end,
        NodesWithPorts
    ).
|
||||||
|
|
||||||
|
%% Builds the atom `Module__Case`.
case_specific_node_name(Module, Case) ->
    Parts = [Module, '__', Case],
    concat_atoms(Parts).

%% Builds the atom `Module__Case__Node`.
case_specific_node_name(Module, Case, Node) ->
    Parts = [Module, '__', Case, '__', Node],
    concat_atoms(Parts).
|
||||||
|
|
||||||
|
%% Concatenates the textual form of each atom in Atoms, in order, and
%% returns the result as a single atom. An empty list yields ''.
concat_atoms(Atoms) ->
    Joined = <<<<(atom_to_binary(Atom))/binary>> || Atom <- Atoms>>,
    binary_to_atom(Joined).
|
|
@ -155,7 +155,9 @@ basic_reboot_apps() ->
|
||||||
CE ++
|
CE ++
|
||||||
[
|
[
|
||||||
emqx_s3,
|
emqx_s3,
|
||||||
emqx_ft
|
emqx_ft,
|
||||||
|
emqx_eviction_agent,
|
||||||
|
emqx_node_rebalance
|
||||||
]
|
]
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
Business Source License 1.1
|
||||||
|
|
||||||
|
Licensor: Hangzhou EMQ Technologies Co., Ltd.
|
||||||
|
Licensed Work: EMQX Enterprise Edition
|
||||||
|
The Licensed Work is (c) 2023
|
||||||
|
Hangzhou EMQ Technologies Co., Ltd.
|
||||||
|
Additional Use Grant: Students and educators are granted right to copy,
|
||||||
|
modify, and create derivative work for research
|
||||||
|
or education.
|
||||||
|
Change Date: 2027-02-01
|
||||||
|
Change License: Apache License, Version 2.0
|
||||||
|
|
||||||
|
For information about alternative licensing arrangements for the Software,
|
||||||
|
please contact Licensor: https://www.emqx.com/en/contact
|
||||||
|
|
||||||
|
Notice
|
||||||
|
|
||||||
|
The Business Source License (this document, or the “License”) is not an Open
|
||||||
|
Source license. However, the Licensed Work will eventually be made available
|
||||||
|
under an Open Source License, as stated in this License.
|
||||||
|
|
||||||
|
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
||||||
|
“Business Source License” is a trademark of MariaDB Corporation Ab.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Business Source License 1.1
|
||||||
|
|
||||||
|
Terms
|
||||||
|
|
||||||
|
The Licensor hereby grants you the right to copy, modify, create derivative
|
||||||
|
works, redistribute, and make non-production use of the Licensed Work. The
|
||||||
|
Licensor may make an Additional Use Grant, above, permitting limited
|
||||||
|
production use.
|
||||||
|
|
||||||
|
Effective on the Change Date, or the fourth anniversary of the first publicly
|
||||||
|
available distribution of a specific version of the Licensed Work under this
|
||||||
|
License, whichever comes first, the Licensor hereby grants you rights under
|
||||||
|
the terms of the Change License, and the rights granted in the paragraph
|
||||||
|
above terminate.
|
||||||
|
|
||||||
|
If your use of the Licensed Work does not comply with the requirements
|
||||||
|
currently in effect as described in this License, you must purchase a
|
||||||
|
commercial license from the Licensor, its affiliated entities, or authorized
|
||||||
|
resellers, or you must refrain from using the Licensed Work.
|
||||||
|
|
||||||
|
All copies of the original and modified Licensed Work, and derivative works
|
||||||
|
of the Licensed Work, are subject to this License. This License applies
|
||||||
|
separately for each version of the Licensed Work and the Change Date may vary
|
||||||
|
for each version of the Licensed Work released by Licensor.
|
||||||
|
|
||||||
|
You must conspicuously display this License on each original or modified copy
|
||||||
|
of the Licensed Work. If you receive the Licensed Work in original or
|
||||||
|
modified form from a third party, the terms and conditions set forth in this
|
||||||
|
License apply to your use of that work.
|
||||||
|
|
||||||
|
Any use of the Licensed Work in violation of this License will automatically
|
||||||
|
terminate your rights under this License for the current and all other
|
||||||
|
versions of the Licensed Work.
|
||||||
|
|
||||||
|
This License does not grant you any right in any trademark or logo of
|
||||||
|
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||||
|
Licensor as expressly required by this License).
|
||||||
|
|
||||||
|
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||||
|
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
||||||
|
TITLE.
|
||||||
|
|
||||||
|
MariaDB hereby grants you permission to use this License’s text to license
|
||||||
|
your works, and to refer to it using the trademark “Business Source License”,
|
||||||
|
as long as you comply with the Covenants of Licensor below.
|
||||||
|
|
||||||
|
Covenants of Licensor
|
||||||
|
|
||||||
|
In consideration of the right to use this License’s text and the “Business
|
||||||
|
Source License” name and trademark, Licensor covenants to MariaDB, and to all
|
||||||
|
other recipients of the licensed work to be provided by Licensor:
|
||||||
|
|
||||||
|
1. To specify as the Change License the GPL Version 2.0 or any later version,
|
||||||
|
or a license that is compatible with GPL Version 2.0 or a later version,
|
||||||
|
where “compatible” means that software provided under the Change License can
|
||||||
|
be included in a program with software provided under GPL Version 2.0 or a
|
||||||
|
later version. Licensor may specify additional Change Licenses without
|
||||||
|
limitation.
|
||||||
|
|
||||||
|
2. To either: (a) specify an additional grant of rights to use that does not
|
||||||
|
impose any additional restriction on the right granted in this License, as
|
||||||
|
the Additional Use Grant; or (b) insert the text “None”.
|
||||||
|
|
||||||
|
3. To specify a Change Date.
|
||||||
|
|
||||||
|
4. Not to modify this License in any other way.
|
|
@ -0,0 +1,40 @@
|
||||||
|
# EMQX Node Rebalance
|
||||||
|
|
||||||
|
`emqx_node_rebalance` is a part of the node evacuation/node rebalance feature in EMQX.
|
||||||
|
It implements high-level scenarios for node evacuation and rebalancing.
|
||||||
|
|
||||||
|
## Application Responsibilities
|
||||||
|
|
||||||
|
`emqx_node_rebalance` application's core concept is a _rebalance coordinator_.
|
||||||
|
_Rebalance coordinator_ is an entity that implements the rebalancing logic and orchestrates the rebalancing process.
|
||||||
|
In particular, it:
|
||||||
|
|
||||||
|
* Enables/Disables Eviction Agent on nodes.
|
||||||
|
* Sends connection/session eviction commands to Eviction Agents according to the evacuation logic.
|
||||||
|
|
||||||
|
We have two implementations of the _rebalance coordinator_:
|
||||||
|
* `emqx_node_rebalance` - a coordinator that implements node rebalancing;
|
||||||
|
* `emqx_node_rebalance_evacuation` - a coordinator that implements node evacuation.
|
||||||
|
|
||||||
|
## EMQX Integration
|
||||||
|
|
||||||
|
`emqx_node_rebalance` is a high-level application that is loosely coupled with the rest of the system.
|
||||||
|
It uses Eviction Agent to perform the required operations.
|
||||||
|
|
||||||
|
## User Facing API
|
||||||
|
|
||||||
|
The application provides API (CLI and HTTP) to perform the following operations:
|
||||||
|
* Start/Stop rebalancing across a set of nodes or the whole cluster;
|
||||||
|
* Start/Stop evacuation of a node;
|
||||||
|
* Get the current rebalancing status of a local node;
|
||||||
|
* Get the current rebalancing status of the whole cluster.
|
||||||
|
|
||||||
|
Also, an HTTP endpoint is provided for liveness probes.
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
|
||||||
|
The rebalancing concept is described in the corresponding [EIP](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md).
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Please see our [contributing.md](../../CONTRIBUTING.md).
|
|
@ -0,0 +1,3 @@
|
||||||
|
##--------------------------------------------------------------------
|
||||||
|
## EMQX Node Rebalance Plugin
|
||||||
|
##--------------------------------------------------------------------
|
|
@ -0,0 +1,21 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-define(DEFAULT_CONN_EVICT_RATE, 500).
|
||||||
|
-define(DEFAULT_SESS_EVICT_RATE, 500).
|
||||||
|
|
||||||
|
%% sec
|
||||||
|
-define(DEFAULT_WAIT_HEALTH_CHECK, 60).
|
||||||
|
%% sec
|
||||||
|
-define(DEFAULT_WAIT_TAKEOVER, 60).
|
||||||
|
|
||||||
|
-define(DEFAULT_ABS_CONN_THRESHOLD, 1000).
|
||||||
|
-define(DEFAULT_ABS_SESS_THRESHOLD, 1000).
|
||||||
|
|
||||||
|
-define(DEFAULT_REL_CONN_THRESHOLD, 1.1).
|
||||||
|
-define(DEFAULT_REL_SESS_THRESHOLD, 1.1).
|
||||||
|
|
||||||
|
-define(EVICT_INTERVAL, 1000).
|
||||||
|
|
||||||
|
-define(EVACUATION_FILENAME, <<".evacuation">>).
|
|
@ -0,0 +1,2 @@
|
||||||
|
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||||
|
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,21 @@
|
||||||
|
{application, emqx_node_rebalance, [
|
||||||
|
{description, "EMQX Node Rebalance"},
|
||||||
|
{vsn, "5.0.0"},
|
||||||
|
{registered, [
|
||||||
|
emqx_node_rebalance_sup,
|
||||||
|
emqx_node_rebalance,
|
||||||
|
emqx_node_rebalance_agent,
|
||||||
|
emqx_node_rebalance_evacuation
|
||||||
|
]},
|
||||||
|
{applications, [
|
||||||
|
kernel,
|
||||||
|
stdlib
|
||||||
|
]},
|
||||||
|
{mod, {emqx_node_rebalance_app, []}},
|
||||||
|
{env, []},
|
||||||
|
{modules, []},
|
||||||
|
{links, [
|
||||||
|
{"Homepage", "https://www.emqx.com/"},
|
||||||
|
{"Github", "https://github.com/emqx"}
|
||||||
|
]}
|
||||||
|
]}.
|
|
@ -0,0 +1,3 @@
|
||||||
|
%% -*- mode: erlang -*-
|
||||||
|
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||||
|
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,438 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance).
|
||||||
|
|
||||||
|
-include("emqx_node_rebalance.hrl").
|
||||||
|
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start/1,
|
||||||
|
status/0,
|
||||||
|
status/1,
|
||||||
|
stop/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-behaviour(gen_statem).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
callback_mode/0,
|
||||||
|
handle_event/4,
|
||||||
|
code_change/4
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
is_node_available/0,
|
||||||
|
available_nodes/1,
|
||||||
|
connection_count/0,
|
||||||
|
session_count/0,
|
||||||
|
disconnected_session_count/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export_type([
|
||||||
|
start_opts/0,
|
||||||
|
start_error/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-type start_opts() :: #{
|
||||||
|
conn_evict_rate => pos_integer(),
|
||||||
|
sess_evict_rate => pos_integer(),
|
||||||
|
wait_health_check => pos_integer(),
|
||||||
|
wait_takeover => pos_integer(),
|
||||||
|
abs_conn_threshold => pos_integer(),
|
||||||
|
rel_conn_threshold => number(),
|
||||||
|
abs_sess_threshold => pos_integer(),
|
||||||
|
rel_sess_threshold => number(),
|
||||||
|
nodes => [node()]
|
||||||
|
}.
|
||||||
|
-type start_error() :: already_started | [{node(), term()}].
|
||||||
|
|
||||||
|
%% @doc Ask the locally registered coordinator to start rebalancing.
%% Options supplied by the caller take precedence over the values
%% produced by default_opts/0.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    EffectiveOpts = maps:merge(default_opts(), StartOpts),
    Request = {start, EffectiveOpts},
    gen_statem:call(?MODULE, Request).
|
||||||
|
|
||||||
|
%% @doc Stop the rebalance run coordinated by the local process.
%% The coordinator replies `{error, not_started}' when it is disabled.
-spec stop() -> ok_or_error(not_started).
stop() ->
    gen_statem:call(?MODULE, stop).

%% @doc Query the locally registered coordinator for its status.
-spec status() -> disabled | {enabled, map()}.
status() ->
    gen_statem:call(?MODULE, status).

%% @doc Query the coordinator identified by `Pid' for its status.
-spec status(pid()) -> disabled | {enabled, map()}.
status(Pid) ->
    gen_statem:call(Pid, status).

%% @doc Start the coordinator state machine, registered locally under
%% the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    gen_statem:start_link({local, ?MODULE}, ?MODULE, [], []).
|
||||||
|
|
||||||
|
%% @doc Run the availability check on `Nodes' through the protocol
%% module and keep only the replies that are atoms. The second element
%% of the protocol result is ignored.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _} = emqx_node_rebalance_proto_v1:available_nodes(Nodes),
    [Reply || Reply <- Replies, is_atom(Reply)].
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% gen_statem callbacks
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% All events, in every state, are dispatched to handle_event/4.
callback_mode() ->
    handle_event_function.
|
||||||
|
|
||||||
|
%% states: disabled, wait_health_check, evicting_conns, wait_takeover, evicting_sessions
|
||||||
|
|
||||||
|
%% The coordinator starts in the `disabled' state with empty data and
%% emits a trace point announcing that it has started.
init([]) ->
    ?tp(debug, emqx_node_rebalance_started, #{}),
    InitialData = #{},
    {ok, disabled, InitialData}.
|
||||||
|
|
||||||
|
%% start
|
||||||
|
%% Event handler for all coordinator states. A successful run walks
%%   disabled -> wait_health_check -> evicting_conns
%%            -> wait_takeover -> evicting_sessions -> disabled
%% and is driven by `state_timeout' events. `stop' returns to `disabled'
%% from any enabled state.

%% start: only honoured while disabled
handle_event(
    {call, Caller},
    {start, #{wait_health_check := HealthCheckSecs} = Opts},
    disabled,
    #{} = Data0
) ->
    case enable_rebalance(Data0#{opts => Opts}) of
        {error, Reason} ->
            ?SLOG(warning, #{
                msg => "node_rebalance_enable_failed",
                reason => Reason
            }),
            {keep_state_and_data, [{reply, Caller, {error, Reason}}]};
        {ok, Data1} ->
            ?SLOG(warning, #{msg => "node_rebalance_enabled", opts => Opts}),
            Actions = [
                {state_timeout, seconds(HealthCheckSecs), evict_conns},
                {reply, Caller, ok}
            ],
            {next_state, wait_health_check, Data1, Actions}
    end;
handle_event({call, Caller}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, already_started}}]};
%% stop
handle_event({call, Caller}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, not_started}}]};
handle_event({call, Caller}, stop, _State, Data) ->
    ok = disable_rebalance(Data),
    ?SLOG(warning, #{msg => "node_rebalance_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, Caller, ok}]};
%% status
handle_event({call, Caller}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, disabled}]};
handle_event({call, Caller}, status, State, Data) ->
    Stats = get_stats(State, Data),
    Reply = {enabled, Stats#{state => State, coordinator_node => node()}},
    {keep_state_and_data, [{reply, Caller, Reply}]};
%% conn eviction
handle_event(state_timeout, evict_conns, wait_health_check, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_health_check_over"}),
    %% Fire the first eviction round immediately after the transition.
    {next_state, evicting_conns, Data, [{state_timeout, 0, evict_conns}]};
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{opts := #{wait_takeover := TakeoverSecs, evict_interval := Interval}} = Data
) ->
    case evict_conns(Data) of
        {continue, UpdatedData} ->
            %% Not balanced yet: schedule the next round.
            {keep_state, UpdatedData, [{state_timeout, Interval, evict_conns}]};
        ok ->
            ?SLOG(warning, #{msg => "node_rebalance_evict_conns_over"}),
            {next_state, wait_takeover, Data, [
                {state_timeout, seconds(TakeoverSecs), evict_sessions}
            ]}
    end;
%% session eviction
handle_event(state_timeout, evict_sessions, wait_takeover, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_takeover_over"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{opts := #{evict_interval := Interval}} = Data
) ->
    case evict_sessions(Data) of
        {continue, UpdatedData} ->
            {keep_state, UpdatedData, [{state_timeout, Interval, evict_sessions}]};
        ok ->
            ?tp(debug, emqx_node_rebalance_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_rebalance_evict_sessions_over"}),
            ok = disable_rebalance(Data),
            ?SLOG(warning, #{msg => "node_rebalance_finished_successfully"}),
            {next_state, disabled, deinit(Data)}
    end;
%% anything else is logged and otherwise ignored
handle_event({call, Caller}, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_call", call => Msg}),
    {keep_state_and_data, [{reply, Caller, ignored}]};
handle_event(info, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_info", info => Msg}),
    keep_state_and_data;
handle_event(cast, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_cast", cast => Msg}),
    keep_state_and_data.
|
||||||
|
|
||||||
|
%% Hot code upgrade: state name and data are carried over unchanged.
code_change(_OldVsn, StateName, StateData, _Extra) ->
    {ok, StateName, StateData}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% internal funs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Decide whether a rebalance is needed and, if so, enable the rebalance
%% agent on the donor nodes.
%%
%% The nodes listed in `opts.nodes' are split by their current
%% connection count: nodes at or above the average become donors, the
%% others become recipients. need_rebalance/5 then decides whether the
%% imbalance is worth acting on.
%%
%% Returns `{ok, Data1}', with donors, recipients and the initial
%% connection/session counts stored in the data, or
%% `{error, nothing_to_balance}'. multicall/3 raises if any node fails.
enable_rebalance(#{opts := #{nodes := []}}) ->
    %% With no nodes there is nothing to average over: avg/1 would fail
    %% with function_clause and take the coordinator down. Report the
    %% request as a no-op instead.
    {error, nothing_to_balance};
enable_rebalance(#{opts := Opts} = Data) ->
    Nodes = maps:get(nodes, Opts),
    ConnCounts = multicall(Nodes, connection_counts, []),
    SessCounts = multicall(Nodes, session_counts, []),
    {_, Counts} = lists:unzip(ConnCounts),
    Avg = avg(Counts),
    {DonorCounts, RecipientCounts} = lists:partition(
        fun({_Node, Count}) ->
            Count >= Avg
        end,
        ConnCounts
    ),
    ?SLOG(warning, #{
        msg => "node_rebalance_enabling",
        conn_counts => ConnCounts,
        donor_counts => DonorCounts,
        recipient_counts => RecipientCounts
    }),
    {DonorNodes, _} = lists:unzip(DonorCounts),
    {RecipientNodes, _} = lists:unzip(RecipientCounts),
    case need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) of
        false ->
            {error, nothing_to_balance};
        true ->
            %% Only donors get the agent enabled; the coordinator pid is
            %% handed over so the agent knows who owns the run.
            _ = multicall(DonorNodes, enable_rebalance_agent, [self()]),
            {ok, Data#{
                donors => DonorNodes,
                recipients => RecipientNodes,
                initial_conn_counts => maps:from_list(ConnCounts),
                initial_sess_counts => maps:from_list(SessCounts)
            }}
    end.
|
||||||
|
|
||||||
|
%% Disable the rebalance agent on every donor node, identifying this
%% process as the coordinator. Replies are discarded; multicall/3 raises
%% if a node fails.
disable_rebalance(#{donors := Donors}) ->
    Coordinator = self(),
    _ = multicall(Donors, disable_rebalance_agent, [Coordinator]),
    ok.
|
||||||
|
|
||||||
|
%% One round of connection eviction.
%%
%% Compares the average connection count of donors and recipients. When
%% the donor average is within the configured thresholds the round is
%% over (`ok'). Otherwise donors whose count exceeds the recipient
%% average are told to evict `conn_evict_rate' connections, and
%% `{continue, Data1}' is returned with the fresh averages and counts.
evict_conns(#{donors := Donors, recipients := Recipients, opts := Opts} = Data) ->
    DonorLoad = multicall(Donors, connection_counts, []),
    {_, DonorCounts} = lists:unzip(DonorLoad),
    RecipientLoad = multicall(Recipients, connection_counts, []),
    {_, RecipientCounts} = lists:unzip(RecipientLoad),

    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    case within_thresholds(DonorAvg, RecipientAvg, thresholds(conn, Opts)) of
        true ->
            ok;
        false ->
            Rate = maps:get(conn_evict_rate, Opts),
            Targets = nodes_to_evict(RecipientAvg, DonorLoad),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_conns",
                nodes => Targets,
                counts => Rate
            }),
            _ = multicall(Targets, evict_connections, [Rate]),
            {continue, Data#{
                donor_conn_avg => DonorAvg,
                recipient_conn_avg => RecipientAvg,
                donor_conn_counts => maps:from_list(DonorLoad),
                recipient_conn_counts => maps:from_list(RecipientLoad)
            }}
    end.
|
||||||
|
|
||||||
|
%% One round of disconnected-session eviction.
%%
%% Same shape as connection eviction, but based on disconnected session
%% counts. Donors above the recipient average are told to evict
%% `sess_evict_rate' disconnected sessions towards the recipient nodes.
%% Returns `ok' once the averages are within thresholds, otherwise
%% `{continue, Data1}' with the fresh averages and counts.
evict_sessions(#{donors := Donors, recipients := Recipients, opts := Opts} = Data) ->
    DonorLoad = multicall(Donors, disconnected_session_counts, []),
    {_, DonorCounts} = lists:unzip(DonorLoad),
    RecipientLoad = multicall(Recipients, disconnected_session_counts, []),
    {_, RecipientCounts} = lists:unzip(RecipientLoad),

    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    case within_thresholds(DonorAvg, RecipientAvg, thresholds(sess, Opts)) of
        true ->
            ok;
        false ->
            Rate = maps:get(sess_evict_rate, Opts),
            Targets = nodes_to_evict(RecipientAvg, DonorLoad),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_sessions",
                nodes => Targets,
                counts => Rate
            }),
            EvictArgs = [Rate, Recipients, disconnected],
            _ = multicall(Targets, evict_sessions, EvictArgs),
            {continue, Data#{
                donor_sess_avg => DonorAvg,
                recipient_sess_avg => RecipientAvg,
                donor_sess_counts => maps:from_list(DonorLoad),
                recipient_sess_counts => maps:from_list(RecipientLoad)
            }}
    end.
|
||||||
|
|
||||||
|
%% Decide whether the donor/recipient split justifies a rebalance.
%%
%% With no donors or no recipients there is nothing to move. Otherwise a
%% rebalance is needed when the donor average exceeds the thresholds for
%% either connections or sessions. The decision and its inputs are
%% emitted as a trace point.
need_rebalance([], _Recipients, _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(_Donors, [], _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(Donors, Recipients, ConnCounts, SessCounts, Opts) ->
    DonorConnAvg = avg_for_nodes(Donors, ConnCounts),
    RecipientConnAvg = avg_for_nodes(Recipients, ConnCounts),
    DonorSessAvg = avg_for_nodes(Donors, SessCounts),
    RecipientSessAvg = avg_for_nodes(Recipients, SessCounts),
    %% Balanced only if both connections and sessions are within limits;
    %% the session check is skipped when connections are already off.
    Balanced =
        within_thresholds(DonorConnAvg, RecipientConnAvg, thresholds(conn, Opts)) andalso
            within_thresholds(DonorSessAvg, RecipientSessAvg, thresholds(sess, Opts)),
    Needed = not Balanced,
    ?tp(
        debug,
        emqx_node_rebalance_need_rebalance,
        #{
            donors => Donors,
            recipients => Recipients,
            conn_counts => ConnCounts,
            sess_counts => SessCounts,
            opts => Opts,
            result => Needed
        }
    ),
    Needed.
|
||||||
|
|
||||||
|
%% Average of the counts that belong to `Nodes', taken from a list of
%% `{Node, Count}' pairs.
avg_for_nodes(Nodes, Counts) ->
    CountByNode = maps:from_list(Counts),
    Selected = maps:with(Nodes, CountByNode),
    avg(maps:values(Selected)).
|
||||||
|
|
||||||
|
%% True when `Value' does not exceed `GoalValue' by more than the
%% absolute threshold, or, failing that, does not exceed `GoalValue'
%% scaled by the relative threshold.
within_thresholds(Value, GoalValue, {AbsThres, RelThres}) ->
    case Value =< GoalValue + AbsThres of
        true -> true;
        false -> Value =< GoalValue * RelThres
    end.
|
||||||
|
|
||||||
|
%% Pick the `{Absolute, Relative}' threshold pair for connections
%% (`conn') or sessions (`sess') out of the options map.
thresholds(conn, #{abs_conn_threshold := AbsThres, rel_conn_threshold := RelThres}) ->
    {AbsThres, RelThres};
thresholds(sess, #{abs_sess_threshold := AbsThres, rel_sess_threshold := RelThres}) ->
    {AbsThres, RelThres}.
|
||||||
|
|
||||||
|
%% Names of the nodes whose count is strictly above `Goal', in the
%% order they appear in `NodeCounts'.
nodes_to_evict(Goal, NodeCounts) ->
    lists:filtermap(
        fun({Node, Count}) ->
            Count > Goal andalso {true, Node}
        end,
        NodeCounts
    ).
|
||||||
|
|
||||||
|
%% Statistics reported for a state: nothing while disabled, the whole
%% coordinator data otherwise.
get_stats(disabled, _Data) ->
    #{};
get_stats(_State, Data) ->
    Data.
|
||||||
|
|
||||||
|
%% Arithmetic mean of a non-empty list of numbers, always a float.
%% An empty list is rejected with `function_clause'. Non-emptiness is
%% checked by pattern matching rather than a `length/1' guard, which
%% walked the whole list just to compare against 1.
avg([_ | _] = List) ->
    lists:sum(List) / length(List).
|
||||||
|
|
||||||
|
%% Call `emqx_node_rebalance_proto_v1:F(Nodes, A...)' and return
%% `[{Node, Value}]' for the per-node replies.
%%
%% The protocol function is expected to return `{Results, BadNodes}'.
%% A non-empty `BadNodes', or any reply that is neither `ok' nor
%% `{ok, _}', raises `error({bad_nodes, _})'.
multicall(Nodes, F, A) ->
    case erlang:apply(emqx_node_rebalance_proto_v1, F, [Nodes | A]) of
        {_, [_ | _] = BadNodes} ->
            error({bad_nodes, BadNodes});
        {Replies, []} ->
            Tagged = lists:zip(Nodes, Replies),
            case lists:partition(fun is_ok/1, Tagged) of
                {Good, []} ->
                    lists:map(fun({Node, Reply}) -> {Node, ok_result(Reply)} end, Good);
                {_, Bad} ->
                    error({bad_nodes, Bad})
            end
    end.
|
||||||
|
|
||||||
|
%% True for a `{Node, Reply}' pair whose reply is `ok' or `{ok, _}';
%% false for anything else.
is_ok({_Node, Reply}) ->
    case Reply of
        ok -> true;
        {ok, _} -> true;
        _ -> false
    end;
is_ok(_) ->
    false.
|
||||||
|
|
||||||
|
%% Unwrap a successful reply: `{ok, Value}' yields `Value', a bare `ok'
%% stays `ok'.
ok_result(ok) ->
    ok;
ok_result({ok, Value}) ->
    Value.
|
||||||
|
|
||||||
|
%% The three functions below wrap eviction agent counters in `{ok, _}'.

%% Connection count as reported by the eviction agent.
connection_count() ->
    Count = emqx_eviction_agent:connection_count(),
    {ok, Count}.

%% Session count as reported by the eviction agent.
session_count() ->
    Count = emqx_eviction_agent:session_count(),
    {ok, Count}.

%% Count of disconnected sessions as reported by the eviction agent.
disconnected_session_count() ->
    Count = emqx_eviction_agent:session_count(disconnected),
    {ok, Count}.
|
||||||
|
|
||||||
|
%% Default start options. Values come from the macros in
%% emqx_node_rebalance.hrl; the node list defaults to all_nodes/0.
default_opts() ->
    #{
        %% nodes taking part in the rebalance
        nodes => all_nodes(),

        %% pacing
        evict_interval => ?EVICT_INTERVAL,
        wait_health_check => ?DEFAULT_WAIT_HEALTH_CHECK,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER,

        %% connections
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
        abs_conn_threshold => ?DEFAULT_ABS_CONN_THRESHOLD,
        rel_conn_threshold => ?DEFAULT_REL_CONN_THRESHOLD,

        %% sessions
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        abs_sess_threshold => ?DEFAULT_ABS_SESS_THRESHOLD,
        rel_sess_threshold => ?DEFAULT_REL_SESS_THRESHOLD
    }.
|
||||||
|
|
||||||
|
%% Strip everything a rebalance run stored in the coordinator data, so
%% that the `disabled' state carries no leftovers from a finished or
%% stopped run.
%%
%% `donors' and `recipients' are written by enable_rebalance/1 together
%% with the initial counts, so they are dropped here as well; otherwise
%% stale node lists would linger in the data of the disabled
%% coordinator.
deinit(Data) ->
    Keys = [
        donors,
        recipients,
        recipient_conn_avg,
        recipient_sess_avg,
        donor_conn_avg,
        donor_sess_avg,
        recipient_conn_counts,
        recipient_sess_counts,
        donor_conn_counts,
        donor_sess_counts,
        initial_conn_counts,
        initial_sess_counts,
        opts
    ],
    maps:without(Keys, Data).
|
||||||
|
|
||||||
|
%% Return the local node name when it can take part in a rebalance: the
%% rebalance agent must be registered and the eviction agent must be
%% disabled. Either assertion failing raises `badmatch'.
is_node_available() ->
    AgentPid = whereis(emqx_node_rebalance_agent),
    true = is_pid(AgentPid),
    disabled = emqx_eviction_agent:status(),
    node().
|
||||||
|
|
||||||
|
%% Nodes that mria reports as running; used as the default node set.
all_nodes() ->
    mria_mnesia:running_nodes().
|
||||||
|
|
||||||
|
%% Convert seconds to milliseconds, rounded to an integer so the result
%% can be used as a timeout even when `Sec' is fractional.
seconds(Sec) ->
    Millis = timer:seconds(Sec),
    round(Millis).
|
|
@ -0,0 +1,131 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_agent).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
-include_lib("stdlib/include/qlc.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start_link/0,
|
||||||
|
enable/1,
|
||||||
|
disable/1,
|
||||||
|
status/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
handle_call/3,
|
||||||
|
handle_info/2,
|
||||||
|
handle_cast/2,
|
||||||
|
code_change/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
-define(ENABLE_KIND, emqx_node_rebalance).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-type status() :: {enabled, pid()} | disabled.
|
||||||
|
|
||||||
|
%% @doc Start the agent, registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc Enable the agent on behalf of the coordinator `CoordinatorPid'.
-spec enable(pid()) -> ok_or_error(already_enabled | eviction_agent_busy).
enable(CoordinatorPid) ->
    Request = {enable, CoordinatorPid},
    gen_server:call(?MODULE, Request).

%% @doc Disable the agent. Only the coordinator that enabled it is
%% accepted; another pid gets `{error, invalid_coordinator}'.
-spec disable(pid()) -> ok_or_error(already_disabled | invalid_coordinator).
disable(CoordinatorPid) ->
    Request = {disable, CoordinatorPid},
    gen_server:call(?MODULE, Request).

%% @doc Report whether the agent is enabled and, if so, for which
%% coordinator.
-spec status() -> status().
status() ->
    gen_server:call(?MODULE, status).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% gen_server callbacks
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% The agent starts disabled: an empty map means no coordinator is
%% attached.
init([]) ->
    InitialState = #{},
    {ok, InitialState}.
|
||||||
|
|
||||||
|
%% Synchronous requests.
%%
%% enable:  refused while a coordinator is attached. Otherwise the agent
%%          links to the coordinator and to the eviction agent process,
%%          then enables the eviction agent. If it is busy the links are
%%          removed again and the state is left untouched.
%% disable: accepted only from the coordinator recorded in the state.
%% status:  `{enabled, CoordinatorPid}' or `disabled'.
handle_call({enable, _CoordinatorPid}, _From, #{coordinator_pid := _Current} = St) ->
    {reply, {error, already_enabled}, St};
handle_call({enable, CoordinatorPid}, _From, St) ->
    true = link(CoordinatorPid),
    EvictionAgentPid = whereis(emqx_eviction_agent),
    true = link(EvictionAgentPid),
    case emqx_eviction_agent:enable(?ENABLE_KIND, undefined) of
        {error, eviction_agent_busy} ->
            true = unlink(EvictionAgentPid),
            true = unlink(CoordinatorPid),
            {reply, {error, eviction_agent_busy}, St};
        ok ->
            Enabled = #{
                coordinator_pid => CoordinatorPid,
                eviction_agent_pid => EvictionAgentPid
            },
            {reply, ok, Enabled}
    end;
handle_call({disable, CoordinatorPid}, _From, St) ->
    case St of
        #{coordinator_pid := CoordinatorPid, eviction_agent_pid := EvictionAgentPid} ->
            _ = emqx_eviction_agent:disable(?ENABLE_KIND),
            true = unlink(EvictionAgentPid),
            true = unlink(CoordinatorPid),
            Disabled = maps:without([coordinator_pid, eviction_agent_pid], St),
            {reply, ok, Disabled};
        #{coordinator_pid := _OtherCoordinator} ->
            {reply, {error, invalid_coordinator}, St};
        #{} ->
            {reply, {error, already_disabled}, St}
    end;
handle_call(status, _From, #{coordinator_pid := Pid} = St) ->
    {reply, {enabled, Pid}, St};
handle_call(status, _From, St) ->
    {reply, disabled, St};
handle_call(Msg, _From, St) ->
    ?SLOG(warning, #{
        msg => "unknown_call",
        call => Msg,
        state => St
    }),
    {reply, ignored, St}.
|
||||||
|
|
||||||
|
%% The agent expects no plain messages; log and drop them.
handle_info(Info, St) ->
    ?SLOG(warning, #{
        msg => "unknown_info",
        info => Info,
        state => St
    }),
    {noreply, St}.

%% The agent expects no casts; log and drop them.
handle_cast(Cast, St) ->
    ?SLOG(warning, #{
        msg => "unknown_cast",
        cast => Cast,
        state => St
    }),
    {noreply, St}.

%% Hot code upgrade: the state is carried over unchanged.
code_change(_OldVsn, St, _Extra) ->
    {ok, St}.
|
|
@ -0,0 +1,733 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
-module(emqx_node_rebalance_api).
|
||||||
|
|
||||||
|
-behaviour(minirest_api).
|
||||||
|
|
||||||
|
-include_lib("typerefl/include/types.hrl").
|
||||||
|
-include_lib("hocon/include/hoconsc.hrl").
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx_utils/include/emqx_utils_api.hrl").
|
||||||
|
|
||||||
|
%% Swagger specs from hocon schema
|
||||||
|
-export([
|
||||||
|
api_spec/0,
|
||||||
|
paths/0,
|
||||||
|
schema/1,
|
||||||
|
namespace/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
fields/1,
|
||||||
|
roots/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% API callbacks
|
||||||
|
-export([
|
||||||
|
'/load_rebalance/status'/2,
|
||||||
|
'/load_rebalance/global_status'/2,
|
||||||
|
'/load_rebalance/availability_check'/2,
|
||||||
|
'/load_rebalance/:node/start'/2,
|
||||||
|
'/load_rebalance/:node/stop'/2,
|
||||||
|
'/load_rebalance/:node/evacuation/start'/2,
|
||||||
|
'/load_rebalance/:node/evacuation/stop'/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% Schema examples
|
||||||
|
-export([
|
||||||
|
rebalance_example/0,
|
||||||
|
rebalance_evacuation_example/0,
|
||||||
|
translate/2
|
||||||
|
]).
|
||||||
|
|
||||||
|
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||||
|
-import(emqx_dashboard_swagger, [error_codes/2]).
|
||||||
|
|
||||||
|
-define(BAD_REQUEST, 'BAD_REQUEST').
|
||||||
|
-define(NODE_EVACUATING, 'NODE_EVACUATING').
|
||||||
|
-define(RPC_ERROR, 'RPC_ERROR').
|
||||||
|
-define(NOT_FOUND, 'NOT_FOUND').
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% API Spec
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Schema namespace of this API module.
namespace() ->
    "load_rebalance".

%% Swagger spec generated from this module, with schema checking of
%% requests switched on.
api_spec() ->
    SpecOpts = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, SpecOpts).

%% All HTTP paths served by this module; each one has a matching
%% schema/1 clause and a handler function of the same name.
paths() ->
    [
        %% status
        "/load_rebalance/status",
        "/load_rebalance/global_status",
        "/load_rebalance/availability_check",
        %% rebalance
        "/load_rebalance/:node/start",
        "/load_rebalance/:node/stop",
        %% evacuation
        "/load_rebalance/:node/evacuation/start",
        "/load_rebalance/:node/evacuation/stop"
    ].
|
||||||
|
|
||||||
|
%% Swagger schema of every path listed in paths/0.
%%
%% The four `:node' operations are executed on the target node via RPC.
%% Their handlers go through wrap_rpc/2, which answers 503 with the
%% RPC_ERROR code when the RPC itself fails, so that response is part of
%% their documented contract (see node_operation_responses/0).
schema("/load_rebalance/status") ->
    #{
        'operationId' => '/load_rebalance/status',
        get => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Get rebalance status">>,
            description => ?DESC("load_rebalance_status"),
            responses => #{
                200 => local_status_response_schema()
            }
        }
    };
schema("/load_rebalance/global_status") ->
    #{
        'operationId' => '/load_rebalance/global_status',
        get => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Get global rebalance status">>,
            description => ?DESC("load_rebalance_global_status"),
            responses => #{
                200 => response_schema()
            }
        }
    };
schema("/load_rebalance/availability_check") ->
    #{
        'operationId' => '/load_rebalance/availability_check',
        get => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Node rebalance availability check">>,
            description => ?DESC("load_rebalance_availability_check"),
            responses => #{
                200 => response_schema(),
                503 => error_codes([?NODE_EVACUATING], <<"Node Evacuating">>)
            }
        }
    };
schema("/load_rebalance/:node/start") ->
    #{
        'operationId' => '/load_rebalance/:node/start',
        post => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Start rebalancing with the node as coordinator">>,
            description => ?DESC("load_rebalance_start"),
            parameters => [param_node()],
            'requestBody' =>
                emqx_dashboard_swagger:schema_with_examples(
                    ref(rebalance_start),
                    rebalance_example()
                ),
            responses => node_operation_responses()
        }
    };
schema("/load_rebalance/:node/stop") ->
    #{
        'operationId' => '/load_rebalance/:node/stop',
        post => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Stop rebalancing coordinated by the node">>,
            description => ?DESC("load_rebalance_stop"),
            parameters => [param_node()],
            responses => node_operation_responses()
        }
    };
schema("/load_rebalance/:node/evacuation/start") ->
    #{
        'operationId' => '/load_rebalance/:node/evacuation/start',
        post => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Start evacuation on a node">>,
            description => ?DESC("load_rebalance_evacuation_start"),
            parameters => [param_node()],
            'requestBody' =>
                emqx_dashboard_swagger:schema_with_examples(
                    ref(rebalance_evacuation_start),
                    rebalance_evacuation_example()
                ),
            responses => node_operation_responses()
        }
    };
schema("/load_rebalance/:node/evacuation/stop") ->
    #{
        'operationId' => '/load_rebalance/:node/evacuation/stop',
        post => #{
            tags => [<<"load_rebalance">>],
            summary => <<"Stop evacuation on a node">>,
            description => ?DESC("load_rebalance_evacuation_stop"),
            parameters => [param_node()],
            responses => node_operation_responses()
        }
    }.

%% Responses shared by the operations that act on a specific node.
node_operation_responses() ->
    #{
        200 => response_schema(),
        400 => error_codes([?BAD_REQUEST], <<"Bad Request">>),
        404 => error_codes([?NOT_FOUND], <<"Not Found">>),
        503 => error_codes([?RPC_ERROR], <<"RPC Error">>)
    }.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Handlers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% GET /load_rebalance/status: status of the local node. Either
%% `disabled', or the stats of the running process (rebalance or
%% evacuation) formatted by format_status/2.
'/load_rebalance/status'(get, #{}) ->
    case emqx_node_rebalance_status:local_status() of
        disabled ->
            {200, #{status => disabled}};
        {Process, Stats} when Process =:= rebalance; Process =:= evacuation ->
            {200, format_status(Process, Stats)}
    end.
|
||||||
|
|
||||||
|
%% GET /load_rebalance/global_status: evacuations and rebalances known
%% to the status module, each converted with format_as_map_list/1.
'/load_rebalance/global_status'(get, #{}) ->
    #{evacuations := Evacuations, rebalances := Rebalances} =
        emqx_node_rebalance_status:global_status(),
    Body = #{
        evacuations => format_as_map_list(Evacuations),
        rebalances => format_as_map_list(Rebalances)
    },
    {200, Body}.
|
||||||
|
|
||||||
|
%% GET /load_rebalance/availability_check: 200 while the eviction agent
%% is disabled, 503 NODE_EVACUATING while it is enabled.
'/load_rebalance/availability_check'(get, #{}) ->
    case emqx_eviction_agent:status() of
        {enabled, _Stats} ->
            error_response(503, ?NODE_EVACUATING, <<"Node Evacuating">>);
        disabled ->
            {200, #{}}
    end.
|
||||||
|
|
||||||
|
%% POST /load_rebalance/:node/start: translate the request body, resolve
%% the `nodes' parameter and start a rebalance coordinated by the given
%% node via RPC.
'/load_rebalance/:node/start'(post, #{bindings := #{node := NodeBin}, body := Params0}) ->
    StartOnNode = fun(Node) ->
        Translated = translate(rebalance_start, Params0),
        with_nodes_at_key(nodes, Translated, fun(Params) ->
            Result = emqx_node_rebalance_api_proto_v1:node_rebalance_start(Node, Params),
            wrap_rpc(Node, Result)
        end)
    end,
    emqx_utils_api:with_node(NodeBin, StartOnNode).
|
||||||
|
|
||||||
|
%% POST /load_rebalance/:node/stop: stop the rebalance coordinated by
%% the given node via RPC.
'/load_rebalance/:node/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOnNode = fun(Node) ->
        Result = emqx_node_rebalance_api_proto_v1:node_rebalance_stop(Node),
        wrap_rpc(Node, Result)
    end,
    emqx_utils_api:with_node(NodeBin, StopOnNode).
|
||||||
|
|
||||||
|
%% @doc POST handler for `/load_rebalance/:node/evacuation/start'.
%% The request body is checked against the `rebalance_evacuation_start'
%% schema, the optional `migrate_to' list is validated, and the evacuation
%% start request is then sent to the target node.
'/load_rebalance/:node/evacuation/start'(post, #{
    bindings := #{node := NodeBin}, body := Params0
}) ->
    StartOnNode = fun(Node) ->
        Translated = translate(rebalance_evacuation_start, Params0),
        DoStart = fun(Validated) ->
            RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_start(
                Node, Validated
            ),
            wrap_rpc(Node, RpcResult)
        end,
        with_nodes_at_key(migrate_to, Translated, DoStart)
    end,
    emqx_utils_api:with_node(NodeBin, StartOnNode).
|
||||||
|
|
||||||
|
%% @doc POST handler for `/load_rebalance/:node/evacuation/stop'.
%% Sends an evacuation stop request to the target node and maps the RPC
%% result to an HTTP response with wrap_rpc/2.
'/load_rebalance/:node/evacuation/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOnNode = fun(Node) ->
        RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_stop(Node),
        wrap_rpc(Node, RpcResult)
    end,
    emqx_utils_api:with_node(NodeBin, StopOnNode).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% @doc Map the result of a rebalance RPC onto an HTTP API response.
%%
%% `ok'               -> 200 with an empty body
%% `{error, Reason}'  -> 400 BAD_REQUEST
%% `{badrpc, Reason}' -> 503 RPC_ERROR
%%
%% io_lib:format/2 returns a deep character list; it is converted to a UTF-8
%% binary here so that the `message' field of the error body is a plain
%% string rather than a nested list of integers.
wrap_rpc(Node, RPCResult) ->
    case RPCResult of
        ok ->
            {200, #{}};
        {error, Reason} ->
            ErrorMsg = unicode:characters_to_binary(
                io_lib:format("error on node ~p: ~p", [Node, Reason])
            ),
            error_response(400, ?BAD_REQUEST, ErrorMsg);
        {badrpc, Reason} ->
            RpcMsg = unicode:characters_to_binary(
                io_lib:format("RPC error on node ~p: ~p", [Node, Reason])
            ),
            error_response(503, ?RPC_ERROR, RpcMsg)
    end.
|
||||||
|
|
||||||
|
%% @doc Decorate the stats map of a running process with its kind
%% (`process') and a fixed `status => enabled' marker. Keys already present
%% in Stats under those names are overwritten.
format_status(Process, Stats) ->
    Marker = #{process => Process, status => enabled},
    maps:merge(Stats, Marker).
|
||||||
|
|
||||||
|
%% @doc Validate the list of binary node names stored under Key in Params.
%%
%% Returns `{ok, Params1}' with the names replaced by atoms when every name
%% parses and every node is reported available, `{error, {invalid, Bins}}'
%% when some names do not parse, and `{error, {unavailable, Nodes}}' when
%% some parsed nodes are not returned by
%% emqx_node_rebalance_evacuation:available_nodes/1.
%% When Key is absent, Params is returned untouched.
validate_nodes(Key, Params) when is_map_key(Key, Params) ->
    Parsed = [{BinNode, parse_node(BinNode)} || BinNode <- maps:get(Key, Params)],
    %% Both lists are kept in reverse input order, matching what a
    %% prepend-accumulating left fold over the input produces.
    ValidNodes = lists:reverse([Node || {_Bin, {ok, Node}} <- Parsed]),
    InvalidNodes = lists:reverse([Bin || {Bin, {error, _}} <- Parsed]),
    case InvalidNodes of
        [_ | _] ->
            {error, {invalid, InvalidNodes}};
        [] ->
            Available = emqx_node_rebalance_evacuation:available_nodes(ValidNodes),
            if
                Available =:= ValidNodes ->
                    {ok, Params#{Key => ValidNodes}};
                true ->
                    {error, {unavailable, ValidNodes -- Available}}
            end
    end;
validate_nodes(_Key, Params) ->
    {ok, Params}.
|
||||||
|
|
||||||
|
%% @doc Validate the node list stored under Key in Params and, on success,
%% call Fun with the updated params; otherwise answer 400.
%%
%% Unavailable nodes are reported with code NOT_FOUND, unparsable names with
%% code BAD_REQUEST. The formatted message is converted to a UTF-8 binary:
%% io_lib:format/2 yields a deep character list, which is not a usable
%% string value for the `message' field of the error body.
with_nodes_at_key(Key, Params, Fun) ->
    case validate_nodes(Key, Params) of
        {ok, Params1} ->
            Fun(Params1);
        {error, {unavailable, Nodes}} ->
            UnavailableMsg = unicode:characters_to_binary(
                io_lib:format("Nodes unavailable: ~p", [Nodes])
            ),
            error_response(400, ?NOT_FOUND, UnavailableMsg);
        {error, {invalid, Nodes}} ->
            InvalidMsg = unicode:characters_to_binary(
                io_lib:format("Invalid nodes: ~p", [Nodes])
            ),
            error_response(400, ?BAD_REQUEST, InvalidMsg)
    end.
|
||||||
|
|
||||||
|
%% @doc Convert a binary node name to an atom without creating new atoms.
%% Returns `{ok, Atom}' when the atom already exists, otherwise
%% `{error, {unknown, Bin}}'.
parse_node(Bin) when is_binary(Bin) ->
    try binary_to_existing_atom(Bin) of
        Node -> {ok, Node}
    catch
        error:badarg -> {error, {unknown, Bin}}
    end.
|
||||||
|
|
||||||
|
%% @doc Turn a list of `{Node, InfoMap}' pairs into a list of maps, each
%% InfoMap extended with `node => Node'. Crashes on any element that is not
%% a 2-tuple.
format_as_map_list([]) ->
    [];
format_as_map_list([{Node, Info} | Rest]) ->
    [Info#{node => Node} | format_as_map_list(Rest)].
|
||||||
|
|
||||||
|
%% @doc Build an `{HttpCode, Body}' error reply, with the body produced by
%% the ?ERROR_MSG macro from the API error Code and Message.
error_response(HttpCode, Code, Message) ->
    Body = ?ERROR_MSG(Code, Message),
    {HttpCode, Body}.
|
||||||
|
|
||||||
|
%% @doc Drop every `{Key, _}' pair whose key is listed in Keys from the
%% property list Props, preserving the order of the remaining pairs.
without(_Keys, []) ->
    [];
without(Keys, [{Key, _} = Prop | Rest]) ->
    case lists:member(Key, Keys) of
        true -> without(Keys, Rest);
        false -> [Prop | without(Keys, Rest)]
    end.
|
||||||
|
|
||||||
|
%%------------------------------------------------------------------------------
|
||||||
|
%% Schema
|
||||||
|
%%------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% @doc Check the raw request body Conf against the schema struct Ref of
%% this module with hocon_tconf:check_plain/4 and return the checked value.
%% The `atom_key' option is enabled for the check.
translate(Ref, Conf) ->
    RawConf = #{atom_to_binary(Ref) => Conf},
    Checked = hocon_tconf:check_plain(?MODULE, RawConf, #{atom_key => true}, [Ref]),
    #{Ref := TranslatedConf} = Checked,
    TranslatedConf.
|
||||||
|
|
||||||
|
%% @doc Schema of the required `node' path parameter (a binary).
param_node() ->
    Meta = #{
        required => true,
        in => path,
        desc => ?DESC(param_node)
    },
    {node, mk(binary(), Meta)}.
|
||||||
|
|
||||||
|
%% @doc Schema field definitions for the request and response structs of the
%% rebalance HTTP API. Every field is built through schema_field/4,5, which
%% wraps `mk/2' with the `desc' and `required' metadata.
fields(rebalance_start) ->
    GreaterThanOne = [fun(Value) -> Value > 1.0 end],
    [
        schema_field(
            "wait_health_check", emqx_schema:duration_s(), ?DESC(wait_health_check), false
        ),
        schema_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate), false),
        schema_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate), false),
        schema_field("abs_conn_threshold", pos_integer(), ?DESC(abs_conn_threshold), false),
        schema_field(
            "rel_conn_threshold",
            number(),
            ?DESC(rel_conn_threshold),
            false,
            #{validator => GreaterThanOne}
        ),
        schema_field("abs_sess_threshold", pos_integer(), ?DESC(abs_sess_threshold), false),
        schema_field(
            "rel_sess_threshold",
            number(),
            ?DESC(rel_sess_threshold),
            false,
            #{validator => GreaterThanOne}
        ),
        schema_field("wait_takeover", emqx_schema:duration_s(), ?DESC(wait_takeover), false),
        schema_field(
            "nodes",
            list(binary()),
            ?DESC(rebalance_nodes),
            false,
            #{validator => [fun(Values) -> length(Values) > 0 end]}
        )
    ];
fields(rebalance_evacuation_start) ->
    [
        schema_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate), false),
        schema_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate), false),
        schema_field("redirect_to", binary(), ?DESC(redirect_to), false),
        schema_field("wait_takeover", pos_integer(), ?DESC(wait_takeover), false),
        schema_field("migrate_to", nonempty_list(binary()), ?DESC(migrate_to), false)
    ];
fields(local_status_disabled) ->
    [
        schema_field("status", disabled, ?DESC(local_status_enabled), true)
    ];
fields(local_status_enabled) ->
    [
        schema_field("status", enabled, ?DESC(local_status_enabled), true),
        schema_field(
            "process",
            hoconsc:union([rebalance, evacuation]),
            ?DESC(local_status_process),
            true
        ),
        schema_field("state", atom(), ?DESC(local_status_state), true),
        schema_field("coordinator_node", binary(), ?DESC(local_status_coordinator_node), false),
        schema_field(
            "connection_eviction_rate",
            pos_integer(),
            ?DESC(local_status_connection_eviction_rate),
            false
        ),
        schema_field(
            "session_eviction_rate",
            pos_integer(),
            ?DESC(local_status_session_eviction_rate),
            false
        ),
        schema_field(
            "connection_goal", non_neg_integer(), ?DESC(local_status_connection_goal), false
        ),
        schema_field("session_goal", non_neg_integer(), ?DESC(local_status_session_goal), false),
        schema_field(
            "disconnected_session_goal",
            non_neg_integer(),
            ?DESC(local_status_disconnected_session_goal),
            false
        ),
        schema_field(
            "session_recipients", list(binary()), ?DESC(local_status_session_recipients), false
        ),
        schema_field("recipients", list(binary()), ?DESC(local_status_recipients), false),
        schema_field("stats", ref(status_stats), ?DESC(local_status_stats), false)
    ];
fields(status_stats) ->
    [
        schema_field(
            "initial_connected", non_neg_integer(), ?DESC(status_stats_initial_connected), true
        ),
        schema_field(
            "current_connected", non_neg_integer(), ?DESC(status_stats_current_connected), true
        ),
        schema_field(
            "initial_sessions", non_neg_integer(), ?DESC(status_stats_initial_sessions), true
        ),
        schema_field(
            "current_sessions", non_neg_integer(), ?DESC(status_stats_current_sessions), true
        ),
        schema_field(
            "current_disconnected_sessions",
            non_neg_integer(),
            ?DESC(status_stats_current_disconnected_sessions),
            false
        )
    ];
fields(global_coordinator_status) ->
    Inherited = without(
        ["status", "process", "session_goal", "session_recipients", "stats"],
        fields(local_status_enabled)
    ),
    Inherited ++
        [
            schema_field("donors", list(binary()), ?DESC(coordinator_status_donors), false),
            schema_field(
                "donor_conn_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_conn_avg),
                false
            ),
            schema_field(
                "donor_sess_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_sess_avg),
                false
            ),
            schema_field("node", binary(), ?DESC(coordinator_status_node), true)
        ];
fields(global_evacuation_status) ->
    Inherited = without(["status", "process"], fields(local_status_enabled)),
    Inherited ++
        [
            schema_field("node", binary(), ?DESC(evacuation_status_node), true)
        ];
fields(global_status) ->
    [
        schema_field(
            "evacuations",
            hoconsc:array(ref(global_evacuation_status)),
            ?DESC(global_status_evacuations),
            false
        ),
        schema_field(
            "rebalances",
            hoconsc:array(ref(global_coordinator_status)),
            ?DESC(global_status_rebalances),
            false
        )
    ].

%% Build one `{Name, Schema}' field with only `desc' and `required' metadata.
schema_field(Name, Type, Desc, Required) ->
    schema_field(Name, Type, Desc, Required, #{}).

%% Same as schema_field/4, with extra metadata (e.g. `validator') merged in.
schema_field(Name, Type, Desc, Required, Extra) ->
    {Name, mk(Type, Extra#{desc => Desc, required => Required})}.
|
||||||
|
|
||||||
|
%% @doc Example request body for starting a rebalance.
rebalance_example() ->
    Timing = #{wait_health_check => 10, wait_takeover => 10},
    Rates = #{conn_evict_rate => 10, sess_evict_rate => 20},
    Thresholds = #{
        abs_conn_threshold => 10,
        rel_conn_threshold => 1.5,
        abs_sess_threshold => 10,
        rel_sess_threshold => 1.5
    },
    Nodes = #{nodes => [<<"othernode@127.0.0.1">>]},
    lists:foldl(fun maps:merge/2, #{}, [Timing, Rates, Thresholds, Nodes]).
|
||||||
|
|
||||||
|
%% @doc Example request body for starting a node evacuation.
rebalance_evacuation_example() ->
    maps:from_list([
        {conn_evict_rate, 100},
        {sess_evict_rate, 100},
        {redirect_to, <<"othernode:1883">>},
        {wait_takeover, 10},
        {migrate_to, [<<"othernode@127.0.0.1">>]}
    ]).
|
||||||
|
|
||||||
|
%% @doc Response schema of the local status endpoint: a union of the
%% `local_status_disabled' and `local_status_enabled' structs.
local_status_response_schema() ->
    Disabled = ref(local_status_disabled),
    Enabled = ref(local_status_enabled),
    hoconsc:union([Disabled, Enabled]).
|
||||||
|
|
||||||
|
%% @doc Schema of a generic map response, described by the `empty_response'
%% i18n entry.
response_schema() ->
    Meta = #{desc => ?DESC(empty_response)},
    mk(map(), Meta).
|
||||||
|
|
||||||
|
%% @doc This schema module declares no root fields; the structs are only
%% reachable by name through fields/1.
roots() -> [].
|
|
@ -0,0 +1,22 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_app).
|
||||||
|
|
||||||
|
-behaviour(application).
|
||||||
|
|
||||||
|
-emqx_plugin(?MODULE).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start/2,
|
||||||
|
stop/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% @doc Application start callback: start the top-level supervisor, then
%% register the `rebalance' CLI command. Crashes (badmatch) if either step
%% does not succeed.
start(_Type, _Args) ->
    {ok, _SupPid} = Started = emqx_node_rebalance_sup:start_link(),
    ok = emqx_node_rebalance_cli:load(),
    Started.
|
||||||
|
|
||||||
|
%% @doc Application stop callback: unregister the `rebalance' CLI command.
%% The result of the unload call is returned as is.
stop(_State) ->
    emqx_node_rebalance_cli:unload().
|
|
@ -0,0 +1,305 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_cli).
|
||||||
|
|
||||||
|
%% APIs
|
||||||
|
-export([
|
||||||
|
load/0,
|
||||||
|
unload/0,
|
||||||
|
cli/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% @doc Register the `rebalance' command with emqx_ctl, handled by cli/1 of
%% this module, with an empty option list.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(rebalance, Handler, []).
|
||||||
|
|
||||||
|
%% @doc Unregister the `rebalance' command from emqx_ctl.
unload() ->
    emqx_ctl:unregister_command(rebalance).
|
||||||
|
|
||||||
|
%% @doc Entry point of the `rebalance' emqx_ctl command.
%%
%% Supported invocations:
%%   start [Options]     start an evacuation (with `--evacuation') or a rebalance
%%   node-status [Node]  print the status of the local or the given node
%%   status              print all evacuations/rebalances known cluster-wide
%%   stop                stop the running evacuation or rebalance
%% Anything else prints the usage text.
%%
%% The start/stop/node-status-error paths return `true' on success and
%% `false' on failure.
cli(["start" | StartArgs]) ->
    case start_args(StartArgs) of
        {evacuation, Opts} ->
            case emqx_node_rebalance_evacuation:status() of
                disabled ->
                    %% Report a refused start instead of crashing with a
                    %% badmatch, consistently with the rebalance branch below.
                    %% NOTE(review): assumes start/1 returns
                    %% ok | {error, start_error()} - confirm against its spec.
                    case emqx_node_rebalance_evacuation:start(Opts) of
                        ok ->
                            emqx_ctl:print("Rebalance(evacuation) started~n"),
                            true;
                        {error, Reason} ->
                            emqx_ctl:print(
                                "Rebalance(evacuation) start error: ~p~n", [Reason]
                            ),
                            false
                    end;
                {enabled, _} ->
                    emqx_ctl:print("Rebalance is already enabled~n"),
                    false
            end;
        {rebalance, Opts} ->
            case emqx_node_rebalance:start(Opts) of
                ok ->
                    emqx_ctl:print("Rebalance started~n"),
                    true;
                {error, Reason} ->
                    emqx_ctl:print("Rebalance start error: ~p~n", [Reason]),
                    false
            end;
        {error, Error} ->
            %% Argument errors are already human-readable chardata.
            emqx_ctl:print("Rebalance start error: ~s~n", [Error]),
            false
    end;
cli(["node-status", NodeStr]) ->
    %% Only existing atoms are accepted, so arbitrary input cannot grow the
    %% atom table.
    case emqx_utils:safe_to_existing_atom(NodeStr, utf8) of
        {ok, Node} ->
            node_status(emqx_node_rebalance_status:local_status(Node));
        {error, _} ->
            emqx_ctl:print("Node status error: invalid node~n"),
            false
    end;
cli(["node-status"]) ->
    node_status(emqx_node_rebalance_status:local_status());
cli(["status"]) ->
    #{
        evacuations := Evacuations,
        rebalances := Rebalances
    } = emqx_node_rebalance_status:global_status(),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: evacuation~n~s",
                [Node, emqx_node_rebalance_status:format_local_status(Status)]
            )
        end,
        Evacuations
    ),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: rebalance coordinator~n~s",
                [Node, emqx_node_rebalance_status:format_coordinator_status(Status)]
            )
        end,
        Rebalances
    );
cli(["stop"]) ->
    %% An evacuation takes precedence; a rebalance is only looked at when no
    %% evacuation is running on this node.
    case emqx_node_rebalance_evacuation:status() of
        {enabled, _} ->
            ok = emqx_node_rebalance_evacuation:stop(),
            emqx_ctl:print("Rebalance(evacuation) stopped~n"),
            true;
        disabled ->
            case emqx_node_rebalance:status() of
                {enabled, _} ->
                    ok = emqx_node_rebalance:stop(),
                    emqx_ctl:print("Rebalance stopped~n"),
                    true;
                disabled ->
                    emqx_ctl:print("Rebalance is already disabled~n"),
                    false
            end
    end;
cli(_) ->
    emqx_ctl:usage(
        [
            {
                "rebalance start --evacuation \\\n"
                " [--redirect-to \"Host1:Port1 Host2:Port2 ...\"] \\\n"
                " [--conn-evict-rate CountPerSec] \\\n"
                " [--migrate-to \"node1@host1 node2@host2 ...\"] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec]",
                "Start current node evacuation with optional server redirect to the specified servers"
            },

            {
                %% Each option is listed once; --conn-evict-rate used to
                %% appear twice in this synopsis.
                "rebalance start \\\n"
                " [--nodes \"node1@host1 node2@host2\"] \\\n"
                " [--wait-health-check Secs] \\\n"
                " [--conn-evict-rate ConnPerSec] \\\n"
                " [--abs-conn-threshold Count] \\\n"
                " [--rel-conn-threshold Fraction] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec] \\\n"
                " [--abs-sess-threshold Count] \\\n"
                " [--rel-sess-threshold Fraction]",
                "Start rebalance on the specified nodes using the current node as the coordinator"
            },

            {"rebalance node-status", "Get current node rebalance status"},

            {"rebalance node-status \"node1@host1\"", "Get remote node rebalance status"},

            {"rebalance status",
                "Get statuses of all current rebalance/evacuation processes across the cluster"},

            {"rebalance stop", "Stop node rebalance"}
        ]
    ).
|
||||||
|
|
||||||
|
%% @doc Print a node status as returned by
%% emqx_node_rebalance_status:local_status/0,1: the formatted status of a
%% running evacuation/rebalance, a "disabled" notice, or an error line for
%% any other term.
node_status({Process, Status}) when Process =:= evacuation; Process =:= rebalance ->
    Formatted = emqx_node_rebalance_status:format_local_status(Status),
    emqx_ctl:print("Rebalance type: ~p~n~s~n", [Process, Formatted]);
node_status(disabled) ->
    emqx_ctl:print("Rebalance disabled~n");
node_status(Other) ->
    emqx_ctl:print("Error detecting rebalance status: ~p~n", [Other]).
|
||||||
|
|
||||||
|
%% @doc Parse and validate the arguments of `rebalance start'.
%% Returns `{evacuation, Opts}' when `--evacuation' was given,
%% `{rebalance, Opts}' otherwise, or `{error, Message}'.
start_args(Args) ->
    case collect_args(Args, #{}) of
        {error, _} = Error ->
            Error;
        {ok, #{} = Collected} ->
            Options = maps:to_list(Collected),
            case Collected of
                #{"--evacuation" := true} ->
                    tag_validated(evacuation, validate_evacuation(Options, #{}));
                _ ->
                    tag_validated(rebalance, validate_rebalance(Options, #{}))
            end
    end.

%% Replace the `ok' tag of a successful validation with the process kind;
%% errors pass through unchanged.
tag_validated(Kind, {ok, Validated}) ->
    {Kind, Validated};
tag_validated(_Kind, {error, _} = Error) ->
    Error.
|
||||||
|
|
||||||
|
%% @doc Collect raw command line options into a map keyed by option name.
%% `--evacuation' is a flag stored as `true'; every other known option
%% consumes the following argument as its (still unparsed) string value.
%% Returns `{ok, Map}' or `{error, Message}' for the first unknown or
%% incomplete option, Message naming the remaining arguments.
collect_args([], Map) ->
    {ok, Map};
collect_args(["--evacuation" | Args], Map) ->
    collect_args(Args, Map#{"--evacuation" => true});
collect_args([Option, Value | Args] = Remaining, Map) ->
    ValueOptions = [
        %% evacuation
        "--redirect-to",
        "--migrate-to",
        %% rebalance
        "--nodes",
        "--wait-health-check",
        "--abs-conn-threshold",
        "--rel-conn-threshold",
        "--abs-sess-threshold",
        "--rel-sess-threshold",
        %% common
        "--conn-evict-rate",
        "--wait-takeover",
        "--sess-evict-rate"
    ],
    case lists:member(Option, ValueOptions) of
        true ->
            collect_args(Args, Map#{Option => Value});
        false ->
            {error, io_lib:format("unknown arguments: ~p", [Remaining])}
    end;
collect_args(Args, _Map) ->
    {error, io_lib:format("unknown arguments: ~p", [Args])}.
|
||||||
|
|
||||||
|
%% @doc Validate the collected `{Option, Value}' pairs of an evacuation start
%% and build the start options map: `server_reference', `conn_evict_rate',
%% `sess_evict_rate', `wait_takeover' and `migrate_to'.
%% Returns `{ok, Opts}' or `{error, Message}'.
validate_evacuation([], Map) ->
    {ok, Map};
validate_evacuation([{"--evacuation", _} | Rest], Map) ->
    validate_evacuation(Rest, Map);
validate_evacuation([{"--redirect-to", ServerReference} | Rest], Map) ->
    validate_evacuation(Rest, Map#{server_reference => list_to_binary(ServerReference)});
validate_evacuation([{"--migrate-to", MigrateTo} | Rest], Map) ->
    case evacuation_migrate_to(MigrateTo) of
        {ok, Nodes} -> validate_evacuation(Rest, Map#{migrate_to => Nodes});
        {error, _} = Error -> Error
    end;
validate_evacuation([{Option, _} | _] = Opts, Map) ->
    case evacuation_pos_int_option(Option) of
        {ok, Name} ->
            validate_pos_int(Name, Opts, Map, fun validate_evacuation/2);
        error ->
            {error, io_lib:format("unknown evacuation arguments: ~p", [Opts])}
    end;
validate_evacuation(Rest, _Map) ->
    {error, io_lib:format("unknown evacuation arguments: ~p", [Rest])}.

%% Options of an evacuation whose value must be a positive integer.
evacuation_pos_int_option("--conn-evict-rate") -> {ok, conn_evict_rate};
evacuation_pos_int_option("--sess-evict-rate") -> {ok, sess_evict_rate};
evacuation_pos_int_option("--wait-takeover") -> {ok, wait_takeover};
evacuation_pos_int_option(_) -> error.

%% Parse a comma/space separated list of node names and make sure all of
%% them are existing atoms and reported available for evacuation.
evacuation_migrate_to(MigrateTo) ->
    {Nodes, Invalid} = strings_to_atoms(string:tokens(MigrateTo, ", ")),
    case Invalid of
        [_ | _] ->
            {error, io_lib:format("invalid --migrate-to, invalid nodes: ~p", [Invalid])};
        [] ->
            case emqx_node_rebalance_evacuation:available_nodes(Nodes) of
                [] ->
                    {error, "invalid --migrate-to, no nodes"};
                Nodes ->
                    {ok, Nodes};
                OtherNodes ->
                    {error,
                        io_lib:format(
                            "invalid --migrate-to, unavailable nodes: ~p",
                            [Nodes -- OtherNodes]
                        )}
            end
    end.
|
||||||
|
|
||||||
|
%% @doc Validate the collected `{Option, Value}' pairs of a rebalance start
%% and build the start options map. Integer options must be positive,
%% the relative thresholds are checked by validate_fraction/4 and `--nodes'
%% must name existing, available nodes.
%% Returns `{ok, Opts}' or `{error, Message}'.
validate_rebalance([], Map) ->
    {ok, Map};
validate_rebalance([{"--nodes", NodeStr} | Rest], Map) ->
    case rebalance_nodes(NodeStr) of
        {ok, Nodes} -> validate_rebalance(Rest, Map#{nodes => Nodes});
        {error, _} = Error -> Error
    end;
validate_rebalance([{Option, _} | _] = Opts, Map) ->
    case rebalance_numeric_option(Option) of
        {pos_int, Name} ->
            validate_pos_int(Name, Opts, Map, fun validate_rebalance/2);
        {fraction, Name} ->
            validate_fraction(Name, Opts, Map, fun validate_rebalance/2);
        unknown ->
            {error, io_lib:format("unknown rebalance arguments: ~p", [Opts])}
    end;
validate_rebalance(Rest, _Map) ->
    {error, io_lib:format("unknown rebalance arguments: ~p", [Rest])}.

%% Numeric options of a rebalance: the kind of value and the resulting key.
rebalance_numeric_option("--wait-health-check") -> {pos_int, wait_health_check};
rebalance_numeric_option("--conn-evict-rate") -> {pos_int, conn_evict_rate};
rebalance_numeric_option("--sess-evict-rate") -> {pos_int, sess_evict_rate};
rebalance_numeric_option("--abs-conn-threshold") -> {pos_int, abs_conn_threshold};
rebalance_numeric_option("--rel-conn-threshold") -> {fraction, rel_conn_threshold};
rebalance_numeric_option("--abs-sess-threshold") -> {pos_int, abs_sess_threshold};
rebalance_numeric_option("--rel-sess-threshold") -> {fraction, rel_sess_threshold};
rebalance_numeric_option("--wait-takeover") -> {pos_int, wait_takeover};
rebalance_numeric_option(_) -> unknown.

%% Parse a comma/space separated list of node names and make sure all of
%% them are existing atoms and reported available for rebalance.
rebalance_nodes(NodeStr) ->
    {Nodes, Invalid} = strings_to_atoms(string:tokens(NodeStr, ", ")),
    case Invalid of
        [_ | _] ->
            {error, io_lib:format("invalid --nodes, invalid nodes: ~p", [Invalid])};
        [] ->
            case emqx_node_rebalance:available_nodes(Nodes) of
                [] ->
                    {error, "invalid --nodes, no nodes"};
                Nodes ->
                    {ok, Nodes};
                OtherNodes ->
                    {error,
                        io_lib:format(
                            "invalid --nodes, unavailable nodes: ~p",
                            [Nodes -- OtherNodes]
                        )}
            end
    end.
|
||||||
|
|
||||||
|
%% @doc Validate that the value of the first option is a number strictly
%% greater than 1.0, store it (as a float) under Name and continue with
%% Next(Rest, Map). Returns `{error, Message}' for anything else.
%%
%% Both float ("1.5") and integer ("2") notations are accepted:
%% string:to_float/1 alone rejects integer literals, which made the CLI
%% stricter than the HTTP API, where the same thresholds are any number
%% greater than 1.0.
validate_fraction(Name, [{OptionName, Value} | Rest], Map, Next) ->
    case parse_fraction(Value) of
        {ok, Num} when Num > 1.0 ->
            Next(Rest, Map#{Name => Num});
        _ ->
            {error, "invalid " ++ OptionName ++ " value"}
    end.

%% Parse a complete string as a float, falling back to integer notation.
%% Trailing garbage is rejected in both notations.
parse_fraction(Value) ->
    case string:to_float(Value) of
        {Float, ""} when is_float(Float) ->
            {ok, Float};
        _ ->
            case string:to_integer(Value) of
                {Int, ""} when is_integer(Int) -> {ok, float(Int)};
                _ -> error
            end
    end.
|
||||||
|
|
||||||
|
%% @doc Validate that the value of the first option is a positive integer
%% with no trailing characters, store it under Name and continue with
%% Next(Rest, Map). Returns `{error, Message}' for anything else.
validate_pos_int(Name, [{OptionName, Value} | Rest], Map, Next) ->
    case string:to_integer(Value) of
        {Int, Trailing} when Trailing =:= "", Int > 0 ->
            Next(Rest, maps:put(Name, Int, Map));
        _Invalid ->
            {error, lists:append(["invalid ", OptionName, " value"])}
    end.
|
||||||
|
|
||||||
|
%% @doc Convert strings to already existing atoms.
%% Returns `{Atoms, Invalid}': the atoms of the strings that could be
%% converted and the strings that could not, both in input order.
strings_to_atoms(Strings) ->
    Classify = fun(Str, {AtomsAcc, InvalidAcc}) ->
        case emqx_utils:safe_to_existing_atom(Str, utf8) of
            {ok, Atom} -> {[Atom | AtomsAcc], InvalidAcc};
            {error, _} -> {AtomsAcc, [Str | InvalidAcc]}
        end
    end,
    {RevAtoms, RevInvalid} = lists:foldl(Classify, {[], []}, Strings),
    {lists:reverse(RevAtoms), lists:reverse(RevInvalid)}.
|
|
@ -0,0 +1,308 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_evacuation).
|
||||||
|
|
||||||
|
-include("emqx_node_rebalance.hrl").
|
||||||
|
|
||||||
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-export([
|
||||||
|
start/1,
|
||||||
|
status/0,
|
||||||
|
stop/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-behaviour(gen_statem).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
callback_mode/0,
|
||||||
|
handle_event/4,
|
||||||
|
code_change/4
|
||||||
|
]).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
is_node_available/0,
|
||||||
|
available_nodes/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% migrate_to/0 is referenced as a remote type by
%% emqx_node_rebalance_evacuation_persist, so it must be exported as well.
-export_type([
    start_opts/0,
    start_error/0,
    migrate_to/0
]).
|
||||||
|
|
||||||
|
-ifdef(TEST).
|
||||||
|
-export([migrate_to/1]).
|
||||||
|
-endif.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-define(EVICT_INTERVAL_NO_NODES, 30000).
|
||||||
|
|
||||||
|
-type migrate_to() :: [node()] | undefined.
|
||||||
|
|
||||||
|
-type start_opts() :: #{
|
||||||
|
server_reference => emqx_eviction_agent:server_reference(),
|
||||||
|
conn_evict_rate => pos_integer(),
|
||||||
|
sess_evict_rate => pos_integer(),
|
||||||
|
wait_takeover => pos_integer(),
|
||||||
|
migrate_to => migrate_to()
|
||||||
|
}.
|
||||||
|
-type start_error() :: already_started | eviction_agent_busy.
|
||||||
|
%% Map returned inside `{enabled, Stats}` by status/0.
%% The status handler adds the current gen_statem state under `state`,
%% so the key is part of the type.
-type stats() :: #{
    state := atom(),
    initial_conns := non_neg_integer(),
    initial_sessions := non_neg_integer(),
    current_conns := non_neg_integer(),
    current_sessions := non_neg_integer(),
    conn_evict_rate := pos_integer(),
    sess_evict_rate := pos_integer(),
    server_reference := emqx_eviction_agent:server_reference(),
    migrate_to := migrate_to()
}.
|
||||||
|
-type status() :: {enabled, stats()} | disabled.
|
||||||
|
|
||||||
|
%% Requests the evacuation process to start. Options missing from
%% `StartOpts` are filled in from default_opts/0.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    gen_statem:call(?MODULE, {start, maps:merge(default_opts(), StartOpts)}).
|
||||||
|
|
||||||
|
%% Requests the evacuation process to stop; the server replies
%% `{error, not_started}` when it is in the `disabled` state.
-spec stop() -> ok_or_error(not_started).
stop() ->
    gen_statem:call(?MODULE, stop).
|
||||||
|
|
||||||
|
%% Queries the evacuation process for its status:
%% `disabled` or `{enabled, Stats}`.
-spec status() -> status().
status() ->
    gen_statem:call(?MODULE, status).
|
||||||
|
|
||||||
|
%% Starts the evacuation state machine, registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_statem:start_link(ServerName, ?MODULE, [], []).
|
||||||
|
|
||||||
|
%% Asks the given nodes whether they are available and returns those that
%% answered with an atom (is_node_available/0 returns node() on success).
%% Non-atom replies and unreachable nodes are dropped.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _BadNodes} = emqx_node_rebalance_evacuation_proto_v1:available_nodes(Nodes),
    [Reply || Reply <- Replies, is_atom(Reply)].
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% gen_statem callbacks
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% gen_statem callback: all events are dispatched to handle_event/4.
callback_mode() ->
    handle_event_function.
|
||||||
|
|
||||||
|
%% states: disabled, evicting_conns, waiting_takeover, evicting_sessions, prohibiting
|
||||||
|
|
||||||
|
%% gen_statem callback. If persisted evacuation options are found, the
%% eviction agent is re-enabled and the machine resumes in `evicting_conns`;
%% otherwise (or when the agent is busy) it starts in `disabled`.
init([]) ->
    Persisted = emqx_node_rebalance_evacuation_persist:read(default_opts()),
    case Persisted of
        none ->
            {ok, disabled, #{}};
        {ok, #{server_reference := ServerReference} = SavedOpts} ->
            ?SLOG(warning, #{msg => "restoring_evacuation_state", opts => SavedOpts}),
            case emqx_eviction_agent:enable(?MODULE, ServerReference) of
                {error, eviction_agent_busy} ->
                    %% Somebody else owns the agent: drop the persisted state.
                    emqx_node_rebalance_evacuation_persist:clear(),
                    {ok, disabled, #{}};
                ok ->
                    RestoredData = init_data(#{}, SavedOpts),
                    ok = warn_enabled(),
                    {ok, evicting_conns, RestoredData, [{state_timeout, 0, evict_conns}]}
            end
    end.
|
||||||
|
|
||||||
|
%% start
|
||||||
|
%% gen_statem callback handling every event in every state
%% (disabled, evicting_conns, waiting_takeover, evicting_sessions, prohibiting).
%%
%% start: only accepted in `disabled`; enables the eviction agent, persists
%% the options and begins evicting connections immediately.
handle_event(
    {call, From},
    {start, #{server_reference := ServerReference} = Opts},
    disabled,
    #{} = Data
) ->
    case emqx_eviction_agent:enable(?MODULE, ServerReference) of
        ok ->
            NewData = init_data(Data, Opts),
            ok = emqx_node_rebalance_evacuation_persist:save(Opts),
            ?SLOG(warning, #{
                msg => "node_evacuation_started",
                opts => Opts
            }),
            {next_state, evicting_conns, NewData, [
                {state_timeout, 0, evict_conns},
                {reply, From, ok}
            ]};
        {error, eviction_agent_busy} ->
            {keep_state_and_data, [{reply, From, {error, eviction_agent_busy}}]}
    end;
handle_event({call, From}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, From, {error, already_started}}]};
%% stop: clears the persisted options, disables the agent and returns to `disabled`.
handle_event({call, From}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, {error, not_started}}]};
handle_event({call, From}, stop, _State, Data) ->
    ok = emqx_node_rebalance_evacuation_persist:clear(),
    _ = emqx_eviction_agent:disable(?MODULE),
    ?SLOG(warning, #{msg => "node_evacuation_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, From, ok}]};
%% status: reports counters, rates and the currently available recipient nodes.
handle_event({call, From}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, disabled}]};
handle_event({call, From}, status, State, #{migrate_to := MigrateTo} = Data) ->
    Stats = maps:with(
        [
            initial_conns,
            current_conns,
            initial_sessions,
            current_sessions,
            server_reference,
            conn_evict_rate,
            sess_evict_rate
        ],
        Data
    ),
    {keep_state_and_data, [
        {reply, From, {enabled, Stats#{state => State, migrate_to => migrate_to(MigrateTo)}}}
    ]};
%% conn eviction: evict a batch per tick until the agent reports no connections,
%% then wait `wait_takeover` seconds before touching sessions.
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{
        conn_evict_rate := ConnEvictRate,
        wait_takeover := WaitTakeover
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{connections := Conns}} when Conns > 0 ->
            ok = emqx_eviction_agent:evict_connections(ConnEvictRate),
            ?tp(debug, node_evacuation_evict_conn, #{conn_evict_rate => ConnEvictRate}),
            ?SLOG(
                warning,
                #{
                    msg => "node_evacuation_evict_conns",
                    count => Conns,
                    conn_evict_rate => ConnEvictRate
                }
            ),
            NewData = Data#{current_conns => Conns},
            {keep_state, NewData, [{state_timeout, ?EVICT_INTERVAL, evict_conns}]};
        {enabled, #{connections := 0}} ->
            NewData = Data#{current_conns => 0},
            ?SLOG(warning, #{msg => "node_evacuation_evict_conns_done"}),
            {next_state, waiting_takeover, NewData, [
                {state_timeout, timer:seconds(WaitTakeover), evict_sessions}
            ]}
    end;
handle_event(
    state_timeout,
    evict_sessions,
    waiting_takeover,
    Data
) ->
    ?SLOG(warning, #{msg => "node_evacuation_waiting_takeover_done"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
%% session eviction: evict a batch per tick towards the available recipient
%% nodes; when no recipient is available, retry after ?EVICT_INTERVAL_NO_NODES.
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{
        sess_evict_rate := SessEvictRate,
        migrate_to := MigrateTo,
        current_sessions := _
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{sessions := SessCount}} when SessCount > 0 ->
            %% Record the freshly observed count in both branches so that the
            %% log line and status/0 never report a stale value.
            NewData = Data#{current_sessions => SessCount},
            case migrate_to(MigrateTo) of
                [] ->
                    ?SLOG(warning, #{
                        msg => "no_nodes_to_evacuate_sessions", session_count => SessCount
                    }),
                    {keep_state, NewData, [
                        {state_timeout, ?EVICT_INTERVAL_NO_NODES, evict_sessions}
                    ]};
                Nodes ->
                    ok = emqx_eviction_agent:evict_sessions(SessEvictRate, Nodes),
                    ?SLOG(
                        warning,
                        #{
                            msg => "node_evacuation_evict_sessions",
                            session_count => SessCount,
                            session_evict_rate => SessEvictRate,
                            target_nodes => Nodes
                        }
                    ),
                    {keep_state, NewData, [{state_timeout, ?EVICT_INTERVAL, evict_sessions}]}
            end;
        {enabled, #{sessions := 0}} ->
            ?tp(debug, node_evacuation_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_evacuation_evict_sessions_over"}),
            NewData = Data#{current_sessions => 0},
            {next_state, prohibiting, NewData}
    end;
%% Unknown calls, infos and casts are logged and otherwise ignored.
handle_event({call, From}, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Msg, state => State, data => Data}),
    {keep_state_and_data, [{reply, From, ignored}]};
handle_event(info, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_info", info => Msg, state => State, data => Data}),
    keep_state_and_data;
handle_event(cast, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast => Msg, state => State, data => Data}),
    keep_state_and_data.
|
||||||
|
|
||||||
|
%% gen_statem callback: state and data are carried over unchanged on upgrade.
code_change(_OldVsn, CurrentState, CurrentData, _Extra) ->
    {ok, CurrentState, CurrentData}.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% internal funs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Default start options; callers merge user-supplied options over these.
default_opts() ->
    #{
        migrate_to => undefined,
        server_reference => undefined,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER,
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE
    }.
|
||||||
|
|
||||||
|
%% Builds the state-machine data: the options merged over `Data0`, plus the
%% initial and current connection/session counts read from the eviction
%% agent (which must already be enabled).
init_data(Data0, Opts) ->
    WithOpts = maps:merge(Data0, Opts),
    {enabled, #{connections := Conns, sessions := Sessions}} = emqx_eviction_agent:status(),
    Counters = #{
        initial_conns => Conns,
        current_conns => Conns,
        initial_sessions => Sessions,
        current_sessions => Sessions
    },
    maps:merge(WithOpts, Counters).
|
||||||
|
|
||||||
|
%% Strips everything init_data/2 added (counters and option keys) from the data.
deinit(Data) ->
    CounterKeys = [initial_conns, current_conns, initial_sessions, current_sessions],
    OptionKeys = maps:keys(default_opts()),
    maps:without(CounterKeys ++ OptionKeys, Data).
|
||||||
|
|
||||||
|
%% Logs a warning and also prints a notice to standard error that
%% evacuation is active on this node.
warn_enabled() ->
    ?SLOG(warning, #{msg => "node_evacuation_enabled"}),
    Notice = "Node evacuation is enabled. The node will not receive connections.~n",
    io:format(standard_error, Notice, []).
|
||||||
|
|
||||||
|
%% Resolves the session recipients: the available subset of the configured
%% nodes, or of all other running nodes when none were configured.
migrate_to(undefined) ->
    available_nodes(all_nodes());
migrate_to(Candidates) when is_list(Candidates) ->
    available_nodes(Candidates).
|
||||||
|
|
||||||
|
%% Returns node() when the local eviction agent is disabled and crashes with
%% badmatch otherwise; available_nodes/1 keeps only atom replies, so a crash
%% marks the node as unavailable.
is_node_available() ->
    disabled = emqx_eviction_agent:status(),
    node().
|
||||||
|
|
||||||
|
%% All running cluster nodes except the local one.
all_nodes() ->
    Running = mria_mnesia:running_nodes(),
    lists:delete(node(), Running).
|
|
@ -0,0 +1,120 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_evacuation_persist).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
save/1,
|
||||||
|
clear/0,
|
||||||
|
read/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-ifdef(TEST).
|
||||||
|
-export([evacuation_filepath/0]).
|
||||||
|
-endif.
|
||||||
|
|
||||||
|
-include("emqx_node_rebalance.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% do not persist `migrate_to`:
|
||||||
|
%% * after restart there is nothing to migrate
|
||||||
|
%% * this value may be invalid after node was offline
|
||||||
|
-type persisted_start_opts() :: #{
|
||||||
|
server_reference => emqx_eviction_agent:server_reference(),
|
||||||
|
conn_evict_rate => pos_integer(),
|
||||||
|
sess_evict_rate => pos_integer(),
|
||||||
|
wait_takeover => pos_integer()
|
||||||
|
}.
|
||||||
|
-type start_opts() :: #{
|
||||||
|
server_reference => emqx_eviction_agent:server_reference(),
|
||||||
|
conn_evict_rate => pos_integer(),
|
||||||
|
sess_evict_rate => pos_integer(),
|
||||||
|
wait_takeover => pos_integer(),
|
||||||
|
migrate_to => emqx_node_rebalance_evacuation:migrate_to()
|
||||||
|
}.
|
||||||
|
|
||||||
|
%% Writes the persistable subset of the start options (see persist_keys/0)
%% as pretty-printed JSON to the evacuation file, creating the parent
%% directory if needed. Crashes with function_clause on malformed options.
-spec save(persisted_start_opts()) -> ok_or_error(term()).
save(
    #{
        server_reference := ServerReference,
        conn_evict_rate := ConnEvictRate,
        sess_evict_rate := SessEvictRate,
        wait_takeover := WaitTakeover
    } = Data
) when
    (is_binary(ServerReference) orelse ServerReference =:= undefined),
    is_integer(ConnEvictRate),
    ConnEvictRate > 0,
    is_integer(SessEvictRate),
    SessEvictRate > 0,
    is_integer(WaitTakeover),
    WaitTakeover >= 0
->
    Filepath = evacuation_filepath(),
    case filelib:ensure_dir(Filepath) of
        {error, _} = Error ->
            Error;
        ok ->
            Persisted = prepare_for_encode(maps:with(persist_keys(), Data)),
            Json = emqx_utils_json:encode(Persisted, [pretty]),
            file:write_file(Filepath, Json)
    end.
|
||||||
|
|
||||||
|
%% Removes the persisted evacuation file.
%% A missing file counts as already cleared, so the call is idempotent;
%% any other file error is returned to the caller instead of being hidden
%% behind an `ok`-only spec.
-spec clear() -> ok | {error, file:posix() | badarg}.
clear() ->
    case file:delete(evacuation_filepath()) of
        ok -> ok;
        {error, enoent} -> ok;
        {error, _} = Error -> Error
    end.
|
||||||
|
|
||||||
|
%% Reads the persisted evacuation options.
%% Returns `none` when the file cannot be read. When the file exists but
%% does not decode to a JSON object, the defaults are returned as-is;
%% otherwise decoded values override the defaults key by key.
-spec read(start_opts()) -> {ok, start_opts()} | none.
read(DefaultOpts) ->
    case file:read_file(evacuation_filepath()) of
        {error, _} ->
            none;
        {ok, Content} ->
            Decoded = emqx_utils_json:safe_decode(Content, [return_maps]),
            case Decoded of
                {ok, Json} when is_map(Json) ->
                    {ok, map_to_opts(DefaultOpts, Json)};
                _NotAMap ->
                    {ok, DefaultOpts}
            end
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Internal funcs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Option keys that are written to disk (migrate_to is deliberately absent).
persist_keys() ->
    [server_reference, conn_evict_rate, sess_evict_rate, wait_takeover].
|
||||||
|
|
||||||
|
%% Maps an `undefined` server reference to `null` before JSON encoding;
%% anything else passes through untouched.
prepare_for_encode(Data) ->
    case Data of
        #{server_reference := undefined} ->
            Data#{server_reference => null};
        _Other ->
            Data
    end.
|
||||||
|
|
||||||
|
%% Inverse of prepare_for_encode/1: a `null` server reference becomes
%% `undefined`; anything else passes through untouched.
format_after_decode(Data) ->
    case Data of
        #{server_reference := null} ->
            Data#{server_reference => undefined};
        _Other ->
            Data
    end.
|
||||||
|
|
||||||
|
%% Builds an options map with exactly the keys of `DefaultOpts`: each value
%% is taken from the decoded JSON map (binary key of the same name) when
%% present, and from the default otherwise.
map_to_opts(DefaultOpts, Map) ->
    Defaults = maps:to_list(DefaultOpts),
    format_after_decode(map_to_opts(Defaults, Map, #{})).

%% Folds the `{Key, Default}` pairs into the accumulator `Opts`.
map_to_opts(Defaults, Map, Opts) ->
    lists:foldl(
        fun({Key, DefaultVal}, Acc) ->
            Acc#{Key => maps:get(atom_to_binary(Key), Map, DefaultVal)}
        end,
        Opts,
        Defaults
    ).
|
||||||
|
|
||||||
|
%% Path of the persisted evacuation file inside the EMQX data directory.
evacuation_filepath() ->
    DataDir = emqx:data_dir(),
    filename:join(DataDir, ?EVACUATION_FILENAME).
|
|
@ -0,0 +1,238 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_status).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
local_status/0,
|
||||||
|
local_status/1,
|
||||||
|
global_status/0,
|
||||||
|
format_local_status/1,
|
||||||
|
format_coordinator_status/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% For RPC
|
||||||
|
-export([
|
||||||
|
evacuation_status/0,
|
||||||
|
rebalance_status/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% APIs
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Status of this node: an active evacuation takes precedence; otherwise the
%% node reports a rebalance only if its agent is enabled, the coordinator is
%% running a rebalance, and this node is one of the donors.
-spec local_status() -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status() ->
    case emqx_node_rebalance_evacuation:status() of
        disabled ->
            agent_rebalance_status();
        {enabled, Status} ->
            {evacuation, evacuation(Status)}
    end.

%% Rebalance status as seen through the local rebalance agent.
agent_rebalance_status() ->
    case emqx_node_rebalance_agent:status() of
        disabled ->
            disabled;
        {enabled, CoordinatorPid} ->
            coordinator_rebalance_status(CoordinatorPid)
    end.

%% Asks the coordinator process for its status and projects it onto this node.
coordinator_rebalance_status(CoordinatorPid) ->
    case emqx_node_rebalance:status(CoordinatorPid) of
        disabled ->
            disabled;
        {enabled, Status} ->
            local_rebalance(Status, node())
    end.
|
||||||
|
|
||||||
|
%% Same as local_status/0, evaluated on `Node` through the status proto module.
-spec local_status(node()) -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status(Node) ->
    emqx_node_rebalance_status_proto_v1:local_status(Node).
|
||||||
|
|
||||||
|
%% Renders a local status map as text, fields in local display order.
-spec format_local_status(map()) -> iodata().
format_local_status(Status) ->
    FieldOrder = local_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||||
|
|
||||||
|
%% Collects rebalance and evacuation status from every running node and
%% keeps only the nodes on which the respective process is enabled.
%% Nodes that failed to answer are ignored.
-spec global_status() -> #{rebalances := [{node(), map()}], evacuations := [{node(), map()}]}.
global_status() ->
    Nodes = mria_mnesia:running_nodes(),
    {RebalanceResults, _} = emqx_node_rebalance_status_proto_v1:rebalance_status(Nodes),
    Rebalances = [
        {Node, coordinator_rebalance(Status)}
     || {Node, {enabled, Status}} <- RebalanceResults
    ],
    {EvacuationResults, _} = emqx_node_rebalance_status_proto_v1:evacuation_status(Nodes),
    Evacuations = [
        {Node, evacuation(Status)}
     || {Node, {enabled, Status}} <- EvacuationResults
    ],
    #{evacuations => Evacuations, rebalances => Rebalances}.
|
||||||
|
|
||||||
|
%% Renders a coordinator status map as text, fields in coordinator display order.
-spec format_coordinator_status(map()) -> iodata().
format_coordinator_status(Status) ->
    FieldOrder = coordinator_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Internal functions
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Converts the evacuation process status into the public status map.
%% Every key read here is mandatory; a missing one raises `{badkey, Key}`.
%% Both goals are always 0.
evacuation(Status) ->
    Get = fun(Key) -> maps:get(Key, Status) end,
    Counters = #{
        initial_connected => Get(initial_conns),
        current_connected => Get(current_conns),
        initial_sessions => Get(initial_sessions),
        current_sessions => Get(current_sessions)
    },
    #{
        state => Get(state),
        connection_eviction_rate => Get(conn_evict_rate),
        session_eviction_rate => Get(sess_evict_rate),
        connection_goal => 0,
        session_goal => 0,
        session_recipients => Get(migrate_to),
        stats => Counters
    }.
|
||||||
|
|
||||||
|
%% A node takes part in the reported rebalance only when it is a donor;
%% any other node reports `disabled`.
local_rebalance(#{donors := Donors} = Stats, Node) ->
    IsDonor = lists:member(Node, Donors),
    case IsDonor of
        false -> disabled;
        true -> {rebalance, donor_rebalance(Stats, Node)}
    end.
|
||||||
|
|
||||||
|
%% Builds the status map shown on a donor node: coordinator settings plus
%% this node's initial counts (from the coordinator status) and current
%% counts (from the local eviction agent). The two goal fields are only
%% present when the coordinator status carries the matching averages.
donor_rebalance(Status, Node) ->
    Opts = maps:get(opts, Status),
    InitialConnCounts = maps:get(initial_conn_counts, Status),
    InitialSessCounts = maps:get(initial_sess_counts, Status),

    CurrentStats = #{
        initial_connected => maps:get(Node, InitialConnCounts),
        initial_sessions => maps:get(Node, InitialSessCounts),
        current_connected => emqx_eviction_agent:connection_count(),
        current_sessions => emqx_eviction_agent:session_count(),
        current_disconnected_sessions => emqx_eviction_agent:session_count(
            disconnected
        )
    },
    Mandatory = #{
        state => maps:get(state, Status),
        coordinator_node => maps:get(coordinator_node, Status),
        connection_eviction_rate => maps:get(conn_evict_rate, Opts),
        session_eviction_rate => maps:get(sess_evict_rate, Opts),
        recipients => maps:get(recipients, Status),
        stats => CurrentStats
    },
    Optional = [
        {recipient_conn_avg, connection_goal},
        {recipient_sess_avg, disconnected_session_goal}
    ],
    lists:foldl(
        fun({SourceKey, TargetKey}, Acc) ->
            case Status of
                #{SourceKey := Value} -> Acc#{TargetKey => Value};
                _ -> Acc
            end
        end,
        Mandatory,
        Optional
    ).
|
||||||
|
|
||||||
|
%% Builds the cluster-wide view of a rebalance from the coordinator status.
%% The goal and donor-average fields are only present when the coordinator
%% status carries the matching keys.
coordinator_rebalance(Status) ->
    Opts = maps:get(opts, Status),
    Mandatory = #{
        state => maps:get(state, Status),
        coordinator_node => maps:get(coordinator_node, Status),
        connection_eviction_rate => maps:get(conn_evict_rate, Opts),
        session_eviction_rate => maps:get(sess_evict_rate, Opts),
        recipients => maps:get(recipients, Status),
        donors => maps:get(donors, Status)
    },
    Optional = [
        {recipient_conn_avg, connection_goal},
        {recipient_sess_avg, disconnected_session_goal},
        {donor_conn_avg, donor_conn_avg},
        {donor_sess_avg, donor_sess_avg}
    ],
    lists:foldl(
        fun({SourceKey, TargetKey}, Acc) ->
            case Status of
                #{SourceKey := Value} -> Acc#{TargetKey => Value};
                _ -> Acc
            end
        end,
        Mandatory,
        Optional
    ).
|
||||||
|
|
||||||
|
%% Display order of the fields of a local status map.
local_status_field_format_order() ->
    Settings = [state, coordinator_node, connection_eviction_rate, session_eviction_rate],
    Goals = [connection_goal, session_goal, disconnected_session_goal],
    Settings ++ Goals ++ [session_recipients, recipients, stats].
|
||||||
|
|
||||||
|
%% Display order of the fields of a coordinator status map.
coordinator_status_field_format_order() ->
    Participants = [state, coordinator_node, donors, recipients],
    Rates = [connection_eviction_rate, session_eviction_rate],
    Goals = [connection_goal, disconnected_session_goal],
    Participants ++ Rates ++ Goals ++ [donor_conn_avg, donor_sess_avg].
|
||||||
|
|
||||||
|
%% Formats the fields of `Status` listed in `FieldOrder`, in that order;
%% fields missing from the map are skipped.
format_status(Status, FieldOrder) ->
    [
        format_local_status_field({Field, maps:get(Field, Status)})
     || Field <- FieldOrder, maps:is_key(Field, Status)
    ].
|
||||||
|
|
||||||
|
%% Formats one `{Field, Value}` pair of a status map as a text line.
%% `stats` expands to a multi-line block; unknown fields crash with
%% function_clause.
format_local_status_field({stats, Stats}) ->
    format_local_stats(Stats);
format_local_status_field({Field, Value}) ->
    io_lib:format(status_field_format(Field), [Value]).

%% io_lib format string for each single-value status field.
status_field_format(state) ->
    "Rebalance state: ~p~n";
status_field_format(coordinator_node) ->
    "Coordinator node: ~p~n";
status_field_format(connection_eviction_rate) ->
    "Connection eviction rate: ~p connections/second~n";
status_field_format(session_eviction_rate) ->
    "Session eviction rate: ~p sessions/second~n";
status_field_format(connection_goal) ->
    "Connection goal: ~p~n";
status_field_format(session_goal) ->
    "Session goal: ~p~n";
status_field_format(disconnected_session_goal) ->
    "Disconnected session goal: ~p~n";
status_field_format(session_recipients) ->
    "Session recipient nodes: ~p~n";
status_field_format(recipients) ->
    "Recipient nodes: ~p~n";
status_field_format(donors) ->
    "Donor nodes: ~p~n";
status_field_format(donor_conn_avg) ->
    "Current average donor node connection count: ~p~n";
status_field_format(donor_sess_avg) ->
    "Current average donor node disconnected session count: ~p~n".
|
||||||
|
|
||||||
|
%% Renders the channel counters: a header line followed by one
%% "name: value" line per entry of the stats map.
format_local_stats(Stats) ->
    Lines = [
        io_lib:format(" ~p: ~p~n", [Name, Value])
     || {Name, Value} <- maps:to_list(Stats)
    ],
    ["Channel statistics:\n" | Lines].
|
||||||
|
|
||||||
|
%% RPC target: this node's evacuation status, tagged with the node name.
evacuation_status() ->
    Status = emqx_node_rebalance_evacuation:status(),
    {node(), Status}.
|
||||||
|
|
||||||
|
%% RPC target: this node's rebalance coordinator status, tagged with the node name.
rebalance_status() ->
    Status = emqx_node_rebalance:status(),
    {node(), Status}.
|
|
@ -0,0 +1,35 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_sup).
|
||||||
|
|
||||||
|
-behaviour(supervisor).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-export([init/1]).
|
||||||
|
|
||||||
|
%% Starts the supervisor, registered locally under the module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||||
|
|
||||||
|
%% supervisor callback: one permanent worker per rebalance component under
%% a one_for_one strategy (at most 10 restarts within 3600 seconds).
init([]) ->
    Workers = [
        emqx_node_rebalance_evacuation,
        emqx_node_rebalance_agent,
        emqx_node_rebalance
    ],
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    {ok, {SupFlags, [child_spec(Mod, []) || Mod <- Workers]}}.
|
||||||
|
|
||||||
|
%% Child spec of a permanent worker started via `Mod:start_link(Args...)`,
%% with a 5000 ms shutdown timeout.
child_spec(Mod, Args) ->
    StartMFA = {Mod, start_link, Args},
    #{
        id => Mod,
        modules => [Mod],
        type => worker,
        restart => permanent,
        shutdown => 5000,
        start => StartMFA
    }.
|
|
@ -0,0 +1,43 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_api_proto_v1).
|
||||||
|
|
||||||
|
-behaviour(emqx_bpapi).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
introduced_in/0,
|
||||||
|
|
||||||
|
node_rebalance_evacuation_start/2,
|
||||||
|
node_rebalance_evacuation_stop/1,
|
||||||
|
|
||||||
|
node_rebalance_start/2,
|
||||||
|
node_rebalance_stop/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/bpapi.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
%% emqx_bpapi callback; reports the version string "5.0.22".
introduced_in() ->
    "5.0.22".
|
||||||
|
|
||||||
|
%% Calls emqx_node_rebalance_evacuation:start/1 on `Node` with `Opts`.
-spec node_rebalance_evacuation_start(node(), emqx_node_rebalance_evacuation:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance_evacuation:start_error()).
node_rebalance_evacuation_start(Node, #{} = Opts) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, start, [Opts]).
|
||||||
|
|
||||||
|
%% Calls emqx_node_rebalance_evacuation:stop/0 on `Node`.
-spec node_rebalance_evacuation_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_evacuation_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, stop, []).
|
||||||
|
|
||||||
|
%% Calls emqx_node_rebalance:start/1 on `Node` with `Opts`.
-spec node_rebalance_start(node(), emqx_node_rebalance:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance:start_error()).
node_rebalance_start(Node, Opts) ->
    rpc:call(Node, emqx_node_rebalance, start, [Opts]).
|
||||||
|
|
||||||
|
%% Calls emqx_node_rebalance:stop/0 on `Node`.
-spec node_rebalance_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance, stop, []).
|
|
@ -0,0 +1,22 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_evacuation_proto_v1).
|
||||||
|
|
||||||
|
-behaviour(emqx_bpapi).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
introduced_in/0,
|
||||||
|
|
||||||
|
available_nodes/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/bpapi.hrl").
|
||||||
|
|
||||||
|
%% emqx_bpapi callback; reports the version string "5.0.22".
introduced_in() ->
    "5.0.22".
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance_evacuation:is_node_available/0 on `Nodes`.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance_evacuation, is_node_available, []).
|
|
@ -0,0 +1,62 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_proto_v1).
|
||||||
|
|
||||||
|
-behaviour(emqx_bpapi).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
introduced_in/0,
|
||||||
|
|
||||||
|
available_nodes/1,
|
||||||
|
evict_connections/2,
|
||||||
|
evict_sessions/4,
|
||||||
|
connection_counts/1,
|
||||||
|
session_counts/1,
|
||||||
|
enable_rebalance_agent/2,
|
||||||
|
disable_rebalance_agent/2,
|
||||||
|
disconnected_session_counts/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/bpapi.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
%% emqx_bpapi callback; reports the version string "5.0.22".
introduced_in() ->
    "5.0.22".
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance:is_node_available/0 on `Nodes`.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, is_node_available, []).
|
||||||
|
|
||||||
|
%% Multicalls emqx_eviction_agent:evict_connections/1 with `Count` on `Nodes`.
-spec evict_connections([node()], non_neg_integer()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_connections(Nodes, Count) ->
    rpc:multicall(Nodes, emqx_eviction_agent, evict_connections, [Count]).
|
||||||
|
|
||||||
|
%% Multicalls emqx_eviction_agent:evict_sessions/3 on `Nodes`, passing the
%% count, the recipient nodes and the connection state through unchanged.
-spec evict_sessions([node()], non_neg_integer(), [node()], emqx_channel:conn_state()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_sessions(Nodes, Count, RecipientNodes, ConnState) ->
    rpc:multicall(Nodes, emqx_eviction_agent, evict_sessions, [Count, RecipientNodes, ConnState]).
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance:connection_count/0 on `Nodes`.
-spec connection_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
connection_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, connection_count, []).
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance:session_count/0 on `Nodes`.
-spec session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
session_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, session_count, []).
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance_agent:enable/1 with `OwnerPid` on `Nodes`.
-spec enable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_enabled | eviction_agent_busy)).
enable_rebalance_agent(Nodes, OwnerPid) ->
    rpc:multicall(Nodes, emqx_node_rebalance_agent, enable, [OwnerPid]).
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance_agent:disable/1 with `OwnerPid` on `Nodes`.
-spec disable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_disabled | invalid_coordinator)).
disable_rebalance_agent(Nodes, OwnerPid) ->
    rpc:multicall(Nodes, emqx_node_rebalance_agent, disable, [OwnerPid]).
|
||||||
|
|
||||||
|
%% Multicalls emqx_node_rebalance:disconnected_session_count/0 on `Nodes`.
-spec disconnected_session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
disconnected_session_counts(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance, disconnected_session_count, []).
|
|
@ -0,0 +1,36 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_status_proto_v1).
|
||||||
|
|
||||||
|
-behaviour(emqx_bpapi).
|
||||||
|
|
||||||
|
-export([
|
||||||
|
introduced_in/0,
|
||||||
|
|
||||||
|
local_status/1,
|
||||||
|
rebalance_status/1,
|
||||||
|
evacuation_status/1
|
||||||
|
]).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/bpapi.hrl").
|
||||||
|
-include_lib("emqx/include/types.hrl").
|
||||||
|
|
||||||
|
%% Returns the version string "5.0.22".
%% NOTE(review): presumably the EMQX release in which this protocol version
%% first appeared, as required by the emqx_bpapi behaviour - confirm.
introduced_in() ->
|
||||||
|
"5.0.22".
|
||||||
|
|
||||||
|
%% Calls `emqx_node_rebalance_status:local_status()' on `Node' through
%% `rpc:call/4' and returns whatever that call returns (including a bad RPC
%% result, as reflected in the spec).
-spec local_status(node()) ->
|
||||||
|
emqx_rpc:badrpc() | disabled | {evacuation, map()} | {rebalance, map()}.
|
||||||
|
local_status(Node) ->
|
||||||
|
rpc:call(Node, emqx_node_rebalance_status, local_status, []).
|
||||||
|
|
||||||
|
%% Calls `emqx_node_rebalance_status:rebalance_status()' on every node in
%% `Nodes' through `rpc:multicall/4' and returns the multicall result as is.
-spec rebalance_status([node()]) ->
|
||||||
|
emqx_rpc:multicall_result({node(), map()}).
|
||||||
|
rebalance_status(Nodes) ->
|
||||||
|
rpc:multicall(Nodes, emqx_node_rebalance_status, rebalance_status, []).
|
||||||
|
|
||||||
|
%% Calls `emqx_node_rebalance_status:evacuation_status()' on every node in
%% `Nodes' through `rpc:multicall/4' and returns the multicall result as is.
-spec evacuation_status([node()]) ->
|
||||||
|
emqx_rpc:multicall_result({node(), map()}).
|
||||||
|
evacuation_status(Nodes) ->
|
||||||
|
rpc:multicall(Nodes, emqx_node_rebalance_status, evacuation_status, []).
|
|
@ -0,0 +1,229 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/asserts.hrl").
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect_many/1, emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||||
|
).
|
||||||
|
|
||||||
|
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||||
|
|
||||||
|
%% Common Test callback: test case discovery for this suite is delegated to
%% emqx_common_test_helpers:all/1.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||||
|
|
||||||
|
%% Suite setup: calls the common start-up helper with an empty application
%% list and hands the Common Test config back untouched.
init_per_suite(Config) ->
    NoApps = [],
    ok = emqx_common_test_helpers:start_apps(NoApps),
    Config.
|
||||||
|
|
||||||
|
%% Suite teardown: mirrors init_per_suite/1 by calling the common stop helper
%% with an empty application list.
end_per_suite(_Config) ->
    NoApps = [],
    ok = emqx_common_test_helpers:stop_apps(NoApps).
|
||||||
|
|
||||||
|
%% Per-case setup: starts a two-node cluster (donor on port 2883, recipient on
%% port 3883) with node names derived from the test case, starts snabbkaffe
%% tracing and stores the cluster description under `cluster_nodes'.
init_per_testcase(Case, Config) ->
    DonorSpec = {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
    RecipientSpec = {case_specific_node_name(?MODULE, Case, '_recipient'), 3883},
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
        [DonorSpec, RecipientSpec],
        ?START_APPS
    ),
    ok = snabbkaffe:start_trace(),
    [{cluster_nodes, ClusterNodes} | Config].
|
||||||
|
|
||||||
|
%% Per-case teardown: stops snabbkaffe, then stops the cluster that
%% init_per_testcase/2 stored under `cluster_nodes'.
end_per_testcase(_Case, Config) ->
    ok = snabbkaffe:stop(),
    ClusterNodes = ?config(cluster_nodes, Config),
    ok = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Connects 500 clients to the donor node, starts a rebalance from it and,
%% once the `emqx_node_rebalance_evict_sess_over' event is observed, checks
%% that the donor holds at most 50 connections / disconnected sessions more
%% than the recipient.
t_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),
    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },

    ?assertWaitEvent(
        ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),
        #{?snk_kind := emqx_node_rebalance_evict_sess_over},
        10000
    ),

    %% Connections, all sessions and disconnected sessions of a node,
    %% queried in that order.
    Counters = fun(Node) ->
        [
            rpc:call(Node, emqx_eviction_agent, Fn, Args)
         || {Fn, Args} <- [
                {connection_count, []},
                {session_count, []},
                {session_count, [disconnected]}
            ]
        ]
    end,
    [DonorConns, DonorSess, DonorDSess] = Counters(Donor),
    [RecipientConns, RecipientSess, RecipientDSess] = Counters(Recipient),

    ct:pal(
        "Donor: conn=~p, sess=~p, dsess=~p",
        [DonorConns, DonorSess, DonorDSess]
    ),
    ct:pal(
        "Recipient: conn=~p, sess=~p, dsess=~p",
        [RecipientConns, RecipientSess, RecipientDSess]
    ),

    ?assert(RecipientConns >= DonorConns - 50),
    ?assert(RecipientDSess >= DonorDSess - 50),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% Starts a rebalance on the donor and immediately stops the recipient node.
%% After the `emqx_node_rebalance_started' event has been seen, the donor is
%% expected to report the rebalance as `disabled'.
t_rebalance_node_crash(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),
    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },

    StartThenStopRecipient = fun() ->
        ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),
        emqx_common_test_helpers:stop_slave(Recipient)
    end,

    ?assertWaitEvent(
        StartThenStopRecipient(),
        #{?snk_kind := emqx_node_rebalance_started},
        1000
    ),

    DonorStatus = rpc:call(Donor, emqx_node_rebalance, status, []),
    ?assertEqual(disabled, DonorStatus),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% Starting a rebalance must be rejected with `nothing_to_balance' both on an
%% idle cluster and after 50 clients have connected to the donor (the
%% absolute thresholds used here are 50).
t_no_need_to_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },
    TryStart = fun() ->
        rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts])
    end,

    %% No clients at all.
    ?assertEqual({error, nothing_to_balance}, TryStart()),

    %% 50 clients connected to the donor.
    Clients = emqtt_connect_many(DonorPort, 50),
    ?assertEqual({error, nothing_to_balance}, TryStart()),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% Sends an unknown info message, cast and call to the `emqx_node_rebalance'
%% process on the donor, first before and then after a rebalance has been
%% started; the call is expected to be answered with `ignored' both times.
t_unknown_mesages(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),
    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        wait_health_check => 100,
        abs_conn_threshold => 50,
        nodes => [Donor, Recipient]
    },

    Server = rpc:call(Donor, erlang, whereis, [emqx_node_rebalance]),
    SendUnknowns = fun() ->
        Server ! unknown,
        ok = gen_server:cast(Server, unknown),
        ?assertEqual(
            ignored,
            gen_server:call(Server, unknown)
        )
    end,

    SendUnknowns(),
    ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),
    SendUnknowns(),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% With the eviction agent already enabled on the recipient, only the donor
%% should be reported by emqx_node_rebalance:available_nodes/1.
t_available_nodes(Config) ->
    [{Donor, _}, {Recipient, _}] = ?config(cluster_nodes, Config),

    %% Enable the eviction agent on the recipient so that the node is
    %% "occupied" and therefore not available for rebalance.
    ok = rpc:call(Recipient, emqx_eviction_agent, enable, [test_rebalance, undefined]),

    %% Only the donor should be available, since the recipient is "occupied".
    Available = rpc:call(Donor, emqx_node_rebalance, available_nodes, [[Donor, Recipient]]),
    ?assertEqual([Donor], Available).
|
|
@ -0,0 +1,214 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_agent_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx.hrl").
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[case_specific_node_name/2]
|
||||||
|
).
|
||||||
|
|
||||||
|
%% Common Test callback: the suite consists of the `local' and `cluster'
%% groups, in that order.
all() ->
    [{group, Name} || Name <- [local, cluster]].
|
||||||
|
|
||||||
|
%% Common Test callback: lists the test cases of the `local' and `cluster'
%% groups; neither group sets any group properties.
groups() ->
    LocalCases = [
        t_enable_disable,
        t_enable_egent_busy,
        t_unknown_messages
    ],
    ClusterCases = [
        t_rebalance_agent_coordinator_fail,
        t_rebalance_agent_fail
    ],
    [
        {local, [], LocalCases},
        {cluster, [], ClusterCases}
    ].
|
||||||
|
|
||||||
|
%% Suite setup: starts the eviction agent and node rebalance applications on
%% the test node and returns the config unchanged.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Suite teardown: stops the applications started by init_per_suite/1.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:stop_apps(Apps).
|
||||||
|
|
||||||
|
%% Group setup: records under the `cluster' key whether the group's cases
%% need a separate cluster node (`true' only for the `cluster' group).
%% Any other group name is rejected with `function_clause'.
init_per_group(Group, Config) when Group =:= local; Group =:= cluster ->
    NeedsCluster = (Group =:= cluster),
    [{cluster, NeedsCluster} | Config].
|
||||||
|
|
||||||
|
%% Group teardown: nothing to clean up at group level.
end_per_group(_GroupName, _GroupConfig) ->
    ok.
|
||||||
|
|
||||||
|
%% Per-case setup: cases of the `cluster' group get a dedicated single-node
%% cluster (port 2883) stored under `cluster_nodes'; cases of the `local'
%% group run with the config unchanged.
init_per_testcase(Case, Config) ->
    case ?config(cluster, Config) of
        false ->
            Config;
        true ->
            NodeSpec = {case_specific_node_name(?MODULE, Case), 2883},
            ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
                [NodeSpec],
                [emqx_eviction_agent, emqx_node_rebalance]
            ),
            [{cluster_nodes, ClusterNodes} | Config]
    end.
|
||||||
|
|
||||||
|
%% Per-case teardown: stops the cluster started for `cluster' group cases;
%% `local' group cases have nothing to release.
end_per_testcase(_Case, Config) ->
    case ?config(cluster, Config) of
        false ->
            ok;
        true ->
            ClusterNodes = ?config(cluster_nodes, Config),
            emqx_eviction_agent_test_helpers:stop_cluster(
                ClusterNodes,
                [emqx_eviction_agent, emqx_node_rebalance]
            )
    end.
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Local tests
|
||||||
|
|
||||||
|
%% Walks the local rebalance agent through its enable/disable life cycle and
%% checks the result of every step, including the error results for a repeated
%% enable, a disable by a foreign process and a repeated disable.
t_enable_disable(_Config) ->
    Owner = self(),

    %% Initially disabled.
    ?assertEqual(disabled, emqx_node_rebalance_agent:status()),

    %% The first enable succeeds, the second one is refused.
    ?assertEqual(ok, emqx_node_rebalance_agent:enable(Owner)),
    ?assertEqual({error, already_enabled}, emqx_node_rebalance_agent:enable(Owner)),
    ?assertEqual({enabled, Owner}, emqx_node_rebalance_agent:status()),

    %% A process other than the one that enabled the agent cannot disable it.
    Stranger = spawn_link(fun() -> ok end),
    ?assertEqual({error, invalid_coordinator}, emqx_node_rebalance_agent:disable(Stranger)),

    %% The first disable by the owner succeeds, the second one is refused.
    ?assertEqual(ok, emqx_node_rebalance_agent:disable(Owner)),
    ?assertEqual({error, already_disabled}, emqx_node_rebalance_agent:disable(Owner)),

    ?assertEqual(disabled, emqx_node_rebalance_agent:status()).
|
||||||
|
|
||||||
|
%% While the eviction agent is enabled under another kind (`rebalance_test'),
%% enabling the rebalance agent must fail with `eviction_agent_busy'.
t_enable_egent_busy(_Config) ->
    ok = emqx_eviction_agent:enable(rebalance_test, undefined),

    EnableResult = emqx_node_rebalance_agent:enable(self()),
    ?assertEqual({error, eviction_agent_busy}, EnableResult),

    ok = emqx_eviction_agent:disable(rebalance_test).
|
||||||
|
|
||||||
|
%% Sends an unknown cast, info message and call to the local rebalance agent;
%% the call is expected to be answered with `ignored'.
t_unknown_messages(_Config) ->
    Agent = whereis(emqx_node_rebalance_agent),

    ok = gen_server:cast(Agent, unknown),
    Agent ! unknown,

    CallReply = gen_server:call(Agent, unknown),
    ignored = CallReply.
|
||||||
|
|
||||||
|
%% Cluster tests
|
||||||
|
|
||||||
|
% The following tests verify that emqx_node_rebalance_agent correctly links
|
||||||
|
% coordinator process with emqx_eviction_agent-s.
|
||||||
|
|
||||||
|
%% Enables the rebalance agent on the cluster node on behalf of a local
%% coordinator process, kills the coordinator and expects the remote
%% `emqx_eviction_agent' process to exit within one second.
t_rebalance_agent_coordinator_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),

    %% The coordinator just waits until it is told to finish (or is killed).
    WaitForDone = fun() ->
        receive
            done -> ok
        end
    end,
    Coordinator = spawn_link(WaitForDone),

    StatusBefore = rpc:call(Node, emqx_eviction_agent, status, []),
    ?assertEqual(disabled, StatusBefore),

    EnableResult = rpc:call(Node, emqx_node_rebalance_agent, enable, [Coordinator]),
    ?assertEqual(ok, EnableResult),

    ?assertMatch(
        {enabled, _},
        rpc:call(Node, emqx_eviction_agent, status, [])
    ),

    %% Link to the remote eviction agent so that its exit is delivered here
    %% as a message (exits are trapped).
    EvictionAgent = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = link(EvictionAgent),

    true = exit(Coordinator, kill),

    receive
        {'EXIT', EvictionAgent, _} -> true
    after 1000 ->
        ct:fail("emqx_eviction_agent did not exit")
    end.
|
||||||
|
|
||||||
|
%% Enables the rebalance agent on the cluster node on behalf of a local
%% coordinator process, kills the remote `emqx_eviction_agent' and expects the
%% coordinator process to exit within one second.
t_rebalance_agent_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),

    %% The coordinator is linked to the test process, so its exit arrives here
    %% as an 'EXIT' message (exits are trapped).
    CoordinatorPid = spawn_link(
        fun() ->
            receive
                done -> ok
            end
        end
    ),

    ?assertEqual(
        ok,
        rpc:call(Node, emqx_node_rebalance_agent, enable, [CoordinatorPid])
    ),

    EvictionAgentPid = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = exit(EvictionAgentPid, kill),

    receive
        {'EXIT', CoordinatorPid, _} -> true
    after 1000 ->
        %% The process awaited above is the coordinator, so name it in the
        %% failure message.
        ct:fail("coordinator process did not exit")
    end.
|
|
@ -0,0 +1,444 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_api_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_mgmt_api_test_util,
|
||||||
|
[
|
||||||
|
request/2,
|
||||||
|
request/3,
|
||||||
|
uri/1
|
||||||
|
]
|
||||||
|
).
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||||
|
).
|
||||||
|
|
||||||
|
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||||
|
|
||||||
|
%% Common Test callback: test case discovery for this suite is delegated to
%% emqx_common_test_helpers:all/1.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||||
|
|
||||||
|
%% Suite setup: starts the applications listed in ?START_APPS on the test
%% node and returns the config unchanged.
init_per_suite(Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Suite teardown: stops the applications listed in ?START_APPS.
end_per_suite(_Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:stop_apps(Apps).
|
||||||
|
|
||||||
|
%% Per-case setup: starts a donor/recipient cluster with a case-specific
%% `emqx' data_dir setting, runs emqx_mgmt_api_test_util:init_suite/0 on the
%% donor and makes local API requests use the donor's auth header.
init_per_testcase(Case, Config) ->
    NodeSpecs = [
        {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
        {case_specific_node_name(?MODULE, Case, '_recipient'), 3883}
    ],
    EnvOverrides = [{emqx, data_dir, case_specific_data_dir(Case, Config)}],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
        NodeSpecs,
        ?START_APPS,
        EnvOverrides
    ),
    %% The donor is the first node of the started cluster.
    [{Donor, _} | _] = ClusterNodes,

    ok = rpc:call(Donor, emqx_mgmt_api_test_util, init_suite, []),
    ok = take_auth_header_from(Donor),

    [{cluster_nodes, ClusterNodes} | Config].
|
||||||
|
%% Per-case teardown: stops the cluster stored under `cluster_nodes'; the
%% result of the stop helper is returned without being checked.
end_per_testcase(_Case, Config) ->
    ClusterNodes = ?config(cluster_nodes, Config),
    _ = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Checks request validation of the "evacuation start" endpoint: malformed
%% options yield 400, an unknown node yields 404, and a well-formed request
%% yields 200 and shows up in the global status.
t_start_evacuation_validation(Config) ->
    [{Donor, _}, {Recipient, _}] = ?config(cluster_nodes, Config),
    StartPath = ["load_rebalance", atom_to_list(Donor), "evacuation", "start"],

    InvalidOpts = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{redirect_to => 123},
        #{wait_takeover => <<"wait">>},
        #{migrate_to => []},
        #{migrate_to => <<"migrate_to">>},
        #{migrate_to => [<<"bad_node">>]},
        #{migrate_to => [<<"bad_node">>, atom_to_binary(Donor)]},
        #{unknown => <<"Value">>}
    ],
    %% Every invalid option set must be rejected with 400.
    _ = [
        ?assertMatch({ok, 400, #{}}, api_post(StartPath, Opts))
     || Opts <- InvalidOpts
    ],

    %% Unknown node in the path.
    ?assertMatch(
        {ok, 404, #{}},
        api_post(["load_rebalance", "bad@node", "evacuation", "start"], #{})
    ),

    ValidOpts = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        redirect_to => <<"srv">>,
        migrate_to => [atom_to_binary(Recipient)]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidOpts)),

    DonorBin = atom_to_binary(Donor),
    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [#{<<"node">> := DonorBin}]}},
        api_get(["load_rebalance", "global_status"])
    ).
|
||||||
|
|
||||||
|
%% Checks request validation of the "rebalance start" endpoint: malformed
%% options yield 400, an unknown node yields 404, and a well-formed request
%% (with 50 clients on the donor) yields 200 and shows up in the global status.
t_start_rebalance_validation(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),
    StartPath = ["load_rebalance", atom_to_list(Donor), "start"],

    InvalidOpts = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{abs_conn_threshold => <<"act">>},
        #{rel_conn_threshold => <<"rct">>},
        #{abs_sess_threshold => <<"act">>},
        #{rel_sess_threshold => <<"rct">>},
        #{wait_takeover => <<"wait">>},
        #{wait_health_check => <<"wait">>},
        #{nodes => <<"nodes">>},
        #{nodes => []},
        #{nodes => [<<"bad_node">>]},
        #{nodes => [<<"bad_node">>, atom_to_binary(Donor)]},
        #{unknown => <<"Value">>}
    ],
    %% Every invalid option set must be rejected with 400.
    _ = [
        ?assertMatch({ok, 400, #{}}, api_post(StartPath, Opts))
     || Opts <- InvalidOpts
    ],

    %% Unknown node in the path.
    ?assertMatch(
        {ok, 404, #{}},
        api_post(["load_rebalance", "bad@node", "start"], #{})
    ),

    Clients = emqtt_connect_many(DonorPort, 50),

    ValidOpts = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        wait_health_check => 10,
        abs_conn_threshold => 10,
        rel_conn_threshold => 1.001,
        abs_sess_threshold => 10,
        rel_sess_threshold => 1.001,
        nodes => [
            atom_to_binary(Donor),
            atom_to_binary(Recipient)
        ]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidOpts)),

    DonorBin = atom_to_binary(Donor),
    ?assertMatch(
        {ok, 200, #{<<"rebalances">> := [#{<<"node">> := DonorBin}]}},
        api_get(["load_rebalance", "global_status"])
    ),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% Starts an evacuation of the donor through the HTTP API, verifies the local
%% and global status responses (after translation by the API module), stops
%% the evacuation and verifies that both status endpoints report it as gone.
t_start_stop_evacuation(Config) ->
    [{Donor, _}, {Recipient, _}] = ?config(cluster_nodes, Config),
    DonorBin = atom_to_binary(Donor),

    EvacuationPath = fun(Action) ->
        ["load_rebalance", atom_to_list(Donor), "evacuation", Action]
    end,

    %% The API example options, with the recipient as the migration target.
    Example = emqx_node_rebalance_api:rebalance_evacuation_example(),
    StartOpts = maps:merge(Example, #{migrate_to => [atom_to_binary(Recipient)]}),

    ?assertMatch({ok, 200, #{}}, api_post(EvacuationPath("start"), StartOpts)),

    %% Local status.
    LocalReply = api_get(["load_rebalance", "status"]),
    ?assertMatch({ok, 200, _}, LocalReply),
    {ok, 200, LocalStatus} = LocalReply,

    ?assertMatch(
        #{
            process := evacuation,
            connection_eviction_rate := 100,
            session_eviction_rate := 100,
            connection_goal := 0,
            session_goal := 0,
            stats := #{
                initial_connected := _,
                current_connected := _,
                initial_sessions := _,
                current_sessions := _
            }
        },
        emqx_node_rebalance_api:translate(local_status_enabled, LocalStatus)
    ),

    %% Global status.
    GlobalReply = api_get(["load_rebalance", "global_status"]),
    ?assertMatch({ok, 200, _}, GlobalReply),
    {ok, 200, GlobalStatus} = GlobalReply,

    ?assertMatch(
        #{
            rebalances := [],
            evacuations := [
                #{
                    node := DonorBin,
                    connection_eviction_rate := 100,
                    session_eviction_rate := 100,
                    connection_goal := 0,
                    session_goal := 0,
                    stats := #{
                        initial_connected := _,
                        current_connected := _,
                        initial_sessions := _,
                        current_sessions := _
                    }
                }
            ]
        },
        emqx_node_rebalance_api:translate(global_status, GlobalStatus)
    ),

    %% Stop and verify that nothing is running any more.
    ?assertMatch({ok, 200, #{}}, api_post(EvacuationPath("stop"), #{})),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(["load_rebalance", "status"])
    ),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(["load_rebalance", "global_status"])
    ).
|
||||||
|
|
||||||
|
%% Starts a rebalance on the donor through the HTTP API (100 clients
%% connected, API example options without `nodes'), verifies the local and
%% global status responses, stops the rebalance and verifies that both status
%% endpoints report it as gone.
t_start_stop_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _}] = ?config(cluster_nodes, Config),
    RebalancePath = fun(Action) ->
        ["load_rebalance", atom_to_list(Donor), Action]
    end,

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(["load_rebalance", "status"])
    ),

    Clients = emqtt_connect_many(DonorPort, 100),

    Example = emqx_node_rebalance_api:rebalance_example(),
    StartOpts = maps:without([nodes], Example),

    ?assertMatch({ok, 200, #{}}, api_post(RebalancePath("start"), StartOpts)),

    %% Local status.
    LocalReply = api_get(["load_rebalance", "status"]),
    ?assertMatch({ok, 200, _}, LocalReply),
    {ok, 200, LocalStatus} = LocalReply,

    ?assertMatch(
        #{process := rebalance, connection_eviction_rate := 10, session_eviction_rate := 20},
        emqx_node_rebalance_api:translate(local_status_enabled, LocalStatus)
    ),

    DonorBin = atom_to_binary(Donor),
    RecipientBin = atom_to_binary(Recipient),

    %% Global status: checked both as raw JSON and after translation.
    GlobalReply = api_get(["load_rebalance", "global_status"]),
    ?assertMatch({ok, 200, _}, GlobalReply),
    {ok, 200, GlobalStatus} = GlobalReply,

    ?assertMatch(
        {ok, 200, #{
            <<"evacuations">> := [],
            <<"rebalances">> :=
                [
                    #{
                        <<"state">> := _,
                        <<"node">> := DonorBin,
                        <<"coordinator_node">> := _,
                        <<"connection_eviction_rate">> := 10,
                        <<"session_eviction_rate">> := 20,
                        <<"donors">> := [DonorBin],
                        <<"recipients">> := [RecipientBin]
                    }
                ]
        }},
        GlobalReply
    ),

    ?assertMatch(
        #{
            evacuations := [],
            rebalances := [
                #{
                    state := _,
                    node := DonorBin,
                    coordinator_node := _,
                    connection_eviction_rate := 10,
                    session_eviction_rate := 20,
                    donors := [DonorBin],
                    recipients := [RecipientBin]
                }
            ]
        },
        emqx_node_rebalance_api:translate(global_status, GlobalStatus)
    ),

    %% Stop and verify that nothing is running any more.
    ?assertMatch({ok, 200, #{}}, api_post(RebalancePath("stop"), #{})),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(["load_rebalance", "status"])
    ),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(["load_rebalance", "global_status"])
    ),

    ok = stop_many(Clients).
|
||||||
|
|
||||||
|
%% The availability check endpoint must answer 200 while no evacuation is
%% running on the donor, 503 while one is running, and 200 again after it has
%% been stopped.
t_availability_check(Config) ->
    [{Donor, _} | _] = ?config(cluster_nodes, Config),
    CheckPath = ["load_rebalance", "availability_check"],

    ?assertMatch({ok, 200, #{}}, api_get(CheckPath)),

    ok = rpc:call(Donor, emqx_node_rebalance_evacuation, start, [#{}]),
    ?assertMatch({ok, 503, _}, api_get(CheckPath)),

    ok = rpc:call(Donor, emqx_node_rebalance_evacuation, stop, []),
    ?assertMatch({ok, 200, #{}}, api_get(CheckPath)).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Performs a GET request for `Path' and, on success, returns
%% `{ok, Code, Body}' with the response body decoded from JSON into maps.
%% An `{error, _}' result of the request is returned unchanged.
api_get(Path) ->
    Reply = request(get, uri(Path)),
    case Reply of
        {error, _} ->
            Reply;
        {ok, Code, RawBody} ->
            {ok, Code, jiffy:decode(RawBody, [return_maps])}
    end.
|
||||||
|
|
||||||
|
%% Performs a POST request for `Path' with `Data' as the payload and, on
%% success, returns `{ok, Code, Body}' with the response body decoded from
%% JSON into maps. An `{error, _}' result of the request is returned unchanged.
api_post(Path, Data) ->
    Reply = request(post, uri(Path), Data),
    case Reply of
        {error, _} ->
            Reply;
        {ok, Code, RawBody} ->
            {ok, Code, jiffy:decode(RawBody, [return_maps])}
    end.
|
||||||
|
|
||||||
|
%% Mocks emqx_common_test_http:default_auth_header/0 (passthrough for all
%% other functions) so that it returns the auth header obtained from `Node'
%% via RPC. Always returns `ok'.
take_auth_header_from(Node) ->
    Mocked = emqx_common_test_http,
    RemoteAuthHeader = fun() ->
        rpc:call(Node, emqx_common_test_http, default_auth_header, [])
    end,
    meck:new(Mocked, [passthrough]),
    meck:expect(Mocked, default_auth_header, RemoteAuthHeader),
    ok.
|
||||||
|
|
||||||
|
%% Returns a per-test-case directory below the Common Test `priv_dir'
%% (`PrivDir/Case'), or `undefined' when the config has no `priv_dir'.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    if
        PrivDir =:= undefined -> undefined;
        true -> filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,291 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_cli_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||||
|
).
|
||||||
|
|
||||||
|
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||||
|
|
||||||
|
%% Common Test callback: the test case list is obtained from
%% `emqx_common_test_helpers:all/1' for this module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||||
|
|
||||||
|
%% Starts the applications listed in `?START_APPS' and returns `Config'
%% unchanged.
init_per_suite(Config) ->
    Apps = ?START_APPS,
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||||
|
|
||||||
|
%% Stops the applications from `?START_APPS' in reverse order and returns
%% `Config' unchanged.
end_per_suite(Config) ->
    StopOrder = lists:reverse(?START_APPS),
    emqx_common_test_helpers:stop_apps(StopOrder),
    Config.
|
||||||
|
|
||||||
|
%% Per-testcase setup.
%% For `t_rebalance' any local evacuation is stopped and a two-node cluster
%% (donor on port 2883, recipient on port 3883) is started; the result of
%% `start_cluster/2' is stored in `Config' under `cluster_nodes'.
%% For every other case only local evacuation and rebalance are stopped.
init_per_testcase(t_rebalance = Case, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    Donor = {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
    Recipient = {case_specific_node_name(?MODULE, Case, '_recipient'), 3883},
    Cluster = emqx_eviction_agent_test_helpers:start_cluster(
        [Donor, Recipient],
        ?START_APPS
    ),
    [{cluster_nodes, Cluster} | Config];
init_per_testcase(_Case, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    Config.
|
||||||
|
|
||||||
|
%% Per-testcase teardown: stops local evacuation and rebalance (results are
%% ignored). For `t_rebalance' the cluster stored under `cluster_nodes' is
%% stopped as well, and the result of `stop_cluster/2' is returned.
end_per_testcase(t_rebalance, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    Cluster = ?config(cluster_nodes, Config),
    emqx_eviction_agent_test_helpers:stop_cluster(Cluster, ?START_APPS);
end_per_testcase(_Case, _Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    emqx_node_rebalance:stop().
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Drives the evacuation commands of `emqx_node_rebalance_cli:cli/1' on the
%% local node: usage and status output, rejection of malformed `start'
%% arguments, a successful start, rejection of a second start while the
%% evacuation is enabled, and stop (which succeeds once, then fails).
t_evacuation(_Config) ->
    ThisNode = atom_to_list(node()),
    ShowStatus = fun() ->
        ok = emqx_node_rebalance_cli:cli(["status"]),
        ok = emqx_node_rebalance_cli:cli(["node-status"]),
        ok = emqx_node_rebalance_cli:cli(["node-status", ThisNode])
    end,

    %% usage
    ok = emqx_node_rebalance_cli:cli(["foobar"]),

    %% status
    ShowStatus(),

    %% start with invalid args; every entry is appended to
    %% `start --evacuation' and must make the command return false
    InvalidExtras = [
        ["--foo-bar"],
        ["--conn-evict-rate", "foobar"],
        ["--sess-evict-rate", "foobar"],
        ["--wait-takeover", "foobar"],
        ["--migrate-to", "nonexistent@node"],
        ["--migrate-to", ""],
        ["--unknown-arg"]
    ],
    lists:foreach(
        fun(Extra) ->
            ?assertNot(
                emqx_node_rebalance_cli:cli(["start", "--evacuation" | Extra])
            )
        end,
        InvalidExtras
    ),

    %% start with valid args
    ValidStart = [
        "start",
        "--evacuation",
        "--conn-evict-rate",
        "10",
        "--sess-evict-rate",
        "10",
        "--wait-takeover",
        "10",
        "--migrate-to",
        ThisNode,
        "--redirect-to",
        "srv"
    ],
    ?assert(emqx_node_rebalance_cli:cli(ValidStart)),

    %% status
    ShowStatus(),

    ?assertMatch({enabled, #{}}, emqx_node_rebalance_evacuation:status()),

    %% already enabled
    SecondStart = [
        "start",
        "--evacuation",
        "--conn-evict-rate",
        "10",
        "--redirect-to",
        "srv"
    ],
    ?assertNot(emqx_node_rebalance_cli:cli(SecondStart)),

    %% stop
    true = emqx_node_rebalance_cli:cli(["stop"]),
    false = emqx_node_rebalance_cli:cli(["stop"]),

    ?assertEqual(disabled, emqx_node_rebalance_evacuation:status()).
|
||||||
|
|
||||||
|
%% Drives the rebalance commands of the CLI on the donor node over rpc:
%% rejection of malformed `start' arguments, a successful start with 20
%% client connections open on the donor port, status output, rejection of a
%% second start, and stop (which succeeds once, then fails).
t_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    Donor = atom_to_list(DonorNode),
    Recipient = atom_to_list(RecipientNode),
    OnDonor = fun(Args) -> emqx_node_rebalance_cli(DonorNode, Args) end,

    %% start with invalid args; every entry is appended to `start' and must
    %% make the command return false
    InvalidExtras = [
        ["--foo-bar"],
        ["--conn-evict-rate", "foobar"],
        ["--abs-conn-threshold", "foobar"],
        ["--rel-conn-threshold", "foobar"],
        ["--sess-evict-rate", "foobar"],
        ["--abs-sess-threshold", "foobar"],
        ["--rel-sess-threshold", "foobar"],
        ["--wait-takeover", "foobar"],
        ["--wait-health-check", "foobar"],
        ["--nodes", "nonexistent@node"],
        ["--nodes", ""],
        ["--nodes", Recipient],
        ["--unknown-arg"]
    ],
    lists:foreach(
        fun(Extra) -> ?assertNot(OnDonor(["start" | Extra])) end,
        InvalidExtras
    ),

    Conns = emqtt_connect_many(DonorPort, 20),

    %% start with valid args
    ValidStart = [
        "start",
        "--conn-evict-rate",
        "10",
        "--abs-conn-threshold",
        "10",
        "--rel-conn-threshold",
        "1.1",
        "--sess-evict-rate",
        "10",
        "--abs-sess-threshold",
        "10",
        "--rel-sess-threshold",
        "1.1",
        "--wait-takeover",
        "10",
        "--nodes",
        Donor ++ "," ++ Recipient
    ],
    ?assert(OnDonor(ValidStart)),

    %% status
    ok = OnDonor(["status"]),
    ok = OnDonor(["node-status"]),
    ok = OnDonor(["node-status", Donor]),

    ?assertMatch(
        {enabled, #{}},
        rpc:call(DonorNode, emqx_node_rebalance, status, [])
    ),

    %% already enabled
    ?assertNot(OnDonor(["start"])),

    %% stop
    true = OnDonor(["stop"]),
    false = OnDonor(["stop"]),

    ?assertEqual(
        disabled,
        rpc:call(DonorNode, emqx_node_rebalance, status, [])
    ),

    ok = stop_many(Conns).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Runs `emqx_node_rebalance_cli:cli(Args)' on `Node' over rpc and returns
%% its result. A `{badrpc, Reason}' reply is turned into `error(Reason)'.
emqx_node_rebalance_cli(Node, Args) ->
    Reply = rpc:call(Node, emqx_node_rebalance_cli, cli, [Args]),
    case Reply of
        {badrpc, Why} ->
            error(Why);
        _ ->
            Reply
    end.
|
|
@ -0,0 +1,270 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_evacuation_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||||
|
-include_lib("emqx/include/asserts.hrl").
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||||
|
|
||||||
|
-import(
|
||||||
|
emqx_eviction_agent_test_helpers,
|
||||||
|
[emqtt_connect/1, emqtt_try_connect/1, case_specific_node_name/3]
|
||||||
|
).
|
||||||
|
|
||||||
|
%% Common Test callback: the suite consists of the `one_node' and `two_node'
%% groups, in that order.
all() ->
    [{group, Name} || Name <- [one_node, two_node]].
|
||||||
|
|
||||||
|
%% Common Test callback: defines the `one_node' and `two_node' groups, both
%% with an empty property list.
groups() ->
    OneNode = {one_node, [], one_node_cases()},
    TwoNode = {two_node, [], two_node_cases()},
    [OneNode, TwoNode].
|
||||||
|
|
||||||
|
%% Test cases that belong to the `two_node' group.
two_node_cases() ->
    [t_conn_evicted, t_migrate_to, t_session_evicted].
|
||||||
|
|
||||||
|
%% Test cases of the `one_node' group: everything reported by
%% `emqx_common_test_helpers:all/1' for this module minus the two-node cases.
one_node_cases() ->
    Everything = emqx_common_test_helpers:all(?MODULE),
    TwoNode = two_node_cases(),
    Everything -- TwoNode.
|
||||||
|
|
||||||
|
%% Calls `emqx_common_test_helpers:start_apps/1' with an empty application
%% list (must return `ok') and returns `Config' unchanged.
init_per_suite(Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:start_apps(NoExtraApps),
    Config.
|
||||||
|
|
||||||
|
%% Calls `emqx_common_test_helpers:stop_apps/1' with an empty application
%% list; both that call and this callback must yield `ok'.
end_per_suite(_Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:stop_apps(NoExtraApps),
    ok.
|
||||||
|
|
||||||
|
%% Records the group name (`one_node' or `two_node') in `Config' under the
%% `cluster_type' key. Any other group name is a `function_clause' error.
init_per_group(Group, Config) when Group =:= one_node; Group =:= two_node ->
    [{cluster_type, Group} | Config].
|
||||||
|
|
||||||
|
%% Group teardown: nothing to clean up, always `ok'.
end_per_group(_Group, _Config) -> ok.
|
||||||
|
|
||||||
|
%% Starts a cluster for the test case: one node (evacuated, port 2883) for
%% the `one_node' cluster type, two nodes (evacuated on 2883, recipient on
%% 3883) for `two_node'. Each node gets a case-specific `emqx' `data_dir'
%% setting. A snabbkaffe trace is started afterwards, and the result of
%% `start_cluster/3' is stored in `Config' under `cluster_nodes'.
init_per_testcase(Case, Config) ->
    NodeSpec = fun(Suffix, Port) ->
        {case_specific_node_name(?MODULE, Case, Suffix), Port}
    end,
    NodeSpecs =
        case ?config(cluster_type, Config) of
            one_node ->
                [NodeSpec('_evacuated', 2883)];
            two_node ->
                [NodeSpec('_evacuated', 2883), NodeSpec('_recipient', 3883)]
        end,
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    Env = [{emqx, data_dir, case_specific_data_dir(Case, Config)}],
    Cluster = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, Apps, Env),
    ok = snabbkaffe:start_trace(),
    [{cluster_nodes, Cluster} | Config].
|
||||||
|
|
||||||
|
%% Stops snabbkaffe and then the cluster stored in `Config' under
%% `cluster_nodes'; both steps must return `ok'.
end_per_testcase(_Case, Config) ->
    ok = snabbkaffe:stop(),
    Cluster = ?config(cluster_nodes, Config),
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_eviction_agent_test_helpers:stop_cluster(Cluster, Apps).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% One node tests
|
||||||
|
|
||||||
|
%% After the eviction agent on the node has been enabled under another kind
%% (`other_rebalance'), starting an evacuation must fail with
%% `{error, eviction_agent_busy}'.
t_agent_busy(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_eviction_agent, enable, [other_rebalance, undefined]),

    StartResult = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertEqual({error, eviction_agent_busy}, StartResult).
|
||||||
|
|
||||||
|
%% Starting an evacuation twice: the first start returns `ok', the second
%% `{error, already_started}'.
t_already_started(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    SecondStart = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertEqual({error, already_started}, SecondStart).
|
||||||
|
|
||||||
|
%% Stopping an evacuation that was never started returns
%% `{error, not_started}'.
t_not_started(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),

    StopResult = rpc:call(Node, emqx_node_rebalance_evacuation, stop, []),
    ?assertEqual({error, not_started}, StopResult).
|
||||||
|
|
||||||
|
%% Once the evacuation has been started on the node, a new connection attempt
%% to its port is refused with `use_another_server'.
t_start(Config) ->
    process_flag(trap_exit, true),

    [{Node, Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    ConnectResult = emqtt_try_connect([{port, Port}]),
    ?assertMatch({error, {use_another_server, #{}}}, ConnectResult).
|
||||||
|
|
||||||
|
%% The evacuation must survive a restart of its supervised process: after
%% `emqx_node_rebalance_evacuation' is terminated and restarted under
%% `emqx_node_rebalance_sup', connections are still refused with
%% `use_another_server' and the status still reports `conn_evict_rate' 10.
t_persistence(Config) ->
    process_flag(trap_exit, true),

    [{Node, Port}] = ?config(cluster_nodes, Config),
    ConnOpts = [{port, Port}],
    ChildRef = [emqx_node_rebalance_sup, emqx_node_rebalance_evacuation],

    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect(ConnOpts)
    ),

    %% restart the evacuation process under its supervisor
    ok = rpc:call(Node, supervisor, terminate_child, ChildRef),
    {ok, _} = rpc:call(Node, supervisor, restart_child, ChildRef),

    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect(ConnOpts)
    ),
    ?assertMatch(
        {enabled, #{conn_evict_rate := 10}},
        rpc:call(Node, emqx_node_rebalance_evacuation, status, [])
    ).
|
||||||
|
|
||||||
|
%% Sends an unexpected plain message, cast and call to the registered
%% `emqx_node_rebalance_evacuation' process on the node; the call must be
%% answered with `ignored'.
t_unknown_messages(Config) ->
    process_flag(trap_exit, true),

    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    Server = rpc:call(Node, erlang, whereis, [emqx_node_rebalance_evacuation]),

    _ = erlang:send(Server, unknown),
    ok = gen_server:cast(Server, unknown),

    CallReply = gen_server:call(Server, unknown),
    ?assertEqual(ignored, CallReply).
|
||||||
|
|
||||||
|
%% Two node tests
|
||||||
|
|
||||||
|
%% A client connected to the donor node must be disconnected once the
%% evacuation starts, and new connection attempts must be refused.
%%
%% Steps:
%%  1. connect a client to the donor port;
%%  2. start the evacuation and wait up to 1000 ms for a
%%     `node_evacuation_evict_conn' trace event;
%%  3. check that a new connection attempt fails with `use_another_server';
%%  4. expect the linked client process to exit with a `disconnected' reason
%%     carrying the "use another server" reason code within 1000 ms.
t_conn_evicted(Config) ->
    %% Needed so that the linked client's exit arrives as an 'EXIT' message.
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, _] = ?config(cluster_nodes, Config),

    {ok, C} = emqtt_connect([{clientid, <<"evacuated">>}, {port, DonorPort}]),

    ?assertWaitEvent(
        ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
        #{?snk_kind := node_evacuation_evict_conn},
        1000
    ),

    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect([{clientid, <<"connecting">>}, {port, DonorPort}])
    ),

    receive
        %% ?RC_USE_ANOTHER_SERVER (from emqx_mqtt.hrl) replaces the literal
        %% 156 so this case matches the style of t_session_evicted.
        {'EXIT', C, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
    after 1000 ->
        ct:fail("Connection not evicted")
    end.
|
||||||
|
|
||||||
|
%% Checks `emqx_node_rebalance_evacuation:migrate_to/1' as called on the
%% donor node:
%%  - `undefined' yields the recipient node;
%%  - a list with an unknown node yields `[]';
%%  - after the eviction agent has been enabled on the recipient,
%%    `undefined' yields `[]'.
t_migrate_to(Config) ->
    [{Donor, _DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),
    MigrateTo = fun(Requested) ->
        rpc:call(Donor, emqx_node_rebalance_evacuation, migrate_to, [Requested])
    end,

    ?assertEqual([Recipient], MigrateTo(undefined)),
    ?assertEqual([], MigrateTo(['unknown@node'])),

    ok = rpc:call(Recipient, emqx_eviction_agent, enable, [test_rebalance, undefined]),

    ?assertEqual([], MigrateTo(undefined)).
|
||||||
|
|
||||||
|
%% A client with a non-clean session on the donor must be disconnected with
%% `?RC_USE_ANOTHER_SERVER' when the evacuation runs, and the single channel
%% registered for its client id must afterwards live on the recipient node.
t_session_evicted(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),
    ClientId = <<"client_with_sess">>,

    {ok, Client} = emqtt_connect([
        {port, DonorPort}, {clientid, ClientId}, {clean_start, false}
    ]),

    %% wait up to 5000 ms for session eviction to be reported as finished
    ?assertWaitEvent(
        ok = rpc:call(Donor, emqx_node_rebalance_evacuation, start, [opts(Config)]),
        #{?snk_kind := node_evacuation_evict_sess_over},
        5000
    ),

    receive
        {'EXIT', Client, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
    after 1000 ->
        ct:fail("Connection not evicted")
    end,

    [Channel] = rpc:call(Donor, emqx_cm_registry, lookup_channels, [ClientId]),

    ?assertEqual(Recipient, node(Channel)).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Evacuation options used by the tests in this suite; `migrate_to' depends
%% on the cluster type recorded in `Config'.
opts(Config) ->
    Targets = migrate_to(Config),
    #{
        migrate_to => Targets,
        wait_takeover => 1,
        sess_evict_rate => 10,
        conn_evict_rate => 10,
        server_reference => <<"srv">>
    }.
|
||||||
|
|
||||||
|
%% Nodes to migrate sessions to: none for the `one_node' cluster type, the
%% second entry of `cluster_nodes' for `two_node'.
migrate_to(Config) ->
    ClusterType = ?config(cluster_type, Config),
    case ClusterType of
        two_node ->
            [_, {Recipient, _Port}] = ?config(cluster_nodes, Config),
            [Recipient];
        one_node ->
            []
    end.
|
||||||
|
|
||||||
|
%% Returns `<priv_dir>/<Case>' built from the `priv_dir' entry of `Config',
%% or `undefined' when `Config' has no `priv_dir' entry.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    case PrivDir of
        undefined ->
            undefined;
        _ ->
            filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,108 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_node_rebalance_evacuation_persist_SUITE).
|
||||||
|
|
||||||
|
-compile(export_all).
|
||||||
|
-compile(nowarn_export_all).
|
||||||
|
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
-include_lib("common_test/include/ct.hrl").
|
||||||
|
|
||||||
|
%% Common Test callback: the test case list is obtained from
%% `emqx_common_test_helpers:all/1' for this module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||||
|
|
||||||
|
%% No suite-level setup is performed; `Config' is returned unchanged.
init_per_suite(Config) -> Config.
|
||||||
|
|
||||||
|
%% No suite-level teardown is performed; always `ok'.
end_per_suite(_Config) -> ok.
|
||||||
|
|
||||||
|
%% Clears persisted evacuation state before each case (the result of
%% `clear/0' is ignored) and returns `Config' unchanged.
init_per_testcase(_Case, Config) ->
    _Cleared = emqx_node_rebalance_evacuation_persist:clear(),
    Config.
|
||||||
|
|
||||||
|
%% Clears persisted evacuation state after each case and returns whatever
%% `clear/0' returns.
end_per_testcase(_Case, _Config) ->
    emqx_node_rebalance_evacuation_persist:clear().
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Tests
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Saved options must be read back exactly as saved, regardless of the
%% defaults passed to `read/1'. Checked once with a binary
%% `server_reference' and once with `server_reference => undefined'.
t_save_read(_Config) ->
    Defaults = #{
        server_reference => <<"default_ref">>,
        conn_evict_rate => 2001,
        sess_evict_rate => 2002,
        wait_takeover => 2003
    },
    Saved = #{
        server_reference => <<"ref">>,
        conn_evict_rate => 1001,
        sess_evict_rate => 1002,
        wait_takeover => 1003
    },
    RoundTrip = fun(Opts) ->
        ok = emqx_node_rebalance_evacuation_persist:save(Opts),
        {ok, ReadBack} = emqx_node_rebalance_evacuation_persist:read(Defaults),
        ?assertEqual(Opts, ReadBack)
    end,

    RoundTrip(Saved),
    RoundTrip(Saved#{server_reference => undefined}).
|
||||||
|
|
||||||
|
%% With an evacuation file containing an empty JSON object, `read/1' must
%% return exactly the defaults it was given.
t_read_default(_Config) ->
    ok = write_evacuation_file(<<"{}">>),

    Defaults = #{
        wait_takeover => 1003,
        sess_evict_rate => 1002,
        conn_evict_rate => 1001,
        server_reference => <<"ref">>
    },

    {ok, ReadBack} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, ReadBack).
|
||||||
|
|
||||||
|
%% With an evacuation file containing malformed JSON, `read/1' must still
%% succeed and return exactly the defaults it was given.
t_read_bad_data(_Config) ->
    ok = write_evacuation_file(<<"{bad json">>),

    Defaults = #{
        wait_takeover => 1003,
        sess_evict_rate => 1002,
        conn_evict_rate => 1001,
        server_reference => <<"ref">>
    },

    {ok, ReadBack} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, ReadBack).
|
||||||
|
|
||||||
|
%% `read/1' returns `{ok, _}' while an evacuation file exists and `none'
%% after `clear/0' has been called.
t_clear(_Config) ->
    ReadPersisted = fun() -> emqx_node_rebalance_evacuation_persist:read(#{}) end,

    ok = write_evacuation_file(<<"{}">>),
    ?assertMatch({ok, _}, ReadPersisted()),

    ok = emqx_node_rebalance_evacuation_persist:clear(),
    ?assertEqual(none, ReadPersisted()).
|
||||||
|
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Helpers
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% Writes `Json' verbatim to the path reported by
%% `emqx_node_rebalance_evacuation_persist:evacuation_filepath/0', creating
%% the parent directory first. Both steps must return `ok'.
write_evacuation_file(Json) ->
    DirCheckPath = emqx_node_rebalance_evacuation_persist:evacuation_filepath(),
    ok = filelib:ensure_dir(DirCheckPath),
    %% The path is queried again for the write, as in the original sequence.
    TargetPath = emqx_node_rebalance_evacuation_persist:evacuation_filepath(),
    ok = file:write_file(TargetPath, Json).
|
|
@ -9,18 +9,7 @@
|
||||||
|
|
||||||
-include_lib("eunit/include/eunit.hrl").
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
-include_lib("common_test/include/ct.hrl").
|
-include_lib("common_test/include/ct.hrl").
|
||||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
-include_lib("emqx/include/asserts.hrl").
|
||||||
|
|
||||||
-define(assertWaitEvent(Code, EventMatch, Timeout),
|
|
||||||
?assertMatch(
|
|
||||||
{_, {ok, EventMatch}},
|
|
||||||
?wait_async_action(
|
|
||||||
Code,
|
|
||||||
EventMatch,
|
|
||||||
Timeout
|
|
||||||
)
|
|
||||||
)
|
|
||||||
).
|
|
||||||
|
|
||||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||||
|
|
||||||
|
|
|
@ -72,4 +72,6 @@ is_running_node(Node) ->
|
||||||
handle_result({ok, Result}) ->
|
handle_result({ok, Result}) ->
|
||||||
?OK(Result);
|
?OK(Result);
|
||||||
handle_result({error, Reason}) ->
|
handle_result({error, Reason}) ->
|
||||||
?BAD_REQUEST(Reason).
|
?BAD_REQUEST(Reason);
|
||||||
|
handle_result({HTTPCode, Content}) when is_integer(HTTPCode) ->
|
||||||
|
{HTTPCode, Content}.
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Add node rebalance/node evacuation functionality.
|
||||||
|
See also: [design doc](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md)
|
2
mix.exs
2
mix.exs
|
@ -412,6 +412,8 @@ defmodule EMQXUmbrella.MixProject do
|
||||||
emqx_bridge_oracle: :permanent,
|
emqx_bridge_oracle: :permanent,
|
||||||
emqx_bridge_rabbitmq: :permanent,
|
emqx_bridge_rabbitmq: :permanent,
|
||||||
emqx_ee_schema_registry: :permanent,
|
emqx_ee_schema_registry: :permanent,
|
||||||
|
emqx_eviction_agent: :permanent,
|
||||||
|
emqx_node_rebalance: :permanent,
|
||||||
emqx_ft: :permanent
|
emqx_ft: :permanent
|
||||||
],
|
],
|
||||||
else: []
|
else: []
|
||||||
|
|
|
@ -481,6 +481,8 @@ relx_apps_per_edition(ee) ->
|
||||||
emqx_bridge_oracle,
|
emqx_bridge_oracle,
|
||||||
emqx_bridge_rabbitmq,
|
emqx_bridge_rabbitmq,
|
||||||
emqx_ee_schema_registry,
|
emqx_ee_schema_registry,
|
||||||
|
emqx_eviction_agent,
|
||||||
|
emqx_node_rebalance,
|
||||||
emqx_ft
|
emqx_ft
|
||||||
];
|
];
|
||||||
relx_apps_per_edition(ce) ->
|
relx_apps_per_edition(ce) ->
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
emqx_eviction_agent_api {
|
||||||
|
|
||||||
|
node_eviction_status_get.desc:
|
||||||
|
"""Get the node eviction status"""
|
||||||
|
|
||||||
|
node_eviction_status_get.label:
|
||||||
|
"""Node Eviction Status"""
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,267 @@
|
||||||
|
emqx_node_rebalance_api {
|
||||||
|
|
||||||
|
load_rebalance_status.desc:
|
||||||
|
"""Get rebalance status of the current node"""
|
||||||
|
|
||||||
|
load_rebalance_status.label:
|
||||||
|
"""Get rebalance status"""
|
||||||
|
|
||||||
|
load_rebalance_global_status.desc:
|
||||||
|
"""Get status of all rebalance/evacuation processes across the cluster"""
|
||||||
|
|
||||||
|
load_rebalance_global_status.label:
|
||||||
|
"""Get global rebalance status"""
|
||||||
|
|
||||||
|
load_rebalance_availability_check.desc:
|
||||||
|
"""Check if the node is being evacuated or rebalanced"""
|
||||||
|
|
||||||
|
load_rebalance_availability_check.label:
|
||||||
|
"""Availability check"""
|
||||||
|
|
||||||
|
load_rebalance_start.desc:
|
||||||
|
"""Start rebalance process"""
|
||||||
|
|
||||||
|
load_rebalance_start.label:
|
||||||
|
"""Start rebalance"""
|
||||||
|
|
||||||
|
load_rebalance_stop.desc:
|
||||||
|
"""Stop rebalance process"""
|
||||||
|
|
||||||
|
load_rebalance_stop.label:
|
||||||
|
"""Stop rebalance"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_start.desc:
|
||||||
|
"""Start evacuation process"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_start.label:
|
||||||
|
"""Start evacuation"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_stop.desc:
|
||||||
|
"""Stop evacuation process"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_stop.label:
|
||||||
|
"""Stop evacuation"""
|
||||||
|
|
||||||
|
param_node.desc:
|
||||||
|
"""Node name"""
|
||||||
|
|
||||||
|
param_node.label:
|
||||||
|
"""Node name"""
|
||||||
|
|
||||||
|
wait_health_check.desc:
|
||||||
|
"""Time to wait before starting the rebalance process, in seconds"""
|
||||||
|
|
||||||
|
wait_health_check.label:
|
||||||
|
"""Wait health check"""
|
||||||
|
|
||||||
|
conn_evict_rate.desc:
|
||||||
|
"""The rate of evicting connections, in connections per second"""
|
||||||
|
|
||||||
|
conn_evict_rate.label:
|
||||||
|
"""Connection eviction rate"""
|
||||||
|
|
||||||
|
sess_evict_rate.desc:
|
||||||
|
"""The rate of evicting sessions, in sessions per second"""
|
||||||
|
|
||||||
|
sess_evict_rate.label:
|
||||||
|
"""Session eviction rate"""
|
||||||
|
|
||||||
|
abs_conn_threshold.desc:
|
||||||
|
"""Maximum desired difference between the number of connections on the node and the average number of connections on the recipient nodes. Difference lower than this is the goal of the rebalance process."""
|
||||||
|
|
||||||
|
abs_conn_threshold.label:
|
||||||
|
"""Absolute connection threshold"""
|
||||||
|
|
||||||
|
rel_conn_threshold.desc:
|
||||||
|
"""Maximum desired fraction between the number of connections on the node and the average number of connections on the recipient nodes. Fraction lower than this is the goal of the rebalance process."""
|
||||||
|
|
||||||
|
rel_conn_threshold.label:
|
||||||
|
"""Relative connection threshold"""
|
||||||
|
|
||||||
|
abs_sess_threshold.desc:
|
||||||
|
"""Maximum desired difference between the number of sessions on the node and the average number of sessions on the recipient nodes. Difference lower than this is the goal of the evacuation process."""
|
||||||
|
|
||||||
|
abs_sess_threshold.label:
|
||||||
|
"""Absolute session threshold"""
|
||||||
|
|
||||||
|
rel_sess_threshold.desc:
|
||||||
|
"""Maximum desired fraction between the number of sessions on the node and the average number of sessions on the recipient nodes. Fraction lower than this is the goal of the evacuation process."""
|
||||||
|
|
||||||
|
rel_sess_threshold.label:
|
||||||
|
"""Relative session threshold"""
|
||||||
|
|
||||||
|
wait_takeover.desc:
|
||||||
|
"""Time to wait before starting session evacuation process, in seconds"""
|
||||||
|
|
||||||
|
wait_takeover.label:
|
||||||
|
"""Wait takeover"""
|
||||||
|
|
||||||
|
redirect_to.desc:
|
||||||
|
"""Server reference to redirect clients to (MQTTv5 Server redirection)"""
|
||||||
|
|
||||||
|
redirect_to.label:
|
||||||
|
"""Redirect to"""
|
||||||
|
|
||||||
|
migrate_to.desc:
|
||||||
|
"""Nodes to migrate sessions to"""
|
||||||
|
|
||||||
|
migrate_to.label:
|
||||||
|
"""Migrate to"""
|
||||||
|
|
||||||
|
rebalance_nodes.desc:
|
||||||
|
"""Nodes to participate in rebalance"""
|
||||||
|
|
||||||
|
rebalance_nodes.label:
|
||||||
|
"""Rebalance nodes"""
|
||||||
|
|
||||||
|
local_status_enabled.desc:
|
||||||
|
"""Whether the node is being evacuated"""
|
||||||
|
|
||||||
|
local_status_enabled.label:
|
||||||
|
"""Local evacuation status"""
|
||||||
|
|
||||||
|
local_status_process.desc:
|
||||||
|
"""The type of the task that is being performed on the node: 'evacuation' or 'rebalance'"""
|
||||||
|
|
||||||
|
local_status_process.label:
|
||||||
|
"""Task Type"""
|
||||||
|
|
||||||
|
local_status_state.desc:
|
||||||
|
"""The state of the process that is being performed on the node"""
|
||||||
|
|
||||||
|
local_status_state.label:
|
||||||
|
"""Rebalance/evacuation current state"""
|
||||||
|
|
||||||
|
local_status_coordinator_node.desc:
|
||||||
|
"""The node that is coordinating rebalance process"""
|
||||||
|
|
||||||
|
local_status_coordinator_node.label:
|
||||||
|
"""Coordinator node"""
|
||||||
|
|
||||||
|
local_status_connection_eviction_rate.desc:
|
||||||
|
"""The rate of evicting connections, in connections per second"""
|
||||||
|
|
||||||
|
local_status_connection_eviction_rate.label:
|
||||||
|
"""Connection eviction rate"""
|
||||||
|
|
||||||
|
local_status_session_eviction_rate.desc:
|
||||||
|
"""The rate of evicting sessions, in sessions per second"""
|
||||||
|
|
||||||
|
local_status_session_eviction_rate.label:
|
||||||
|
"""Session eviction rate"""
|
||||||
|
|
||||||
|
local_status_connection_goal.desc:
|
||||||
|
"""The number of connections that the node should have after the rebalance/evacuation process"""
|
||||||
|
|
||||||
|
local_status_connection_goal.label:
|
||||||
|
"""Connection goal"""
|
||||||
|
|
||||||
|
local_status_session_goal.desc:
|
||||||
|
"""The number of sessions that the node should have after the evacuation process"""
|
||||||
|
|
||||||
|
local_status_session_goal.label:
|
||||||
|
"""Session goal"""
|
||||||
|
|
||||||
|
local_status_disconnected_session_goal.desc:
|
||||||
|
"""The number of disconnected sessions that the node should have after the rebalance process"""
|
||||||
|
|
||||||
|
local_status_disconnected_session_goal.label:
|
||||||
|
"""Disconnected session goal"""
|
||||||
|
|
||||||
|
local_status_session_recipients.desc:
|
||||||
|
"""List of nodes to which sessions are being evacuated"""
|
||||||
|
|
||||||
|
local_status_session_recipients.label:
|
||||||
|
"""Session recipients"""
|
||||||
|
|
||||||
|
local_status_recipients.desc:
|
||||||
|
"""List of nodes to which connections/sessions are being evacuated during rebalance"""
|
||||||
|
|
||||||
|
local_status_recipients.label:
|
||||||
|
"""Recipients"""
|
||||||
|
|
||||||
|
local_status_stats.desc:
|
||||||
|
"""Statistics of the evacuation/rebalance process"""
|
||||||
|
|
||||||
|
local_status_stats.label:
|
||||||
|
"""Statistics"""
|
||||||
|
|
||||||
|
status_stats_initial_connected.desc:
|
||||||
|
"""The number of connections on the node before the evacuation/rebalance process"""
|
||||||
|
|
||||||
|
status_stats_initial_connected.label:
|
||||||
|
"""Initial connected"""
|
||||||
|
|
||||||
|
status_stats_current_connected.desc:
|
||||||
|
"""Current number of connections on the node"""
|
||||||
|
|
||||||
|
status_stats_current_connected.label:
|
||||||
|
"""Current connections"""
|
||||||
|
|
||||||
|
status_stats_initial_sessions.desc:
|
||||||
|
"""The number of sessions on the node before the evacuation/rebalance process"""
|
||||||
|
|
||||||
|
status_stats_initial_sessions.label:
|
||||||
|
"""Initial sessions"""
|
||||||
|
|
||||||
|
status_stats_current_sessions.desc:
|
||||||
|
"""Current number of sessions on the node"""
|
||||||
|
|
||||||
|
status_stats_current_sessions.label:
|
||||||
|
"""Current sessions"""
|
||||||
|
|
||||||
|
status_stats_current_disconnected_sessions.desc:
|
||||||
|
"""Current number of disconnected sessions on the node"""
|
||||||
|
|
||||||
|
status_stats_current_disconnected_sessions.label:
|
||||||
|
"""Current disconnected sessions"""
|
||||||
|
|
||||||
|
coordinator_status_donors.desc:
|
||||||
|
"""List of nodes from which connections/sessions are being evacuated"""
|
||||||
|
|
||||||
|
coordinator_status_donors.label:
|
||||||
|
"""Donors"""
|
||||||
|
|
||||||
|
coordinator_status_donor_conn_avg.desc:
|
||||||
|
"""Average number of connections per donor node"""
|
||||||
|
|
||||||
|
coordinator_status_donor_conn_avg.label:
|
||||||
|
"""Donor connections average"""
|
||||||
|
|
||||||
|
coordinator_status_donor_sess_avg.desc:
|
||||||
|
"""Average number of sessions per donor node"""
|
||||||
|
|
||||||
|
coordinator_status_donor_sess_avg.label:
|
||||||
|
"""Donor sessions average"""
|
||||||
|
|
||||||
|
coordinator_status_node.desc:
|
||||||
|
"""The node that is coordinating the evacuation/rebalance process"""
|
||||||
|
|
||||||
|
coordinator_status_node.label:
|
||||||
|
"""Coordinator node"""
|
||||||
|
|
||||||
|
evacuation_status_node.desc:
|
||||||
|
"""The node that is being evacuated"""
|
||||||
|
|
||||||
|
evacuation_status_node.label:
|
||||||
|
"""Evacuated node"""
|
||||||
|
|
||||||
|
global_status_evacuations.desc:
|
||||||
|
"""List of nodes that are being evacuated"""
|
||||||
|
|
||||||
|
global_status_evacuations.label:
|
||||||
|
"""Evacuations"""
|
||||||
|
|
||||||
|
global_status_rebalances.desc:
|
||||||
|
"""List of nodes that coordinate a rebalance"""
|
||||||
|
|
||||||
|
global_status_rebalances.label:
|
||||||
|
"""Rebalances"""
|
||||||
|
|
||||||
|
empty_response.desc:
|
||||||
|
"""The response is empty"""
|
||||||
|
|
||||||
|
empty_response.label:
|
||||||
|
"""Empty response"""
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
emqx_eviction_agent_api {
|
||||||
|
|
||||||
|
node_eviction_status_get.desc:
|
||||||
|
"""获取节点驱逐状态"""
|
||||||
|
|
||||||
|
node_eviction_status_get.label:
|
||||||
|
"""节点驱逐状态"""
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,266 @@
|
||||||
|
emqx_node_rebalance_api {
|
||||||
|
|
||||||
|
load_rebalance_status.desc:
|
||||||
|
"""获取当前节点的重平衡状态"""
|
||||||
|
|
||||||
|
load_rebalance_status.label:
|
||||||
|
"""获取重平衡状态"""
|
||||||
|
|
||||||
|
load_rebalance_global_status.desc:
|
||||||
|
"""获取集群中所有重平衡/疏散任务的状态"""
|
||||||
|
|
||||||
|
load_rebalance_global_status.label:
|
||||||
|
"""获取全局重平衡状态"""
|
||||||
|
|
||||||
|
load_rebalance_availability_check.desc:
|
||||||
|
"""检查节点是否正在被执行重平衡或疏散"""
|
||||||
|
|
||||||
|
load_rebalance_availability_check.label:
|
||||||
|
"""可用性检查"""
|
||||||
|
|
||||||
|
load_rebalance_start.desc:
|
||||||
|
"""启动重平衡任务"""
|
||||||
|
|
||||||
|
load_rebalance_start.label:
|
||||||
|
"""启动重平衡"""
|
||||||
|
|
||||||
|
load_rebalance_stop.desc:
|
||||||
|
"""停止重平衡任务"""
|
||||||
|
|
||||||
|
load_rebalance_stop.label:
|
||||||
|
"""停止重平衡"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_start.desc:
|
||||||
|
"""启动疏散任务"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_start.label:
|
||||||
|
"""启动疏散"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_stop.desc:
|
||||||
|
"""停止疏散任务"""
|
||||||
|
|
||||||
|
load_rebalance_evacuation_stop.label:
|
||||||
|
"""停止疏散"""
|
||||||
|
|
||||||
|
param_node.desc:
|
||||||
|
"""节点名称"""
|
||||||
|
|
||||||
|
param_node.label:
|
||||||
|
"""节点名称"""
|
||||||
|
|
||||||
|
wait_health_check.desc:
|
||||||
|
"""启动重平衡任务前等待的时间,单位为秒"""
|
||||||
|
|
||||||
|
wait_health_check.label:
|
||||||
|
"""等待健康检查"""
|
||||||
|
|
||||||
|
conn_evict_rate.desc:
|
||||||
|
"""每秒迁出连接数"""
|
||||||
|
|
||||||
|
conn_evict_rate.label:
|
||||||
|
"""连接迁出速率"""
|
||||||
|
|
||||||
|
sess_evict_rate.desc:
|
||||||
|
"""每秒迁出会话数"""
|
||||||
|
|
||||||
|
sess_evict_rate.label:
|
||||||
|
"""会话迁出速率"""
|
||||||
|
|
||||||
|
abs_conn_threshold.desc:
|
||||||
|
"""当前节点上的连接数与迁入节点上的平均连接数的差值(绝对值)上限,低于该差值时停止迁移连接。"""
|
||||||
|
|
||||||
|
abs_conn_threshold.label:
|
||||||
|
"""连接数差值"""
|
||||||
|
|
||||||
|
rel_conn_threshold.desc:
|
||||||
|
"""当前节点上的连接数与迁入节点上的平均连接数的比值上限,低于该比值时停止迁移连接。"""
|
||||||
|
|
||||||
|
rel_conn_threshold.label:
|
||||||
|
"""连接数比值"""
|
||||||
|
|
||||||
|
abs_sess_threshold.desc:
|
||||||
|
"""当前节点上的会话数与迁入节点上的平均会话数之间的差值(绝对值)上限,低于该差值时停止迁移会话。"""
|
||||||
|
|
||||||
|
abs_sess_threshold.label:
|
||||||
|
"""会话数差值"""
|
||||||
|
|
||||||
|
rel_sess_threshold.desc:
|
||||||
|
"""当前节点上的会话数与迁入节点上的平均会话数的比值上限,低于该比值时停止迁移会话。"""
|
||||||
|
|
||||||
|
rel_sess_threshold.label:
|
||||||
|
"""会话数比值"""
|
||||||
|
|
||||||
|
wait_takeover.desc:
|
||||||
|
"""开始会话疏散任务之前的等待时间,以秒为单位"""
|
||||||
|
|
||||||
|
wait_takeover.label:
|
||||||
|
"""等待接管"""
|
||||||
|
|
||||||
|
redirect_to.desc:
|
||||||
|
"""将客户端重定向到的服务器参考(MQTTv5 服务器重定向)"""
|
||||||
|
|
||||||
|
redirect_to.label:
|
||||||
|
"""重定向至"""
|
||||||
|
|
||||||
|
migrate_to.desc:
|
||||||
|
"""接受会话迁入的节点"""
|
||||||
|
|
||||||
|
migrate_to.label:
|
||||||
|
"""迁入节点"""
|
||||||
|
|
||||||
|
rebalance_nodes.desc:
|
||||||
|
"""参与重平衡的节点"""
|
||||||
|
|
||||||
|
rebalance_nodes.label:
|
||||||
|
"""重平衡节点"""
|
||||||
|
|
||||||
|
local_status_enabled.desc:
|
||||||
|
"""节点是否正在执行重平衡或疏散任务"""
|
||||||
|
|
||||||
|
local_status_enabled.label:
|
||||||
|
"""运行状态"""
|
||||||
|
|
||||||
|
local_status_process.desc:
|
||||||
|
"""正在节点上执行的任务:'evacuation' 或 'rebalance'"""
|
||||||
|
|
||||||
|
local_status_process.label:
|
||||||
|
"""节点任务"""
|
||||||
|
|
||||||
|
local_status_state.desc:
|
||||||
|
"""正在节点上执行的任务的状态"""
|
||||||
|
|
||||||
|
local_status_state.label:
|
||||||
|
"""重平衡/疏散当前状态"""
|
||||||
|
|
||||||
|
local_status_coordinator_node.desc:
|
||||||
|
"""协调分配重平衡任务的节点"""
|
||||||
|
|
||||||
|
local_status_coordinator_node.label:
|
||||||
|
"""协调节点"""
|
||||||
|
|
||||||
|
local_status_connection_eviction_rate.desc:
|
||||||
|
"""每秒迁出的连接数"""
|
||||||
|
|
||||||
|
local_status_connection_eviction_rate.label:
|
||||||
|
"""连接迁出速率"""
|
||||||
|
|
||||||
|
local_status_session_eviction_rate.desc:
|
||||||
|
"""每秒迁出的会话数"""
|
||||||
|
|
||||||
|
local_status_session_eviction_rate.label:
|
||||||
|
"""会话迁出速率"""
|
||||||
|
|
||||||
|
local_status_connection_goal.desc:
|
||||||
|
"""节点在重平衡/疏散任务完成后预期拥有的连接数"""
|
||||||
|
|
||||||
|
local_status_connection_goal.label:
|
||||||
|
"""连接数目标"""
|
||||||
|
|
||||||
|
local_status_session_goal.desc:
|
||||||
|
"""疏散任务完成后节点预期的会话数"""
|
||||||
|
|
||||||
|
local_status_session_goal.label:
|
||||||
|
"""会话数目标"""
|
||||||
|
|
||||||
|
local_status_disconnected_session_goal.desc:
|
||||||
|
"""重平衡任务完成后节点预期的无连接的会话数"""
|
||||||
|
|
||||||
|
local_status_disconnected_session_goal.label:
|
||||||
|
"""预期无连接会话数"""
|
||||||
|
|
||||||
|
local_status_session_recipients.desc:
|
||||||
|
"""会话被迁入的节点列表"""
|
||||||
|
|
||||||
|
local_status_session_recipients.label:
|
||||||
|
"""会话迁入节点"""
|
||||||
|
|
||||||
|
local_status_recipients.desc:
|
||||||
|
"""在重平衡期间接受连接/会话迁入的节点列表"""
|
||||||
|
|
||||||
|
local_status_recipients.label:
|
||||||
|
"""接受迁入节点"""
|
||||||
|
|
||||||
|
local_status_stats.desc:
|
||||||
|
"""疏散/重平衡的统计"""
|
||||||
|
|
||||||
|
local_status_stats.label:
|
||||||
|
"""统计数据"""
|
||||||
|
|
||||||
|
status_stats_initial_connected.desc:
|
||||||
|
"""疏散/重平衡任务开始之前节点上的连接数"""
|
||||||
|
|
||||||
|
status_stats_initial_connected.label:
|
||||||
|
"""初始连接"""
|
||||||
|
|
||||||
|
status_stats_current_connected.desc:
|
||||||
|
"""节点上的当前连接数"""
|
||||||
|
|
||||||
|
status_stats_current_connected.label:
|
||||||
|
"""当前连接"""
|
||||||
|
|
||||||
|
status_stats_initial_sessions.desc:
|
||||||
|
"""疏散/重平衡任务开始之前节点上的会话数"""
|
||||||
|
|
||||||
|
status_stats_initial_sessions.label:
|
||||||
|
"""初始会话"""
|
||||||
|
|
||||||
|
status_stats_current_sessions.desc:
|
||||||
|
"""节点上的当前会话数"""
|
||||||
|
|
||||||
|
status_stats_current_sessions.label:
|
||||||
|
"""当前会话"""
|
||||||
|
|
||||||
|
status_stats_current_disconnected_sessions.desc:
|
||||||
|
"""节点上当前无连接的会话数"""
|
||||||
|
|
||||||
|
status_stats_current_disconnected_sessions.label:
|
||||||
|
"""当前无连接会话"""
|
||||||
|
|
||||||
|
coordinator_status_donors.desc:
|
||||||
|
"""正在迁出连接/会话的节点列表"""
|
||||||
|
|
||||||
|
coordinator_status_donors.label:
|
||||||
|
"""迁出节点"""
|
||||||
|
|
||||||
|
coordinator_status_donor_conn_avg.desc:
|
||||||
|
"""每个迁出节点的平均连接数"""
|
||||||
|
|
||||||
|
coordinator_status_donor_conn_avg.label:
|
||||||
|
"""迁出节点连接平均值"""
|
||||||
|
|
||||||
|
coordinator_status_donor_sess_avg.desc:
|
||||||
|
"""每个迁出节点的平均会话数"""
|
||||||
|
|
||||||
|
coordinator_status_donor_sess_avg.label:
|
||||||
|
"""迁出节点会话平均数"""
|
||||||
|
|
||||||
|
coordinator_status_node.desc:
|
||||||
|
"""协调分配疏散/重平衡任务的节点"""
|
||||||
|
|
||||||
|
coordinator_status_node.label:
|
||||||
|
"""协调节点"""
|
||||||
|
|
||||||
|
evacuation_status_node.desc:
|
||||||
|
"""正在迁出的节点"""
|
||||||
|
|
||||||
|
evacuation_status_node.label:
|
||||||
|
"""疏散节点"""
|
||||||
|
|
||||||
|
global_status_evacuations.desc:
|
||||||
|
"""正在迁出的节点列表"""
|
||||||
|
|
||||||
|
global_status_evacuations.label:
|
||||||
|
"""疏散"""
|
||||||
|
|
||||||
|
global_status_rebalances.desc:
|
||||||
|
"""协调重平衡的节点列表"""
|
||||||
|
|
||||||
|
global_status_rebalances.label:
|
||||||
|
"""重平衡"""
|
||||||
|
|
||||||
|
empty_response.desc:
|
||||||
|
"""响应为空"""
|
||||||
|
|
||||||
|
empty_response.label:
|
||||||
|
"""空响应"""
|
||||||
|
}
|
Loading…
Reference in New Issue