feat(rebalance): port apps from 4.x
This commit is contained in:
parent
954eef8f39
commit
609f7bd8fd
1
Makefile
1
Makefile
|
@ -179,6 +179,7 @@ clean-all:
|
|||
@rm -f rebar.lock
|
||||
@rm -rf deps
|
||||
@rm -rf _build
|
||||
@rm -f emqx_dialyzer_*_plt
|
||||
|
||||
.PHONY: deps-all
|
||||
deps-all: $(REBAR) $(PROFILES:%=deps-%)
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% This file contains common macros for testing.
|
||||
%% It must not be used anywhere except in test suites.
|
||||
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
%% Evaluate Code through ?wait_async_action and assert that its result
%% matches `{_, {ok, EventMatch}}'. EventMatch is used both as the event
%% pattern handed to ?wait_async_action and as the asserted pattern.
-define(assertWaitEvent(Code, EventMatch, Timeout),
    ?assertMatch(
        {_, {ok, EventMatch}},
        ?wait_async_action(Code, EventMatch, Timeout)
    )
).
|
|
@ -0,0 +1,42 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Metric name atoms grouped under CHANNEL_METRICS.
%% The element order is significant only in that it is kept unchanged.
-define(CHANNEL_METRICS, [
    %% receive side
    recv_pkt,
    recv_msg,
    'recv_msg.qos0', 'recv_msg.qos1', 'recv_msg.qos2',
    'recv_msg.dropped',
    'recv_msg.dropped.await_pubrel_timeout',
    %% send side
    send_pkt,
    send_msg,
    'send_msg.qos0', 'send_msg.qos1', 'send_msg.qos2',
    'send_msg.dropped',
    'send_msg.dropped.expired',
    'send_msg.dropped.queue_full',
    'send_msg.dropped.too_large'
]).
|
||||
|
||||
%% Keys listed under INFO_KEYS (order preserved).
-define(INFO_KEYS, [conninfo, conn_state, clientinfo, session, will_msg]).
|
|
@ -34,6 +34,7 @@
|
|||
-define(HP_BRIDGE, 870).
|
||||
-define(HP_DELAY_PUB, 860).
|
||||
%% apps that can stop the hooks chain from continuing
|
||||
-define(HP_NODE_REBALANCE, 110).
|
||||
-define(HP_EXHOOK, 100).
|
||||
|
||||
%% == Lowest Priority = 0, don't change this value as the plugins may depend on it.
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
{emqx_conf,2}.
|
||||
{emqx_dashboard,1}.
|
||||
{emqx_delayed,1}.
|
||||
{emqx_eviction_agent,1}.
|
||||
{emqx_exhook,1}.
|
||||
{emqx_gateway_api_listeners,1}.
|
||||
{emqx_gateway_cm,1}.
|
||||
|
@ -26,6 +27,10 @@
|
|||
{emqx_mgmt_cluster,1}.
|
||||
{emqx_mgmt_trace,1}.
|
||||
{emqx_mgmt_trace,2}.
|
||||
{emqx_node_rebalance,1}.
|
||||
{emqx_node_rebalance_api,1}.
|
||||
{emqx_node_rebalance_evacuation,1}.
|
||||
{emqx_node_rebalance_status,1}.
|
||||
{emqx_persistent_session,1}.
|
||||
{emqx_plugin_libs,1}.
|
||||
{emqx_plugins,1}.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
-module(emqx_channel).
|
||||
|
||||
-include("emqx.hrl").
|
||||
-include("emqx_channel.hrl").
|
||||
-include("emqx_mqtt.hrl").
|
||||
-include("logger.hrl").
|
||||
-include("types.hrl").
|
||||
|
@ -57,6 +58,12 @@
|
|||
clear_keepalive/1
|
||||
]).
|
||||
|
||||
%% Export for emqx_channel implementations
|
||||
-export([
|
||||
maybe_nack/1,
|
||||
maybe_mark_as_delivered/2
|
||||
]).
|
||||
|
||||
%% Exports for CT
|
||||
-export([set_field/3]).
|
||||
|
||||
|
@ -69,7 +76,7 @@
|
|||
]
|
||||
).
|
||||
|
||||
-export_type([channel/0, opts/0]).
|
||||
-export_type([channel/0, opts/0, conn_state/0]).
|
||||
|
||||
-record(channel, {
|
||||
%% MQTT ConnInfo
|
||||
|
@ -131,33 +138,6 @@
|
|||
quota_timer => expire_quota_limit
|
||||
}).
|
||||
|
||||
-define(CHANNEL_METRICS, [
|
||||
recv_pkt,
|
||||
recv_msg,
|
||||
'recv_msg.qos0',
|
||||
'recv_msg.qos1',
|
||||
'recv_msg.qos2',
|
||||
'recv_msg.dropped',
|
||||
'recv_msg.dropped.await_pubrel_timeout',
|
||||
send_pkt,
|
||||
send_msg,
|
||||
'send_msg.qos0',
|
||||
'send_msg.qos1',
|
||||
'send_msg.qos2',
|
||||
'send_msg.dropped',
|
||||
'send_msg.dropped.expired',
|
||||
'send_msg.dropped.queue_full',
|
||||
'send_msg.dropped.too_large'
|
||||
]).
|
||||
|
||||
-define(INFO_KEYS, [
|
||||
conninfo,
|
||||
conn_state,
|
||||
clientinfo,
|
||||
session,
|
||||
will_msg
|
||||
]).
|
||||
|
||||
-define(LIMITER_ROUTING, message_routing).
|
||||
|
||||
-dialyzer({no_match, [shutdown/4, ensure_timer/2, interval/2]}).
|
||||
|
@ -1078,10 +1058,12 @@ handle_out(unsuback, {PacketId, _ReasonCodes}, Channel) ->
|
|||
handle_out(disconnect, ReasonCode, Channel) when is_integer(ReasonCode) ->
|
||||
ReasonName = disconnect_reason(ReasonCode),
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel);
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel = ?IS_MQTT_V5) ->
|
||||
Packet = ?DISCONNECT_PACKET(ReasonCode),
|
||||
handle_out(disconnect, {ReasonCode, ReasonName}, Channel) ->
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, #{}}, Channel);
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel = ?IS_MQTT_V5) ->
|
||||
Packet = ?DISCONNECT_PACKET(ReasonCode, Props),
|
||||
{ok, [{outgoing, Packet}, {close, ReasonName}], Channel};
|
||||
handle_out(disconnect, {_ReasonCode, ReasonName}, Channel) ->
|
||||
handle_out(disconnect, {_ReasonCode, ReasonName, _Props}, Channel) ->
|
||||
{ok, {close, ReasonName}, Channel};
|
||||
handle_out(auth, {ReasonCode, Properties}, Channel) ->
|
||||
{ok, ?AUTH_PACKET(ReasonCode, Properties), Channel};
|
||||
|
@ -1198,13 +1180,19 @@ handle_call(
|
|||
{takeover, 'end'},
|
||||
Channel = #channel{
|
||||
session = Session,
|
||||
pendings = Pendings
|
||||
pendings = Pendings,
|
||||
conninfo = #{clientid := ClientId}
|
||||
}
|
||||
) ->
|
||||
ok = emqx_session:takeover(Session),
|
||||
%% TODO: Should not drain deliver here (side effect)
|
||||
Delivers = emqx_utils:drain_deliver(),
|
||||
AllPendings = lists:append(Delivers, Pendings),
|
||||
?tp(
|
||||
debug,
|
||||
emqx_channel_takeover_end,
|
||||
#{clientid => ClientId}
|
||||
),
|
||||
disconnect_and_shutdown(takenover, AllPendings, Channel);
|
||||
handle_call(list_authz_cache, Channel) ->
|
||||
{reply, emqx_authz_cache:list_authz_cache(), Channel};
|
||||
|
@ -1276,6 +1264,8 @@ handle_info(die_if_test = Info, Channel) ->
|
|||
die_if_test_compiled(),
|
||||
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
||||
{ok, Channel};
|
||||
handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) ->
|
||||
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel);
|
||||
handle_info(Info, Channel) ->
|
||||
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
||||
{ok, Channel}.
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
-include("logger.hrl").
|
||||
-include("types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("stdlib/include/ms_transform.hrl").
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
|
@ -72,6 +74,12 @@
|
|||
get_session_confs/2
|
||||
]).
|
||||
|
||||
%% Client management
|
||||
-export([
|
||||
channel_with_session_table/1,
|
||||
live_connection_table/1
|
||||
]).
|
||||
|
||||
%% gen_server callbacks
|
||||
-export([
|
||||
init/1,
|
||||
|
@ -593,6 +601,40 @@ all_channels() ->
|
|||
Pat = [{{'_', '$1'}, [], ['$1']}],
|
||||
ets:select(?CHAN_TAB, Pat).
|
||||
|
||||
%% @doc Build a QLC query handle over ?CHAN_INFO_TAB that yields
%% `{ClientId, ConnState, ConnInfo, ClientInfo}' for every channel whose
%% conninfo has `clean_start := false' and whose `conn_mod' is one of
%% ConnModules.
channel_with_session_table(ConnModules) ->
    %% Map used as a set for the conn_mod membership filter below.
    AllowedMods = maps:from_list([{M, true} || M <- ConnModules]),
    MatchSpec = ets:fun2ms(
        fun({{Id, _Pid}, ChanInfo, _ChanStats}) -> {Id, ChanInfo} end
    ),
    InfoTab = ets:table(?CHAN_INFO_TAB, [{traverse, {select, MatchSpec}}]),
    qlc:q([
        {Id, State, CInfo, ClInfo}
     || {Id, #{
            conn_state := State,
            clientinfo := ClInfo,
            conninfo := #{clean_start := false, conn_mod := Mod} = CInfo
        }} <- InfoTab,
        maps:is_key(Mod, AllowedMods)
    ]).
|
||||
|
||||
%% @doc Build a QLC query handle over ?CHAN_CONN_TAB yielding
%% `{ClientId, ChanPid}' for entries whose connection module is one of
%% ConnModules and for which is_channel_connected/1 holds.
live_connection_table(ConnModules) ->
    MatchSpecs = lists:map(fun live_connection_ms/1, ConnModules),
    ConnTab = ets:table(?CHAN_CONN_TAB, [{traverse, {select, MatchSpecs}}]),
    qlc:q([
        {Id, Pid}
     || {Id, Pid} <- ConnTab,
        is_channel_connected(Pid)
    ]).
|
||||
|
||||
%% Match-spec clause selecting `{ClientId, ChanPid}' from rows of the
%% form `{{ClientId, ChanPid}, ConnModule}'.
live_connection_ms(ConnModule) ->
    Head = {{'$1', '$2'}, ConnModule},
    Result = [{{'$1', '$2'}}],
    {Head, [], Result}.
|
||||
|
||||
%% A channel counts as connected only if its pid lives on this node and
%% is present in ?CHAN_LIVE_TAB; anything else yields `false'.
is_channel_connected(ChanPid) when node() =:= node(ChanPid) ->
    ets:member(?CHAN_LIVE_TAB, ChanPid);
is_channel_connected(_NotLocal) ->
    false.
|
||||
|
||||
%% @doc Get all registered clientIDs. Debug/test interface
|
||||
all_client_ids() ->
|
||||
Pat = [{{'$1', '_'}, [], ['$1']}],
|
||||
|
@ -693,7 +735,8 @@ code_change(_OldVsn, State, _Extra) ->
|
|||
%%--------------------------------------------------------------------
|
||||
|
||||
clean_down({ChanPid, ClientId}) ->
|
||||
do_unregister_channel({ClientId, ChanPid}).
|
||||
do_unregister_channel({ClientId, ChanPid}),
|
||||
ok = ?tp(debug, emqx_cm_clean_down, #{client_id => ClientId}).
|
||||
|
||||
stats_fun() ->
|
||||
lists:foreach(fun update_stats/1, ?CHAN_STATS).
|
||||
|
@ -719,12 +762,12 @@ get_chann_conn_mod(ClientId, ChanPid) ->
|
|||
wrap_rpc(emqx_cm_proto_v1:get_chann_conn_mod(ClientId, ChanPid)).
|
||||
|
||||
mark_channel_connected(ChanPid) ->
|
||||
?tp(emqx_cm_connected_client_count_inc, #{}),
|
||||
?tp(emqx_cm_connected_client_count_inc, #{chan_pid => ChanPid}),
|
||||
ets:insert_new(?CHAN_LIVE_TAB, {ChanPid, true}),
|
||||
ok.
|
||||
|
||||
mark_channel_disconnected(ChanPid) ->
|
||||
?tp(emqx_cm_connected_client_count_dec, #{}),
|
||||
?tp(emqx_cm_connected_client_count_dec, #{chan_pid => ChanPid}),
|
||||
ets:delete(?CHAN_LIVE_TAB, ChanPid),
|
||||
ok.
|
||||
|
||||
|
|
|
@ -167,9 +167,15 @@ handle_info(Info, State) ->
|
|||
{noreply, State}.
|
||||
|
||||
terminate(_Reason, _State) ->
|
||||
ok = ekka:unmonitor(membership),
|
||||
emqx_stats:cancel_update(route_stats),
|
||||
mnesia:unsubscribe({table, ?ROUTING_NODE, simple}).
|
||||
try
|
||||
ok = ekka:unmonitor(membership),
|
||||
emqx_stats:cancel_update(route_stats),
|
||||
mnesia:unsubscribe({table, ?ROUTING_NODE, simple})
|
||||
catch
|
||||
exit:{noproc, {gen_server, call, [mria_membership, _]}} ->
|
||||
?SLOG(warning, #{msg => "mria_membership_down"}),
|
||||
ok
|
||||
end.
|
||||
|
||||
code_change(_OldVsn, State, _Extra) ->
|
||||
{ok, State}.
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
set_default_config/0,
|
||||
set_default_config/1,
|
||||
set_default_config/2,
|
||||
set_default_config/3,
|
||||
request/2,
|
||||
request/3,
|
||||
request/4,
|
||||
|
@ -40,11 +41,14 @@ set_default_config(DefaultUsername) ->
|
|||
set_default_config(DefaultUsername, false).
|
||||
|
||||
set_default_config(DefaultUsername, HAProxyEnabled) ->
|
||||
set_default_config(DefaultUsername, HAProxyEnabled, #{}).
|
||||
|
||||
set_default_config(DefaultUsername, HAProxyEnabled, Opts) ->
|
||||
Config = #{
|
||||
listeners => #{
|
||||
http => #{
|
||||
enable => true,
|
||||
bind => 18083,
|
||||
bind => maps:get(bind, Opts, 18083),
|
||||
inet6 => false,
|
||||
ipv6_v6only => false,
|
||||
max_connections => 512,
|
||||
|
|
|
@ -149,8 +149,14 @@ basic_reboot_apps() ->
|
|||
emqx_plugins
|
||||
],
|
||||
case emqx_release:edition() of
|
||||
ce -> CE;
|
||||
ee -> CE ++ []
|
||||
ce ->
|
||||
CE;
|
||||
ee ->
|
||||
CE ++
|
||||
[
|
||||
emqx_eviction_agent,
|
||||
emqx_node_rebalance
|
||||
]
|
||||
end.
|
||||
|
||||
sorted_reboot_apps() ->
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add node rebalance/node evacuation [functionality](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md).
|
|
@ -0,0 +1 @@
|
|||
添加节点再平衡/节点疏散[功能](https://github.com/emqx/eip/blob/main/active/0020-node-rebalance.md)。
|
|
@ -0,0 +1,9 @@
|
|||
emqx_eviction_agent
|
||||
=====
|
||||
|
||||
An OTP library
|
||||
|
||||
Build
|
||||
-----
|
||||
|
||||
$ rebar3 compile
|
|
@ -0,0 +1,3 @@
|
|||
##--------------------------------------------------------------------
|
||||
## EMQX Eviction Agent Plugin
|
||||
##--------------------------------------------------------------------
|
|
@ -0,0 +1,14 @@
|
|||
emqx_eviction_agent_api {
|
||||
|
||||
node_eviction_status_get {
|
||||
desc {
|
||||
en: "Get the node eviction status"
|
||||
zh: "获取节点驱逐状态"
|
||||
}
|
||||
label {
|
||||
en: "Node Eviction Status"
|
||||
zh: "节点驱逐状态"
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,22 @@
|
|||
{application, emqx_eviction_agent, [
|
||||
{description, "EMQX Eviction Agent"},
|
||||
{vsn, "5.0.0"},
|
||||
{registered, [
|
||||
emqx_eviction_agent_sup,
|
||||
emqx_eviction_agent,
|
||||
emqx_eviction_agent_conn_sup
|
||||
]},
|
||||
{applications, [
|
||||
kernel,
|
||||
stdlib,
|
||||
emqx_ctl
|
||||
]},
|
||||
{mod, {emqx_eviction_agent_app, []}},
|
||||
{env, []},
|
||||
{modules, []},
|
||||
{maintainers, ["EMQX Team <contact@emqx.io>"]},
|
||||
{links, [
|
||||
{"Homepage", "https://emqx.io/"},
|
||||
{"Github", "https://github.com/emqx"}
|
||||
]}
|
||||
]}.
|
|
@ -0,0 +1,3 @@
|
|||
%% -*- mode: erlang -*-
|
||||
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,346 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("emqx/include/emqx_hooks.hrl").
|
||||
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start_link/0,
|
||||
enable/2,
|
||||
disable/1,
|
||||
status/0,
|
||||
connection_count/0,
|
||||
session_count/0,
|
||||
session_count/1,
|
||||
evict_connections/1,
|
||||
evict_sessions/2,
|
||||
evict_sessions/3,
|
||||
evict_session_channel/3
|
||||
]).
|
||||
|
||||
-behaviour(gen_server).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_info/2,
|
||||
handle_cast/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-export([
|
||||
on_connect/2,
|
||||
on_connack/3
|
||||
]).
|
||||
|
||||
-export([
|
||||
hook/0,
|
||||
unhook/0
|
||||
]).
|
||||
|
||||
-export_type([server_reference/0]).
|
||||
|
||||
-define(CONN_MODULES, [emqx_connection, emqx_ws_connection, emqx_eviction_agent_channel]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-type server_reference() :: binary() | undefined.
|
||||
-type status() :: {enabled, conn_stats()} | disabled.
|
||||
-type conn_stats() :: #{
|
||||
connections := non_neg_integer(),
|
||||
sessions := non_neg_integer()
|
||||
}.
|
||||
-type kind() :: atom().
|
||||
|
||||
%% @doc Start the eviction agent gen_server, registered locally as ?MODULE.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% @doc Ask the agent server to enable eviction for Kind, recording
%% ServerReference alongside it.
-spec enable(kind(), server_reference()) -> ok_or_error(eviction_agent_busy).
enable(Kind, ServerReference) ->
    Request = {enable, Kind, ServerReference},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% @doc Ask the agent server to disable eviction for Kind.
%%
%% Replies, as produced by the `{disable, Kind}' call handler:
%%   `ok'                           - the agent was enabled with this Kind;
%%   `{error, disabled}'            - the agent was not enabled;
%%   `{error, eviction_agent_busy}' - the agent is enabled with another Kind.
-spec disable(kind()) -> ok_or_error(disabled | eviction_agent_busy).
disable(Kind) ->
    gen_server:call(?MODULE, {disable, Kind}).
|
||||
|
||||
%% @doc Report `disabled', or `{enabled, Stats}' with the current
%% connection/session counts when the agent is enabled.
-spec status() -> status().
status() ->
    case enable_status() of
        disabled -> disabled;
        {enabled, _Kind, _ServerReference} -> {enabled, stats()}
    end.
|
||||
|
||||
%% @doc Disconnect up to N live connections, passing the stored server
%% reference to each of them. Returns `{error, disabled}' when the agent
%% is not enabled.
-spec evict_connections(pos_integer()) -> ok_or_error(disabled).
evict_connections(N) ->
    case enable_status() of
        disabled ->
            {error, disabled};
        {enabled, _Kind, ServerReference} ->
            ok = do_evict_connections(N, ServerReference)
    end.
|
||||
|
||||
%% @doc Evict up to N sessions to the given node(s), regardless of the
%% channels' connection state (delegates to evict_sessions/3 with `any').
%% A single node is wrapped into a one-element list; the node list must be
%% non-empty, which is enforced by pattern matching rather than an O(n)
%% `length/1' guard.
-spec evict_sessions(pos_integer(), node() | [node()]) -> ok_or_error(disabled).
evict_sessions(N, Node) when is_atom(Node) ->
    evict_sessions(N, [Node]);
evict_sessions(N, [_ | _] = Nodes) ->
    evict_sessions(N, Nodes, any).
|
||||
|
||||
%% @doc Evict up to N sessions whose channel is in ConnState (`any' matches
%% every state) to one of the given nodes. Returns `{error, disabled}' when
%% the agent is not enabled.
%% The node list must be non-empty; this is enforced by pattern matching
%% rather than an O(n) `length/1' guard.
-spec evict_sessions(pos_integer(), node() | [node()], atom()) -> ok_or_error(disabled).
evict_sessions(N, Node, ConnState) when is_atom(Node) ->
    evict_sessions(N, [Node], ConnState);
evict_sessions(N, [_ | _] = Nodes, ConnState) ->
    case enable_status() of
        {enabled, _Kind, _ServerReference} ->
            ok = do_evict_sessions(N, Nodes, ConnState);
        disabled ->
            {error, disabled}
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Drop any enable status stored under ?MODULE in persistent_term, then
%% start with an empty map as the server state.
init([]) ->
    InitialState = #{},
    _ = persistent_term:erase(?MODULE),
    {ok, InitialState}.
|
||||
|
||||
%% Synchronous requests.
%%   {enable, Kind, ServerReference}: store the enabled status unless the
%%     agent is already enabled with a different Kind.
%%   {disable, Kind}: erase the status if it was enabled with this Kind.
%%   anything else: logged and answered with `{error, unknown_call}'.
handle_call({enable, Kind, ServerReference}, _From, St) ->
    NewStatus = {enabled, Kind, ServerReference},
    Reply =
        case enable_status() of
            disabled ->
                ok = persistent_term:put(?MODULE, NewStatus);
            {enabled, Kind, _PrevServerReference} ->
                %% same kind: just refresh the server reference
                ok = persistent_term:put(?MODULE, NewStatus);
            {enabled, _OtherKind, _PrevServerReference} ->
                {error, eviction_agent_busy}
        end,
    {reply, Reply, St};
handle_call({disable, Kind}, _From, St) ->
    Reply =
        case enable_status() of
            disabled ->
                {error, disabled};
            {enabled, Kind, _ServerReference} ->
                _ = persistent_term:erase(?MODULE),
                ok;
            {enabled, _OtherKind, _ServerReference} ->
                {error, eviction_agent_busy}
        end,
    {reply, Reply, St};
handle_call(Msg, _From, St) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Msg, state => St}),
    {reply, {error, unknown_call}, St}.
|
||||
|
||||
%% No raw messages are expected: log the message and keep the state.
handle_info(Msg, St) ->
    ?SLOG(warning, #{
        msg => "unknown_msg",
        info => Msg,
        state => St
    }),
    {noreply, St}.
|
||||
|
||||
%% No casts are expected: log the message and keep the state.
handle_cast(Msg, St) ->
    ?SLOG(warning, #{
        msg => "unknown_cast",
        cast => Msg,
        state => St
    }),
    {noreply, St}.
|
||||
|
||||
%% The state needs no conversion on code change.
code_change(_OldVsn, St, _Extra) -> {ok, St}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Hook callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% 'client.connect' hook callback: while the agent is enabled, stop the
%% hook chain with ?RC_USE_ANOTHER_SERVER; otherwise do not interfere.
on_connect(_ConnInfo, _Props) ->
    case enable_status() of
        disabled ->
            ignore;
        {enabled, _Kind, _ServerReference} ->
            {stop, {error, ?RC_USE_ANOTHER_SERVER}}
    end.
|
||||
|
||||
%% 'client.connack' hook callback: for MQTT v5 clients being answered with
%% `use_another_server', add the stored 'Server-Reference' to the CONNACK
%% properties while the agent is enabled. All other cases pass the
%% properties through unchanged.
on_connack(
    #{proto_name := <<"MQTT">>, proto_ver := ?MQTT_PROTO_V5},
    use_another_server,
    Props
) ->
    NewProps =
        case enable_status() of
            disabled ->
                Props;
            {enabled, _Kind, ServerReference} ->
                Props#{'Server-Reference' => ServerReference}
        end,
    {ok, NewProps};
on_connack(_ClientInfo, _Reason, Props) ->
    {ok, Props}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Hook funcs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Register the connack and connect callbacks of this module at
%% ?HP_NODE_REBALANCE priority (connack first, then connect).
hook() ->
    ?tp(debug, eviction_agent_hook, #{}),
    Register = fun({HookPoint, Callback}) ->
        ok = emqx_hooks:put(HookPoint, {?MODULE, Callback, []}, ?HP_NODE_REBALANCE)
    end,
    lists:foreach(Register, [
        {'client.connack', on_connack},
        {'client.connect', on_connect}
    ]).
|
||||
|
||||
%% Remove the callbacks installed by hook/0 (connect first, then connack).
unhook() ->
    ?tp(debug, eviction_agent_unhook, #{}),
    Unregister = fun({HookPoint, Callback}) ->
        ok = emqx_hooks:del(HookPoint, {?MODULE, Callback})
    end,
    lists:foreach(Unregister, [
        {'client.connect', on_connect},
        {'client.connack', on_connack}
    ]).
|
||||
|
||||
%% Current status as stored in persistent_term under ?MODULE;
%% `disabled' when nothing is stored.
enable_status() ->
    Default = disabled,
    persistent_term:get(?MODULE, Default).
|
||||
|
||||
% connection management
|
||||
%% Snapshot of the current connection and session counts.
stats() ->
    Connections = connection_count(),
    Sessions = session_count(),
    #{connections => Connections, sessions => Sessions}.
|
||||
|
||||
%% Query handle over live connections of the modules in ?CONN_MODULES.
connection_table() ->
    Modules = ?CONN_MODULES,
    emqx_cm:live_connection_table(Modules).
|
||||
|
||||
%% Number of answers produced by connection_table/0.
connection_count() ->
    LiveConnections = connection_table(),
    table_count(LiveConnections).
|
||||
|
||||
%% Query handle yielding `{ClientId, ConnInfo, ClientInfo}' for channels
%% returned by emqx_cm:channel_with_session_table/1. With `any' every
%% channel is included; otherwise only those whose connection state is
%% exactly RequiredConnState.
channel_with_session_table(any) ->
    Source = emqx_cm:channel_with_session_table(?CONN_MODULES),
    qlc:q([
        {Id, CInfo, ClInfo}
     || {Id, _AnyState, CInfo, ClInfo} <- Source
    ]);
channel_with_session_table(RequiredConnState) ->
    Source = emqx_cm:channel_with_session_table(?CONN_MODULES),
    qlc:q([
        {Id, CInfo, ClInfo}
     || {Id, State, CInfo, ClInfo} <- Source,
        RequiredConnState =:= State
    ]).
|
||||
|
||||
%% Number of sessions regardless of connection state.
session_count() -> session_count(any).
|
||||
|
||||
%% Number of sessions whose channel is in ConnState (`any' for all).
session_count(ConnState) ->
    Sessions = channel_with_session_table(ConnState),
    table_count(Sessions).
|
||||
|
||||
%% Count the answers of a query handle by folding over it, so the
%% answers are never collected into a list.
table_count(QH) ->
    CountOne = fun(_Answer, Count) -> Count + 1 end,
    qlc:fold(CountOne, 0, QH).
|
||||
|
||||
%% Fetch up to N channel pids from the live connection table.
%% The QLC cursor is deleted in an `after' section so that it is released
%% even when fetching the answers raises.
take_connections(N) ->
    ChanQH = qlc:q([ChanPid || {_ClientId, ChanPid} <- connection_table()]),
    ChanPidCursor = qlc:cursor(ChanQH),
    try
        qlc:next_answers(ChanPidCursor, N)
    after
        ok = qlc:delete_cursor(ChanPidCursor)
    end.
|
||||
|
||||
%% Fetch up to N `{ClientId, ConnInfo, ClientInfo}' entries for channels in
%% ConnState (`any' for all).
%% The QLC cursor is deleted in an `after' section so that it is released
%% even when fetching the answers raises.
take_channel_with_sessions(N, ConnState) ->
    ChanPidCursor = qlc:cursor(channel_with_session_table(ConnState)),
    try
        qlc:next_answers(ChanPidCursor, N)
    after
        ok = qlc:delete_cursor(ChanPidCursor)
    end.
|
||||
|
||||
%% Send a disconnect request carrying ServerReference to up to N live
%% channels. Only defined for N > 0.
do_evict_connections(N, ServerReference) when N > 0 ->
    Targets = take_connections(N),
    Disconnect = fun(Pid) -> disconnect_channel(Pid, ServerReference) end,
    ok = lists:foreach(Disconnect, Targets).
|
||||
|
||||
%% Evict up to N sessions (channels in ConnState) to nodes picked from
%% Nodes. Per-session results are ignored. Only defined for N > 0.
do_evict_sessions(N, Nodes, ConnState) when N > 0 ->
    Selected = take_channel_with_sessions(N, ConnState),
    Evict = fun({Id, CInfo, ClInfo}) ->
        evict_session_channel(Nodes, Id, CInfo, ClInfo)
    end,
    ok = lists:foreach(Evict, Selected).
|
||||
|
||||
%% Pick a random node from Nodes and ask it (through
%% emqx_eviction_agent_proto_v1) to take over the session of ClientId.
%% `{badrpc, Reason}' is logged and turned into `{error, Reason}';
%% `{error, Reason}' is logged and returned as is; any other result is
%% returned untouched.
evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo) ->
    Node = select_random(Nodes),
    ?SLOG(info, #{
        msg => "evict_session_channel",
        client_id => ClientId,
        node => Node,
        conn_info => ConnInfo,
        client_info => ClientInfo
    }),
    RpcResult = emqx_eviction_agent_proto_v1:evict_session_channel(
        Node, ClientId, ConnInfo, ClientInfo
    ),
    case RpcResult of
        {badrpc, RpcReason} ->
            ?SLOG(error, #{
                msg => "evict_session_channel_rpc_error",
                client_id => ClientId,
                node => Node,
                reason => RpcReason
            }),
            {error, RpcReason};
        {error, EvictReason} ->
            ?SLOG(error, #{
                msg => "evict_session_channel_error",
                client_id => ClientId,
                node => Node,
                reason => EvictReason
            }),
            RpcResult;
        _Other ->
            RpcResult
    end.
|
||||
|
||||
%% @doc Start a supervised emqx_eviction_agent_channel for the given
%% client on the local node, logging the request and its result.
-spec evict_session_channel(
    emqx_types:clientid(),
    emqx_types:conninfo(),
    emqx_types:clientinfo()
) -> supervisor:startchild_ret().
evict_session_channel(ClientId, ConnInfo, ClientInfo) ->
    ?SLOG(info, #{
        msg => "evict_session_channel",
        client_id => ClientId,
        conn_info => ConnInfo,
        client_info => ClientInfo
    }),
    ChannelOpts = #{conninfo => ConnInfo, clientinfo => ClientInfo},
    StartResult = emqx_eviction_agent_channel:start_supervised(ChannelOpts),
    ?SLOG(info, #{
        msg => "evict_session_channel_result",
        client_id => ClientId,
        result => StartResult
    }),
    StartResult.
|
||||
|
||||
%% Send the channel process a disconnect request with reason code
%% ?RC_USE_ANOTHER_SERVER and a 'Server-Reference' property.
disconnect_channel(ChanPid, ServerReference) ->
    Props = #{'Server-Reference' => ServerReference},
    Request = {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props},
    erlang:send(ChanPid, Request).
|
||||
|
||||
%% Pick one element of a non-empty list uniformly at random.
%% Non-emptiness is enforced by the `[_ | _]' pattern instead of a
%% `length/1' guard, and the length is computed only once.
select_random([_ | _] = List) ->
    Index = rand:uniform(length(List)),
    lists:nth(Index, List).
|
|
@ -0,0 +1,85 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_api).
|
||||
|
||||
-behaviour(minirest_api).
|
||||
|
||||
-include_lib("typerefl/include/types.hrl").
|
||||
-include_lib("hocon/include/hoconsc.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
|
||||
%% Swagger specs from hocon schema
|
||||
-export([
|
||||
api_spec/0,
|
||||
paths/0,
|
||||
schema/1,
|
||||
namespace/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
fields/1,
|
||||
roots/0
|
||||
]).
|
||||
|
||||
%% API callbacks
|
||||
-export([
|
||||
'/node_eviction/status'/2
|
||||
]).
|
||||
|
||||
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||
|
||||
%% Schema namespace of this API module.
namespace() ->
    "node_eviction".
|
||||
|
||||
%% Swagger spec generated from this module's schema, with schema
%% checking turned on.
api_spec() ->
    Options = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, Options).
|
||||
|
||||
%% Paths served by this API module.
paths() -> ["/node_eviction/status"].
|
||||
|
||||
%% Schema of the status endpoint: a single GET operation answering 200
%% with the status schema.
schema("/node_eviction/status") ->
    StatusGet = #{
        tags => [<<"node_eviction">>],
        summary => <<"Get node eviction status">>,
        description => ?DESC("node_eviction_status_get"),
        responses => #{200 => schema_status()}
    },
    #{
        'operationId' => '/node_eviction/status',
        get => StatusGet
    }.
|
||||
|
||||
%% Handler of the status endpoint: always answers 200 with the agent
%% status, plus the stats when the agent is enabled.
'/node_eviction/status'(_Bindings, _Params) ->
    Body =
        case emqx_eviction_agent:status() of
            {enabled, Stats} -> #{status => enabled, stats => Stats};
            disabled -> #{status => disabled}
        end,
    {200, Body}.
|
||||
|
||||
%% Response schema: either the enabled or the disabled status struct.
schema_status() ->
    StatusUnion = hoconsc:union([ref(status_enabled), ref(status_disabled)]),
    mk(StatusUnion, #{}).
|
||||
|
||||
%% This schema module declares no root fields.
roots() ->
    [].
|
||||
|
||||
%% Field definitions of the structs referenced by schema_status/0.
fields(status_disabled) ->
    [{status, mk(disabled, #{default => disabled})}];
fields(status_enabled) ->
    StatusField = {status, mk(enabled, #{default => enabled})},
    StatsField = {stats, ref(stats)},
    [StatusField, StatsField];
fields(stats) ->
    ConnectionsField = {connections, mk(integer(), #{})},
    SessionsField = {sessions, mk(integer(), #{})},
    [ConnectionsField, SessionsField].
|
|
@ -0,0 +1,24 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_app).
|
||||
|
||||
-behaviour(application).
|
||||
|
||||
-emqx_plugin(?MODULE).
|
||||
|
||||
-export([
|
||||
start/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
%% Application start: install the hooks, start the top supervisor, then
%% load the CLI commands. Returns the supervisor start result.
start(_Type, _Args) ->
    ok = emqx_eviction_agent:hook(),
    {ok, _SupPid} = Started = emqx_eviction_agent_sup:start_link(),
    ok = emqx_eviction_agent_cli:load(),
    Started.
|
||||
|
||||
%% Application stop: remove the hooks, then unload the CLI commands.
stop(_State) ->
    ok = emqx_eviction_agent:unhook(),
    ok = emqx_eviction_agent_cli:unload(),
    ok.
|
|
@ -0,0 +1,368 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% MQTT Channel
|
||||
-module(emqx_eviction_agent_channel).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_channel.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-logger_header("[Evicted Channel]").
|
||||
|
||||
-export([
|
||||
start_link/1,
|
||||
start_supervised/1,
|
||||
call/2,
|
||||
call/3,
|
||||
cast/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_cast/2,
|
||||
handle_info/2,
|
||||
terminate/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-import(
|
||||
emqx_misc,
|
||||
[
|
||||
maybe_apply/2
|
||||
]
|
||||
).
|
||||
|
||||
-type opts() :: #{
|
||||
conninfo := emqx_types:conninfo(),
|
||||
clientinfo := emqx_types:clientinfo()
|
||||
}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% API
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc Start a channel process as a temporary worker under
%% emqx_eviction_agent_conn_sup. The child id is the client id followed
%% by "-" and a unique positive integer.
-spec start_supervised(opts()) -> supervisor:startchild_ret().
start_supervised(#{clientinfo := #{clientid := ClientId}} = Opts) ->
    Suffix = integer_to_binary(erlang:unique_integer([positive])),
    ChildId = <<(bin_clientid(ClientId))/binary, "-", Suffix/binary>>,
    supervisor:start_child(emqx_eviction_agent_conn_sup, #{
        id => ChildId,
        start => {?MODULE, start_link, [Opts]},
        restart => temporary,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }).
|
||||
|
||||
%% Starts an unregistered channel gen_server linked to the caller.
%% `Opts' is handed to init/1 wrapped in a one-element list.
-spec start_link(opts()) -> startlink_ret().
start_link(Opts) ->
    InitArgs = [Opts],
    gen_server:start_link(?MODULE, InitArgs, []).
|
||||
|
||||
%% Sends `Req' to the channel process asynchronously.
-spec cast(pid(), term()) -> ok.
cast(Pid, Req) ->
    ok = gen_server:cast(Pid, Req).
|
||||
|
||||
%% Synchronous request to the channel process without a timeout
%% (waits indefinitely for the reply).
-spec call(pid(), term()) -> term().
call(Pid, Req) ->
    gen_server:call(Pid, Req, infinity).
|
||||
|
||||
%% Synchronous request to the channel process bounded by `Timeout'.
-spec call(pid(), term(), timeout()) -> term().
call(Pid, Req, Timeout) ->
    Reply = gen_server:call(Pid, Req, Timeout),
    Reply.
|
||||
|
||||
%% Stops the channel process via gen_server:stop/1.
-spec stop(pid()) -> ok.
stop(Pid) ->
    ok = gen_server:stop(Pid).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server API
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_server init callback.
%%
%% Traps exits, normalises the supplied client/conn info, opens the
%% existing session and arms the expiry timer. On success the channel is
%% marked as disconnected in emqx_cm and the process hibernates. If either
%% opening the session or arming the timer returns `{error, Reason}', the
%% process stops with that reason.
init([#{conninfo := OldConnInfo, clientinfo := #{clientid := ClientId} = OldClientInfo}]) ->
    process_flag(trap_exit, true),
    ClientInfo = clientinfo(OldClientInfo),
    ConnInfo = conninfo(OldConnInfo),
    Prepared =
        case open_session(ConnInfo, ClientInfo) of
            {ok, Opened} -> set_expiry_timer(Opened);
            {error, _} = OpenError -> OpenError
        end,
    case Prepared of
        {ok, Channel} ->
            ?SLOG(
                info,
                #{
                    msg => "channel_initialized",
                    clientid => ClientId,
                    node => node()
                }
            ),
            ok = emqx_cm:mark_channel_disconnected(self()),
            {ok, Channel, hibernate};
        {error, Reason} ->
            {stop, Reason}
    end.
|
||||
|
||||
%% gen_server call handler.
%%
%%   kick               - reply `ok', stop with reason `kicked'
%%   discard            - reply `ok', stop with reason `discarded'
%%   {takeover,'begin'} - reply with the session, flag takeover in progress
%%   {takeover,'end'}   - hand the session over, reply with the drained
%%                        deliveries followed by the stored pendings, stop
%%                        with reason `normal'
%%   list_acl_cache     - always replies `[]'
%%   {quota, _}         - always replies `ok'
%%   anything else      - logged as an error, replies `ignored'
handle_call(kick, _From, State) ->
    {stop, kicked, ok, State};
handle_call(discard, _From, State) ->
    {stop, discarded, ok, State};
handle_call({takeover, 'begin'}, _From, #{session := Session} = State) ->
    {reply, Session, State#{takeover => true}};
handle_call({takeover, 'end'}, _From, State) ->
    #{
        session := Session,
        clientinfo := #{clientid := ClientId},
        pendings := Pendings
    } = State,
    ok = emqx_session:takeover(Session),
    %% TODO: Should not drain deliver here (side effect)
    Drained = emqx_misc:drain_deliver(),
    Reply = Drained ++ Pendings,
    ?tp(
        debug,
        emqx_channel_takeover_end,
        #{clientid => ClientId}
    ),
    {stop, normal, Reply, State};
handle_call(list_acl_cache, _From, State) ->
    {reply, [], State};
handle_call({quota, _Policy}, _From, State) ->
    {reply, ok, State};
handle_call(Req, _From, State) ->
    ?SLOG(error, #{msg => "unexpected_call", req => Req}),
    {reply, ignored, State}.
|
||||
|
||||
%% gen_server info handler.
%%
%% A `deliver' message is batched together with every other delivery
%% drained from the mailbox and passed to handle_deliver/2.
%% `expire_session' stops the process with reason `expired'.
%% Any other message is logged as an error and ignored.
handle_info({deliver, _Topic, _Msg} = First, Channel) ->
    Batch = [First | emqx_misc:drain_deliver()],
    {noreply, handle_deliver(Batch, Channel)};
handle_info(expire_session, Channel) ->
    {stop, expired, Channel};
handle_info(Info, Channel) ->
    ?SLOG(error, #{msg => "unexpected_info", info => Info}),
    {noreply, Channel}.
|
||||
|
||||
%% gen_server cast handler: no casts are expected, so every cast is
%% logged as an error and the state is left untouched.
handle_cast(Msg, Channel) ->
    ?SLOG(
        error,
        #{msg => "unexpected_cast", cast => Msg}
    ),
    {noreply, Channel}.
|
||||
|
||||
%% gen_server terminate callback.
%%
%% Cancels the expiry timer, persists the session only when the process
%% stops because the session expired, and finally lets emqx_session run
%% its own termination logic for the given reason.
terminate(Reason, #{conninfo := ConnInfo, clientinfo := ClientInfo, session := Session} = Channel) ->
    ok = cancel_expiry_timer(Channel),
    case Reason of
        expired ->
            _ = emqx_persistent_session:persist(ClientInfo, ConnInfo, Session),
            ok;
        _ ->
            ok
    end,
    emqx_session:terminate(ClientInfo, Reason, Session).
|
||||
|
||||
%% gen_server code_change callback: the state needs no conversion.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% TODO: sync with emqx_channel
%%
%% Processes a batch of `deliver' messages.
%%
%% While a takeover is in progress the filtered deliveries are appended to
%% `pendings'. Otherwise they are enqueued into the session, the session
%% is persisted, and the deliveries are marked as delivered.
handle_deliver(
    Delivers,
    #{
        takeover := true,
        pendings := Pendings,
        session := Session,
        clientinfo := #{clientid := ClientId} = ClientInfo
    } = Channel
) ->
    %% NOTE: Order is important here. While the takeover is in
    %% progress, the session cannot enqueue messages, since it already
    %% passed on the queue to the new connection in the session state.
    Nacked = emqx_channel:maybe_nack(Delivers),
    Kept = emqx_session:ignore_local(ClientInfo, Nacked, ClientId, Session),
    Channel#{pendings => Pendings ++ Kept};
handle_deliver(
    Delivers,
    #{
        takeover := false,
        session := Session,
        clientinfo := #{clientid := ClientId} = ClientInfo
    } = Channel
) ->
    Nacked = emqx_channel:maybe_nack(Delivers),
    Kept = emqx_session:ignore_local(ClientInfo, Nacked, ClientId, Session),
    Enqueued = emqx_session:enqueue(ClientInfo, Kept, Session),
    Updated = persist(Enqueued, Channel),
    %% We consider queued/dropped messages as delivered since they are now in the session state.
    emqx_channel:maybe_mark_as_delivered(Session, Delivers),
    Updated.
|
||||
|
||||
%% Cancels the expiry timer when the channel holds a timer reference.
%% Always returns `ok', also when there is nothing to cancel.
cancel_expiry_timer(Channel) ->
    case Channel of
        #{expiry_timer := Timer} when is_reference(Timer) ->
            _ = erlang:cancel_timer(Timer),
            ok;
        _ ->
            ok
    end.
|
||||
|
||||
%% Arms the session expiry timer from `expiry_interval' in the conninfo.
%%
%% Returns:
%%   {ok, Channel}                         - interval is ?UINT_MAX, no timer
%%                                           is armed
%%   {ok, Channel#{expiry_timer => TRef}}  - positive integer interval;
%%                                           `expire_session' is sent to
%%                                           self() after timer:seconds(I)
%%   {error, should_be_expired}            - integer interval =< 0
%%   {error, {invalid_expiry_interval, V}} - interval is not an integer
%%
%% Fails with `{badkey, expiry_interval}' when the key is absent.
set_expiry_timer(#{conninfo := ConnInfo} = Channel) ->
    case maps:get(expiry_interval, ConnInfo) of
        ?UINT_MAX ->
            {ok, Channel};
        I when is_integer(I), I > 0 ->
            Timer = erlang:send_after(timer:seconds(I), self(), expire_session),
            {ok, Channel#{expiry_timer => Timer}};
        I when is_integer(I) ->
            {error, should_be_expired};
        Other ->
            %% In Erlang term order every non-number compares greater than
            %% any number, so a bare `I > 0' guard would let e.g. `undefined'
            %% through and crash in timer:seconds/1. Report it instead.
            {error, {invalid_expiry_interval, Other}}
    end.
|
||||
|
||||
%% Opens the client's existing session through emqx_cm (clean start is
%% `false') and builds the channel state around it.
%%
%% Returns:
%%   {ok, Channel}       - a session was present. Pending deliveries
%%                         returned by emqx_cm plus those drained from the
%%                         mailbox are sorted and de-duplicated, filtered,
%%                         and enqueued into the session; the channel info
%%                         and stats are then registered in emqx_cm.
%%   {error, no_session} - emqx_cm reports that no session was present.
%%   {error, Reason}     - emqx_cm failed to open the session.
open_session(ConnInfo, #{clientid := ClientId} = ClientInfo) ->
    Channel = channel(ConnInfo, ClientInfo),
    case emqx_cm:open_session(false, ClientInfo, ConnInfo) of
        {ok, #{present := false}} ->
            ?SLOG(info, #{msg => "no_session", clientid => ClientId, node => node()}),
            {error, no_session};
        {ok, #{session := Session, present := true, pendings := Pendings0}} ->
            ?SLOG(info, #{msg => "session_opened", clientid => ClientId, node => node()}),
            Pendings1 = lists:usort(Pendings0 ++ emqx_misc:drain_deliver()),
            Nacked = emqx_channel:maybe_nack(Pendings1),
            Kept = emqx_session:ignore_local(ClientInfo, Nacked, ClientId, Session),
            NSession = emqx_session:enqueue(ClientInfo, Kept, Session),
            NChannel = Channel#{session => NSession},
            ok = emqx_cm:insert_channel_info(ClientId, info(NChannel), stats(NChannel)),
            ?SLOG(info, #{msg => "channel_info_updated", clientid => ClientId, node => node()}),
            {ok, NChannel};
        {error, Reason} = Error ->
            ?SLOG(error, #{
                msg => "session_open_failed",
                clientid => ClientId,
                node => node(),
                reason => Reason
            }),
            Error
    end.
|
||||
|
||||
%% Builds the conninfo of the channel from the original one.
%%
%% Only a fixed set of keys is carried over. `conn_mod' is set to this
%% module, `connected' to `false', and `disconnected_at' keeps its
%% original value or defaults to the current system time in milliseconds.
conninfo(OldConnInfo) ->
    Now = erlang:system_time(millisecond),
    DisconnectedAt = maps:get(disconnected_at, OldConnInfo, Now),
    KeptKeys = [
        socktype,
        sockname,
        peername,
        peercert,
        clientid,
        clean_start,
        receive_maximum,
        expiry_interval,
        connected_at,
        disconnected_at,
        keepalive
    ],
    Overrides = #{
        conn_mod => ?MODULE,
        connected => false,
        disconnected_at => DisconnectedAt
    },
    maps:merge(maps:with(KeptKeys, OldConnInfo), Overrides).
|
||||
|
||||
%% Restricts the original clientinfo to the fixed set of keys the channel
%% keeps; every other key is dropped.
clientinfo(OldClientInfo) ->
    KeptKeys = [
        zone,
        protocol,
        peerhost,
        sockport,
        clientid,
        username,
        is_bridge,
        is_superuser,
        mountpoint
    ],
    maps:with(KeptKeys, OldClientInfo).
|
||||
|
||||
%% Initial channel state: no expiry timer, no takeover or resume in
%% progress, and no pending deliveries. The session is added later.
channel(ConnInfo, ClientInfo) ->
    Defaults = #{
        expiry_timer => undefined,
        takeover => false,
        resuming => false,
        pendings => []
    },
    Defaults#{conninfo => ConnInfo, clientinfo => ClientInfo}.
|
||||
|
||||
%% Passes the session through emqx_persistent_session:persist/3 and
%% stores the returned session in the channel.
persist(Session, #{clientinfo := ClientInfo, conninfo := ConnInfo} = Channel) ->
    Persisted = emqx_persistent_session:persist(ClientInfo, ConnInfo, Session),
    Channel#{session => Persisted}.
|
||||
|
||||
%% Channel info map as registered in emqx_cm.
%% Missing `conninfo' / `clientinfo' / `session' entries are reported as
%% `undefined'; the connection state is always `disconnected'.
info(Channel) ->
    Session = maps:get(session, Channel, undefined),
    #{
        conninfo => maps:get(conninfo, Channel, undefined),
        clientinfo => maps:get(clientinfo, Channel, undefined),
        session => emqx_misc:maybe_apply(fun emqx_session:info/1, Session),
        conn_state => disconnected
    }.
|
||||
|
||||
%% Session stats followed by the ?CHANNEL_METRICS counters read through
%% emqx_pd.
stats(#{session := Session}) ->
    SessionStats = emqx_session:stats(Session),
    SessionStats ++ emqx_pd:get_counters(?CHANNEL_METRICS).
|
||||
|
||||
%% Client id as a binary. Binaries are returned unchanged, atoms are
%% converted; any other term fails with `function_clause'.
bin_clientid(Bin) when is_binary(Bin) ->
    Bin;
bin_clientid(Atom) when is_atom(Atom) ->
    atom_to_binary(Atom, utf8).
|
|
@ -0,0 +1,30 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_cli).
|
||||
|
||||
%% APIs
|
||||
-export([
|
||||
load/0,
|
||||
unload/0,
|
||||
cli/1
|
||||
]).
|
||||
|
||||
%% Registers the `eviction' command with emqx_ctl, handled by cli/1.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(eviction, Handler, []).
|
||||
|
||||
%% Unregisters the `eviction' command from emqx_ctl.
unload() ->
    Cmd = eviction,
    emqx_ctl:unregister_command(Cmd).
|
||||
|
||||
%% Handler of the `eviction' command.
%% `status' prints whether eviction is enabled on this node; any other
%% argument list prints the usage.
cli(["status"]) ->
    Line =
        case emqx_eviction_agent:status() of
            disabled -> "Eviction status: disabled~n";
            {enabled, _Stats} -> "Eviction status: enabled~n"
        end,
    emqx_ctl:print(Line);
cli(_) ->
    Usage = [{"eviction status", "Get current node eviction status"}],
    emqx_ctl:usage(Usage).
|
|
@ -0,0 +1,21 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_conn_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Starts the connection supervisor, registered locally under the module
%% name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||
|
||||
%% Supervisor init callback: one_for_one strategy, at most 10 restarts
%% per 3600 seconds, and no static children.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    {ok, {SupFlags, []}}.
|
|
@ -0,0 +1,34 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Starts the top-level eviction agent supervisor, registered locally
%% under the module name.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||
|
||||
%% Supervisor init callback: supervises the eviction agent worker and the
%% connection supervisor with a one_for_one strategy, allowing at most 10
%% restarts per 3600 seconds.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    Agent = child_spec(worker, emqx_eviction_agent, []),
    ConnSup = child_spec(supervisor, emqx_eviction_agent_conn_sup, []),
    {ok, {SupFlags, [Agent, ConnSup]}}.
|
||||
|
||||
%% Child spec of a permanent child identified by its module and started
%% through `Mod:start_link' with `Args', with a 5000 ms shutdown timeout.
child_spec(Type, Mod, Args) ->
    StartMFA = {Mod, start_link, Args},
    #{
        id => Mod,
        start => StartMFA,
        restart => permanent,
        shutdown => 5000,
        type => Type,
        modules => [Mod]
    }.
|
|
@ -0,0 +1,27 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
evict_session_channel/4
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
|
||||
%% Version string returned to emqx_bpapi for this protocol module.
introduced_in() ->
    Version = "5.0.22",
    Version.
|
||||
|
||||
%% Calls emqx_eviction_agent:evict_session_channel/3 on `Node' via rpc
%% and returns its result, or the rpc failure.
-spec evict_session_channel(
    node(),
    emqx_types:clientid(),
    emqx_types:conninfo(),
    emqx_types:clientinfo()
) -> supervisor:startchild_err() | emqx_rpc:badrpc().
evict_session_channel(Node, ClientId, ConnInfo, ClientInfo) ->
    Args = [ClientId, ConnInfo, ClientInfo],
    rpc:call(Node, emqx_eviction_agent, evict_session_channel, Args).
|
|
@ -0,0 +1,403 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/0, emqtt_connect/1, emqtt_connect/2]
|
||||
).
|
||||
|
||||
%% Asserts that the value of `Code' contains a match for the regular
%% expression `Printed'.
-define(assertPrinted(Printed, Code),
    ?assertMatch({match, _}, re:run(Code, Printed))
).
|
||||
|
||||
%% Common Test callback: the test case list is derived from this module
%% by the shared test helper.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Starts the eviction agent application for the whole suite.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Stops the application started in init_per_suite/1.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||
|
||||
%% Per-case setup: makes sure eviction is disabled (the result is
%% ignored), starts snabbkaffe tracing, and starts the extra nodes for the
%% cases that need them.
init_per_testcase(Case, Config) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:start_trace(),
    start_slave(Case, Config).
|
||||
|
||||
%% Starts a two-node cluster for t_explicit_session_takeover and stores
%% the result under `evacuate_nodes' in the config; other cases get the
%% config back unchanged.
start_slave(t_explicit_session_takeover, Config) ->
    NodeSpecs = [{evacuate_test1, 2883}, {evacuate_test2, 3883}],
    Apps = [emqx_eviction_agent],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, Apps),
    [{evacuate_nodes, ClusterNodes} | Config];
start_slave(_Case, Config) ->
    Config.
|
||||
|
||||
%% Per-case teardown: disables eviction (the result is ignored), stops
%% snabbkaffe, and stops the extra nodes if the case started any.
end_per_testcase(TestCase, Config) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    ok = snabbkaffe:stop(),
    stop_slave(TestCase, Config).
|
||||
|
||||
%% Stops the cluster started by start_slave/2 for
%% t_explicit_session_takeover; a no-op for every other case.
stop_slave(t_explicit_session_takeover, Config) ->
    ClusterNodes = ?config(evacuate_nodes, Config),
    emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, [emqx_eviction_agent]);
stop_slave(_Case, _Config) ->
    ok.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Checks the enable/disable life cycle of the agent and its effect on
%% new client connections.
t_enable_disable(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    %% Disabled at start: clients can connect.
    ?assertMatch(disabled, emqx_eviction_agent:status()),
    {ok, C0} = emqtt_connect(),
    ok = emqtt:disconnect(C0),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    %% Another kind is rejected while the agent is enabled...
    ?assertMatch({error, eviction_agent_busy}, emqx_eviction_agent:enable(bar, undefined)),
    %% ...but the same kind may enable it again.
    ?assertMatch(ok, emqx_eviction_agent:enable(test_eviction, <<"srv">>)),
    ?assertMatch({enabled, #{}}, emqx_eviction_agent:status()),

    %% New connections are refused while enabled.
    ?assertMatch({error, {use_another_server, #{}}}, emqtt_connect()),

    %% Only the kind that enabled the agent can disable it, and only once.
    ?assertMatch({error, eviction_agent_busy}, emqx_eviction_agent:disable(bar)),
    ?assertMatch(ok, emqx_eviction_agent:disable(test_eviction)),
    ?assertMatch({error, disabled}, emqx_eviction_agent:disable(test_eviction)),
    ?assertMatch(disabled, emqx_eviction_agent:status()),

    %% Connections are accepted again.
    {ok, C1} = emqtt_connect(),
    ok = emqtt:disconnect(C1).
|
||||
|
||||
%% Checks that evicting connections is refused while disabled and that
%% the reported connection count drops to zero after an eviction.
t_evict_connections_status(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    {ok, _C} = emqtt_connect(),

    {error, disabled} = emqx_eviction_agent:evict_connections(1),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),
    ?assertMatch(
        {enabled, #{connections := 1, sessions := _}},
        emqx_eviction_agent:status()
    ),

    ok = emqx_eviction_agent:evict_connections(1),

    %% Give the eviction some time before reading the status again.
    ct:sleep(100),

    ?assertMatch(
        {enabled, #{connections := 0, sessions := _}},
        emqx_eviction_agent:status()
    ),

    ok = emqx_eviction_agent:disable(test_eviction).
|
||||
|
||||
%% Evicts a connection, moves its session first to the same node and then
%% to another node, and checks that messages published in between reach
%% the client once it reconnects.
t_explicit_session_takeover(Config) ->
    _ = erlang:process_flag(trap_exit, true),
    ok = restart_emqx(),

    [{Node1, Port1}, {Node2, _Port2}] = ?config(evacuate_nodes, Config),

    ConnectOpts = [
        {clientid, <<"client_with_session">>},
        {clean_start, false},
        {port, Port1}
    ],
    {ok, C0} = emqtt_connect(ConnectOpts),
    {ok, _, _} = emqtt:subscribe(C0, <<"t1">>),

    ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),

    ?assertEqual(1, rpc:call(Node1, emqx_eviction_agent, connection_count, [])),

    [ChanPid] = rpc:call(Node1, emqx_cm, lookup_channels, [<<"client_with_session">>]),

    %% Evict the connection: the client must exit with "use another
    %% server" and the connected client count must be decremented.
    ?assertWaitEvent(
        begin
            ok = rpc:call(Node1, emqx_eviction_agent, evict_connections, [1]),
            receive
                {'EXIT', C0, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
            after 1000 ->
                ?assert(false, "Connection not evicted")
            end
        end,
        #{?snk_kind := emqx_cm_connected_client_count_dec, chan_pid := ChanPid},
        2000
    ),

    ?assertEqual(0, rpc:call(Node1, emqx_eviction_agent, connection_count, [])),
    ?assertEqual(1, rpc:call(Node1, emqx_eviction_agent, session_count, [])),

    %% First, evacuate to the same node
    ?assertWaitEvent(
        rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node1]),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),

    {ok, C1} = emqtt_connect([{port, Port1}]),
    emqtt:publish(C1, <<"t1">>, <<"MessageToEvictedSession1">>),
    ok = emqtt:disconnect(C1),

    ok = rpc:call(Node1, emqx_eviction_agent, enable, [test_eviction, undefined]),

    %% Evacuate to another node
    ?assertWaitEvent(
        rpc:call(Node1, emqx_eviction_agent, evict_sessions, [1, Node2]),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ?assertEqual(0, rpc:call(Node1, emqx_eviction_agent, session_count, [])),
    ?assertEqual(1, rpc:call(Node2, emqx_eviction_agent, session_count, [])),

    ok = rpc:call(Node1, emqx_eviction_agent, disable, [test_eviction]),

    %% Session is on Node2, but we connect to Node1
    {ok, C2} = emqtt_connect([{port, Port1}]),
    emqtt:publish(C2, <<"t1">>, <<"MessageToEvictedSession2">>),
    ok = emqtt:disconnect(C2),

    ct:sleep(100),

    %% Session is on Node2, but we connect the subscribed client to Node1.
    %% It should take over the session for the third time and receive
    %% previously published messages.
    {ok, C3} = emqtt_connect(ConnectOpts),

    ok = assert_receive_publish(
        [
            #{payload => <<"MessageToEvictedSession1">>, topic => <<"t1">>},
            #{payload => <<"MessageToEvictedSession2">>, topic => <<"t1">>}
        ]
    ),
    ok = emqtt:disconnect(C3).
|
||||
|
||||
%% The agent must come back disabled after its process is restarted by
%% the supervisor.
t_disable_on_restart(_Config) ->
    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    Sup = emqx_eviction_agent_sup,
    ok = supervisor:terminate_child(Sup, emqx_eviction_agent),
    {ok, _} = supervisor:restart_child(Sup, emqx_eviction_agent),

    ?assertEqual(disabled, emqx_eviction_agent:status()).
|
||||
|
||||
%% After a session is evicted to the local node it must still be counted,
%% show up in the management API and CLI listings, and go away when
%% kicked.
t_session_serialization(_Config) ->
    _ = erlang:process_flag(trap_exit, true),
    ok = restart_emqx(),

    {ok, C0} = emqtt_connect(<<"client_with_session">>, false),
    {ok, _, _} = emqtt:subscribe(C0, <<"t1">>),
    ok = emqtt:disconnect(C0),

    ok = emqx_eviction_agent:enable(test_eviction, undefined),

    ?assertEqual(1, emqx_eviction_agent:session_count()),

    %% Evacuate to the same node
    ?assertWaitEvent(
        emqx_eviction_agent:evict_sessions(1, node()),
        #{?snk_kind := emqx_channel_takeover_end, clientid := <<"client_with_session">>},
        1000
    ),

    ok = emqx_eviction_agent:disable(test_eviction),

    ?assertEqual(1, emqx_eviction_agent:session_count()),

    %% The channel info must be formattable by the management API...
    ?assertMatch(
        #{data := [#{clientid := <<"client_with_session">>}]},
        emqx_mgmt_api:cluster_query(
            emqx_channel_info,
            #{},
            [],
            fun emqx_mgmt_api_clients:qs2ms/2,
            fun emqx_mgmt_api_clients:format_channel_info/2
        )
    ),

    mock_print(),

    %% ...and by the CLI.
    ?assertPrinted(
        "client_with_session",
        emqx_mgmt_cli:clients(["list"])
    ),
    ?assertPrinted(
        "client_with_session",
        emqx_mgmt_cli:clients(["show", "client_with_session"])
    ),

    ?assertWaitEvent(
        emqx_cm:kick_session(<<"client_with_session">>),
        #{?snk_kind := emqx_cm_clean_down, client_id := <<"client_with_session">>},
        1000
    ),

    ?assertEqual(0, emqx_eviction_agent:session_count()).
|
||||
|
||||
%% A client disconnected with "use another server" must still have its
%% will message published.
t_will_msg(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    WillMsg = <<"will_msg">>,
    WillTopic = <<"will_topic">>,
    ClientId = <<"client_with_will">>,

    %% Client carrying the will message.
    _ = emqtt_connect([
        {clean_start, false},
        {clientid, ClientId},
        {will_payload, WillMsg},
        {will_topic, WillTopic}
    ]),

    %% Observer subscribed to the will topic.
    {ok, C} = emqtt_connect(),
    {ok, _, _} = emqtt:subscribe(C, WillTopic),

    [ChanPid] = emqx_cm:lookup_channels(ClientId),

    Props = #{'Server-Reference' => <<>>},
    ChanPid ! {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props},

    receive
        {publish, #{payload := WillMsg, topic := WillTopic}} ->
            ok
    after 1000 ->
        ct:fail("Will message not received")
    end,

    ok = emqtt:disconnect(C).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
% sn_connect_and_subscribe(ClientId, Topic) ->
|
||||
% emqx_eviction_agent_test_helpers:sn_connect_and_subscribe(ClientId, Topic).
|
||||
|
||||
%% Waits for the expected publishes one after another, allowing one
%% second for each. Only payload and topic are compared; a missing
%% message fails the assertion.
assert_receive_publish([]) ->
    ok;
assert_receive_publish([#{payload := Msg, topic := Topic} | Remaining]) ->
    receive
        {publish, #{payload := Msg, topic := Topic}} ->
            assert_receive_publish(Remaining)
    after 1000 ->
        ?assert(false, "Message `" ++ binary_to_list(Msg) ++ "` is lost")
    end.
|
||||
|
||||
%% Connects a client with default options, publishes one message and
%% disconnects.
connect_and_publish(Topic, Message) ->
    {ok, Client} = emqtt_connect(),
    emqtt:publish(Client, Topic, Message),
    ok = emqtt:disconnect(Client).
|
||||
|
||||
%% Restarts `emqx' and then `emqx_eviction_agent'. The stop/start results
%% are ignored on purpose; the function always returns `ok'.
restart_emqx() ->
    Restart = fun(App) ->
        _ = application:stop(App),
        _ = application:start(App)
    end,
    lists:foreach(Restart, [emqx, emqx_eviction_agent]),
    ok.
|
||||
|
||||
%% Mocks emqx_ctl so that print/usage return the formatted text instead
%% of printing it. A previous mock is unloaded first; a failure to unload
%% is ignored.
mock_print() ->
    _ = (catch meck:unload(emqx_ctl)),
    meck:new(emqx_ctl, [non_strict, passthrough]),
    meck:expect(emqx_ctl, print, fun(Text) -> emqx_ctl:format(Text, []) end),
    meck:expect(emqx_ctl, print, fun(Fmt, Args) -> emqx_ctl:format(Fmt, Args) end),
    meck:expect(emqx_ctl, usage, fun(Usages) -> emqx_ctl:format_usage(Usages) end),
    meck:expect(emqx_ctl, usage, fun(Cmd, Descr) -> emqx_ctl:format_usage(Cmd, Descr) end).
|
|
@ -0,0 +1,69 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_api_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_mgmt_api_test_util,
|
||||
[
|
||||
request_api/2,
|
||||
uri/1
|
||||
]
|
||||
).
|
||||
|
||||
%% Common Test callback: the test case list is derived from this module
%% by the shared test helper.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Sets up the management API test environment with the eviction agent
%% application.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:init_suite(Apps),
    Config.
|
||||
|
||||
%% Tears down what init_per_suite/1 set up.
end_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_mgmt_api_test_util:end_suite(Apps),
    Config.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% The status endpoint must follow the agent being enabled and disabled.
t_status(_Config) ->
    StatusPath = ["node_eviction", "status"],

    ?assertMatch(
        {ok, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    ok = emqx_eviction_agent:enable(apitest, undefined),

    ?assertMatch(
        {ok, #{<<"status">> := <<"enabled">>, <<"stats">> := #{}}},
        api_get(StatusPath)
    ),

    ok = emqx_eviction_agent:disable(apitest),

    ?assertMatch(
        {ok, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Performs a GET on the management API and decodes the JSON body into
%% maps. Request errors are returned unchanged.
api_get(Path) ->
    Url = uri(Path),
    case request_api(get, Url) of
        {ok, Body} ->
            Json = list_to_binary(Body),
            {ok, jiffy:decode(Json, [return_maps])};
        {error, _} = Error ->
            Error
    end.
|
|
@ -0,0 +1,251 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_channel_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
|
||||
-define(CLIENT_ID, <<"client_with_session">>).
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/0, emqtt_connect/2]
|
||||
).
|
||||
|
||||
%% Common Test callback: the test case list is derived from this module
%% by the shared test helper.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Starts the required applications and switches rpc port discovery to
%% `manual'.
init_per_suite(Config) ->
    Apps = [emqx_conf, emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    {ok, _} = emqx:update_config([rpc, port_discovery], manual),
    Config.
|
||||
|
||||
%% Stops the applications in the reverse of their start order.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_conf],
    emqx_common_test_helpers:stop_apps(Apps).
|
||||
|
||||
%% t_persistence runs with the persistent session store enabled; the
%% other cases need no setup.
init_per_testcase(t_persistence, Config) ->
    StoreEnabledKey = [persistent_session_store, enabled],
    emqx_config:put(StoreEnabledKey, true),
    {ok, _} = emqx_persistent_session_sup:start_link(),
    emqx_persistent_session:init_db_backend(),
    ?assert(emqx_persistent_session:is_store_enabled()),
    Config;
init_per_testcase(_TestCase, Config) ->
    Config.
|
||||
|
||||
%% Turns the persistent session store off again after t_persistence;
%% nothing to clean up for the other cases.
end_per_testcase(t_persistence, Config) ->
    StoreEnabledKey = [persistent_session_store, enabled],
    emqx_config:put(StoreEnabledKey, false),
    emqx_persistent_session:init_db_backend(),
    ?assertNot(emqx_persistent_session:is_store_enabled()),
    Config;
end_per_testcase(_TestCase, _Config) ->
    ok.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Starting a channel for a client that has no session must fail with
%% `no_session'.
t_start_no_session(_Config) ->
    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 10000
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {error, {no_session, _}},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||
|
||||
%% With an expiry interval of zero the channel must refuse to start with
%% `should_be_expired'.
t_start_no_expire(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => 0
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {error, {should_be_expired, _}},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||
|
||||
%% With an expiry interval of ?UINT_MAX the channel must start.
t_start_infinite_expire(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),

    ClientInfo = #{clientid => ?CLIENT_ID, zone => internal},
    ConnInfo = #{
        clientid => ?CLIENT_ID,
        receive_maximum => 32,
        expiry_interval => ?UINT_MAX
    },
    Opts = #{clientinfo => ClientInfo, conninfo => ConnInfo},
    ?assertMatch(
        {ok, _},
        emqx_eviction_agent_channel:start_supervised(Opts)
    ).
|
||||
|
||||
%% A `kick' call must be answered with `ok'.
t_kick(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    Opts = evict_session_opts(?CLIENT_ID),
    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    ?assertEqual(ok, emqx_eviction_agent_channel:call(Pid, kick)).
|
||||
|
||||
%% A `discard' call must be answered with `ok'.
t_discard(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    Opts = evict_session_opts(?CLIENT_ID),
    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    ?assertEqual(ok, emqx_eviction_agent_channel:call(Pid, discard)).
|
||||
|
||||
%% Stopping the channel through its API must return `ok'.
t_stop(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    Opts = evict_session_opts(?CLIENT_ID),
    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    ?assertEqual(ok, emqx_eviction_agent_channel:stop(Pid)).
|
||||
|
||||
%% Unknown casts and messages must not break the channel, and the stubbed
%% calls must return their fixed replies.
t_ignored_calls(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    Opts = evict_session_opts(?CLIENT_ID),
    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    ok = emqx_eviction_agent_channel:cast(Pid, unknown),
    Pid ! unknown,

    ?assertEqual([], emqx_eviction_agent_channel:call(Pid, list_acl_cache)),
    ?assertEqual(ok, emqx_eviction_agent_channel:call(Pid, {quota, quota})),
    ?assertEqual(ignored, emqx_eviction_agent_channel:call(Pid, unknown)).
|
||||
|
||||
%% A channel started with an expiry interval of 1 must no longer be alive
%% after 1.5 seconds.
t_expire(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),
    #{conninfo := ConnInfo} = BaseOpts = evict_session_opts(?CLIENT_ID),
    Opts = BaseOpts#{conninfo => ConnInfo#{expiry_interval => 1}},

    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    ct:sleep(1500),

    ?assertNot(is_process_alive(Pid)).
|
||||
|
||||
%% Once the eviction channel has been started for the client's session,
%% the connected client count must drop from 1 to 0.
t_get_connected_client_count(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    _ = emqtt_connect(?CLIENT_ID, false),

    ?assertEqual(1, emqx_cm:get_connected_client_count()),

    Opts = evict_session_opts(?CLIENT_ID),
    {ok, _} = emqx_eviction_agent_channel:start_supervised(Opts),

    ?assertEqual(0, emqx_cm:get_connected_client_count()).
|
||||
|
||||
%% A message published while the session is held by the eviction channel
%% must reach the client when it reconnects after the channel was kicked.
t_persistence(_Config) ->
    _ = erlang:process_flag(trap_exit, true),

    Topic = <<"t1">>,
    Message = <<"message_to_persist">>,

    {ok, C0} = emqtt_connect(?CLIENT_ID, false),
    {ok, _, _} = emqtt:subscribe(C0, Topic, 0),

    Opts = evict_session_opts(?CLIENT_ID),
    {ok, Pid} = emqx_eviction_agent_channel:start_supervised(Opts),

    {ok, C1} = emqtt_connect(),
    {ok, _} = emqtt:publish(C1, Topic, Message, 1),
    ok = emqtt:disconnect(C1),

    %% Kill channel so that the session is only persisted
    ok = emqx_eviction_agent_channel:call(Pid, kick),

    %% Should restore session from persistent storage and receive messages
    {ok, C2} = emqtt_connect(?CLIENT_ID, false),

    receive
        {publish, #{payload := Message, topic := Topic}} ->
            ok
    after 1000 ->
        ct:fail("message not received")
    end,

    ok = emqtt:disconnect(C2).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Channel start options for `ClientId': the `conninfo' and `clientinfo'
%% entries of the channel info registered in emqx_cm.
evict_session_opts(ClientId) ->
    ChanInfo = emqx_cm:get_chan_info(ClientId),
    maps:with([conninfo, clientinfo], ChanInfo).
|
|
@ -0,0 +1,39 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_cli_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
%% Common Test callback: the case list is whatever
%% emqx_common_test_helpers:all/1 returns for this suite module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Starts emqx_eviction_agent through the common test helpers and passes the
%% CT config through unchanged.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Disables the eviction agent for kind `foo' (the kind t_status/1 enables;
%% the result is ignored) and stops the application again.
end_per_suite(Config) ->
    _ = emqx_eviction_agent:disable(foo),
    Apps = [emqx_eviction_agent],
    emqx_common_test_helpers:stop_apps(Apps),
    Config.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% The CLI must return `ok' for an unknown command and for `status', both
%% before and after the eviction agent has been enabled.
t_status(_Config) ->
    StatusCmd = ["status"],

    %% usage
    ok = emqx_eviction_agent_cli:cli(["foobar"]),

    %% status while the agent has not been enabled by this test
    ok = emqx_eviction_agent_cli:cli(StatusCmd),

    ok = emqx_eviction_agent:enable(foo, undefined),

    %% status after enabling
    ok = emqx_eviction_agent_cli:cli(StatusCmd).
|
|
@ -0,0 +1,141 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_eviction_agent_test_helpers).
|
||||
|
||||
-export([
|
||||
emqtt_connect/0,
|
||||
emqtt_connect/1,
|
||||
emqtt_connect/2,
|
||||
emqtt_connect_many/2,
|
||||
stop_many/1,
|
||||
|
||||
emqtt_try_connect/1,
|
||||
|
||||
start_cluster/2,
|
||||
start_cluster/3,
|
||||
stop_cluster/2,
|
||||
|
||||
case_specific_node_name/2,
|
||||
case_specific_node_name/3,
|
||||
concat_atoms/1
|
||||
]).
|
||||
|
||||
%% Connect as `<<"client1">>' with clean_start = true.
emqtt_connect() ->
    emqtt_connect(<<"client1">>, true).

%% Connect with the given client id and clean_start flag.
emqtt_connect(ClientId, CleanStart) ->
    Opts = [{clientid, ClientId}, {clean_start, CleanStart}],
    emqtt_connect(Opts).

%% Start a linked emqtt client with Opts followed by the MQTT v5 protocol
%% version and a Session-Expiry-Interval of 600, then connect it.
%% Returns {ok, Client} on success or the {error, _} tuple from
%% emqtt:connect/1.
emqtt_connect(Opts) ->
    CommonOpts = [
        {proto_ver, v5},
        {properties, #{'Session-Expiry-Interval' => 600}}
    ],
    {ok, Client} = emqtt:start_link(Opts ++ CommonOpts),
    case emqtt:connect(Client) of
        {error, _} = Error -> Error;
        {ok, _} -> {ok, Client}
    end.
|
||||
|
||||
%% Connect Count clients named `client-1' .. `client-Count' to Port with
%% clean_start = false and return the client pids in that order.
%% Crashes with badmatch as soon as one connection fails.
emqtt_connect_many(Port, Count) ->
    [
        begin
            ClientId = <<"client-", (integer_to_binary(N))/binary>>,
            {ok, Client} = emqtt_connect([
                {clientid, ClientId},
                {clean_start, false},
                {port, Port}
            ]),
            Client
        end
     || N <- lists:seq(1, Count)
    ].
|
||||
|
||||
%% Disconnect every client, ignoring any failure of an individual disconnect,
%% then sleep for 100 ms.
stop_many(Clients) ->
    _ = [
        try
            emqtt:disconnect(Client)
        catch
            %% deliberate best effort: a failing disconnect must not abort
            %% the teardown of the remaining clients
            _:_ -> ok
        end
     || Client <- Clients
    ],
    ct:sleep(100).
|
||||
|
||||
%% Probe whether a connection with Opts can be established: returns `ok'
%% (after disconnecting again) or the {error, _} tuple from emqtt_connect/1.
emqtt_try_connect(Opts) ->
    case emqtt_connect(Opts) of
        {error, _} = Error ->
            Error;
        {ok, Client} ->
            emqtt:disconnect(Client),
            ok
    end.
|
||||
|
||||
%% Same as start_cluster/3 with no extra application environment.
start_cluster(NamesWithPorts, Apps) ->
    start_cluster(NamesWithPorts, Apps, []).

%% Start one core node per {ShortName, Port} pair, each with a TCP listener
%% on its port, the ssl/ws/wss default listeners disabled and rpc mode async.
%% Env is a list of extra application environment entries for the nodes.
%% Snabbkaffe traces of every started node are forwarded to this node.
%% Returns [{Node, Port}].
start_cluster(NamesWithPorts, Apps, Env) ->
    Specs = lists:map(
        fun({ShortName, Port}) ->
            {core, ShortName, #{listener_ports => [{tcp, Port}]}}
        end,
        NamesWithPorts
    ),
    Opts0 = [
        %% A single `env' entry: the option list used to carry the key twice,
        %% so only one of the two lists could take effect. The caller's
        %% entries come after the default boot_modules setting.
        {env, [{emqx, boot_modules, [broker, listeners]}] ++ Env},
        {apps, Apps},
        {conf,
            [{[listeners, Proto, default, enabled], false} || Proto <- [ssl, ws, wss]] ++
                [{[rpc, mode], async}]}
    ],
    Cluster = emqx_common_test_helpers:emqx_cluster(
        Specs,
        Opts0
    ),
    NodesWithPorts = [
        {
            emqx_common_test_helpers:start_slave(Name, Opts),
            proplists:get_value(Name, NamesWithPorts)
        }
     || {Name, Opts} <- Cluster
    ],
    ok = lists:foreach(
        fun({Node, _Port}) ->
            snabbkaffe:forward_trace(Node)
        end,
        NodesWithPorts
    ),
    NodesWithPorts.
|
||||
|
||||
%% For every {Node, Port}: stop Apps on the node via rpc (results ignored),
%% run the common helpers' app shutdown there and stop the slave node.
stop_cluster(NodesWithPorts, Apps) ->
    StopNode = fun({Node, _Port}) ->
        _ = [rpc:call(Node, application, stop, [App]) || App <- Apps],
        %% This sleep is just to make logs cleaner
        ct:sleep(100),
        %% NOTE(review): the argument list is empty, so this invokes
        %% stop_apps/0, while the visible suites only call stop_apps/1;
        %% a badrpc result would be silently ignored here -- confirm arity.
        _ = rpc:call(Node, emqx_common_test_helpers, stop_apps, []),
        emqx_common_test_helpers:stop_slave(Node)
    end,
    lists:foreach(StopNode, NodesWithPorts).
|
||||
|
||||
%% Atom of the form Module__Case.
case_specific_node_name(Module, Case) ->
    Parts = [Module, '__', Case],
    concat_atoms(Parts).

%% Atom of the form Module__Case__Node.
case_specific_node_name(Module, Case, Node) ->
    Parts = [Module, '__', Case, '__', Node],
    concat_atoms(Parts).
|
||||
|
||||
%% Concatenate the textual names of Atoms into a single atom.
concat_atoms(Atoms) ->
    Names = [atom_to_binary(Atom) || Atom <- Atoms],
    binary_to_atom(iolist_to_binary(Names)).
|
|
@ -0,0 +1,9 @@
|
|||
emqx_node_rebalance
|
||||
=====
|
||||
|
||||
An OTP library
|
||||
|
||||
Build
|
||||
-----
|
||||
|
||||
$ rebar3 compile
|
|
@ -0,0 +1,3 @@
|
|||
##--------------------------------------------------------------------
|
||||
## EMQX Node Rebalance Plugin
|
||||
##--------------------------------------------------------------------
|
|
@ -0,0 +1,490 @@
|
|||
emqx_node_rebalance_api {
|
||||
|
||||
## API Request Fields
|
||||
|
||||
load_rebalance_status {
|
||||
desc {
|
||||
en: "Get rebalance status of the current node"
|
||||
zh: "获取当前节点的rebalance状态"
|
||||
}
|
||||
label {
|
||||
en: "Get rebalance status"
|
||||
zh: "获取rebalance状态"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_global_status {
|
||||
desc {
|
||||
en: "Get status of all rebalance/evacuation processes across the cluster"
|
||||
zh: "获取集群中所有rebalance/evacuation进程的状态"
|
||||
}
|
||||
label {
|
||||
en: "Get global rebalance status"
|
||||
zh: "获取全局rebalance状态"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_availability_check {
|
||||
desc {
|
||||
en: "Check if the node is being evacuated or rebalanced"
|
||||
zh: "检查节点是否正在被evacuate或rebalance"
|
||||
}
|
||||
label {
|
||||
en: "Availability check"
|
||||
zh: "可用性检查"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_start {
|
||||
desc {
|
||||
en: "Start rebalance process"
|
||||
zh: "启动rebalance进程"
|
||||
}
|
||||
label {
|
||||
en: "Start rebalance"
|
||||
zh: "启动rebalance"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_stop {
|
||||
desc {
|
||||
en: "Stop rebalance process"
|
||||
zh: "停止rebalance进程"
|
||||
}
|
||||
label {
|
||||
en: "Stop rebalance"
|
||||
zh: "停止rebalance"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_evacuation_start {
|
||||
desc {
|
||||
en: "Start evacuation process"
|
||||
zh: "启动evacuation进程"
|
||||
}
|
||||
label {
|
||||
en: "Start evacuation"
|
||||
zh: "启动evacuation"
|
||||
}
|
||||
}
|
||||
|
||||
load_rebalance_evacuation_stop {
|
||||
desc {
|
||||
en: "Stop evacuation process"
|
||||
zh: "停止evacuation进程"
|
||||
}
|
||||
label {
|
||||
en: "Stop evacuation"
|
||||
zh: "停止evacuation"
|
||||
}
|
||||
}
|
||||
|
||||
param_node {
|
||||
desc {
|
||||
en: "Node name"
|
||||
zh: "节点名称"
|
||||
}
|
||||
label {
|
||||
en: "Node name"
|
||||
zh: "节点名称"
|
||||
}
|
||||
}
|
||||
|
||||
wait_health_check {
|
||||
desc {
|
||||
en: "Time to wait before starting the rebalance process, in seconds"
|
||||
zh: "启动rebalance进程前等待的时间,单位为秒"
|
||||
}
|
||||
label {
|
||||
en: "Wait health check"
|
||||
zh: "等待健康检查"
|
||||
}
|
||||
}
|
||||
|
||||
conn_evict_rate {
|
||||
desc {
|
||||
en: "The rate of evicting connections, in connections per second"
|
||||
zh: "逐出连接的速率,以每秒连接数表示"
|
||||
}
|
||||
label {
|
||||
en: "Connection eviction rate"
|
||||
zh: "连接驱逐率"
|
||||
}
|
||||
}
|
||||
|
||||
sess_evict_rate {
|
||||
desc {
|
||||
en: "The rate of evicting sessions, in sessions per second"
|
||||
zh: "逐出会话的速率,以每秒会话为单位"
|
||||
}
|
||||
label {
|
||||
en: "Session eviction rate"
|
||||
zh: "会话驱逐率"
|
||||
}
|
||||
}
|
||||
|
||||
abs_conn_threshold {
|
||||
desc {
|
||||
en: "Maximum desired difference between the number of connections on the node and the average number of connections on the recipient nodes"
|
||||
zh: "节点上的连接数与接收节点上的平均连接数之间的最大期望差值"
|
||||
}
|
||||
label {
|
||||
en: "Absolute connection threshold"
|
||||
zh: "绝对连接阈值"
|
||||
}
|
||||
}
|
||||
|
||||
rel_conn_threshold {
|
||||
desc {
|
||||
en: "Maximum desired ratio of the number of connections on the node to the average number of connections on the recipient nodes"
|
||||
zh: "节点上的连接数与接收节点上的平均连接数之间的最大期望比值"
|
||||
}
|
||||
label {
|
||||
en: "Relative connection threshold"
|
||||
zh: "相对连接阈值"
|
||||
}
|
||||
}
|
||||
|
||||
abs_sess_threshold {
|
||||
desc {
|
||||
en: "Maximum desired difference between the number of sessions on the node and the average number of sessions on the recipient nodes"
|
||||
zh: "节点上的会话数与接收节点上的平均会话数之间的最大期望差异"
|
||||
}
|
||||
label {
|
||||
en: "Absolute session threshold"
|
||||
zh: "绝对会话阈值"
|
||||
}
|
||||
}
|
||||
|
||||
rel_sess_threshold {
|
||||
desc {
|
||||
en: "Maximum desired ratio of the number of sessions on the node to the average number of sessions on the recipient nodes"
|
||||
zh: "节点上的会话数与接收节点上的平均会话数之间的最大期望比值"
|
||||
}
|
||||
label {
|
||||
en: "Relative session threshold"
|
||||
zh: "相对会话阈值"
|
||||
}
|
||||
}
|
||||
|
||||
wait_takeover {
|
||||
desc {
|
||||
en: "Time to wait before starting session evacuation process, in seconds"
|
||||
zh: "开始会话疏散过程之前等待的时间,以秒为单位"
|
||||
}
|
||||
label {
|
||||
en: "Wait takeover"
|
||||
zh: "等待接管"
|
||||
}
|
||||
}
|
||||
|
||||
redirect_to {
|
||||
desc {
|
||||
en: "Server reference to redirect clients to (MQTTv5 Server redirection)"
|
||||
zh: "将客户端重定向到的服务器参考(MQTTv5 服务器重定向)"
|
||||
}
|
||||
label {
|
||||
en: "Redirect to"
|
||||
zh: "重定向至"
|
||||
}
|
||||
}
|
||||
|
||||
migrate_to {
|
||||
desc {
|
||||
en: "Nodes to migrate sessions to"
|
||||
zh: "将会话迁移到的节点"
|
||||
}
|
||||
label {
|
||||
en: "Migrate to"
|
||||
zh: "迁移到"
|
||||
}
|
||||
}
|
||||
|
||||
rebalance_nodes {
|
||||
desc {
|
||||
en: "Nodes to participate in rebalance"
|
||||
zh: "参与rebalance的节点"
|
||||
}
|
||||
label {
|
||||
en: "Rebalance nodes"
|
||||
zh: "重新平衡节点"
|
||||
}
|
||||
}
|
||||
|
||||
## API Response Fields
|
||||
|
||||
local_status_enabled {
|
||||
desc {
|
||||
en: "Whether the node is being evacuated"
|
||||
zh: "节点是否正在撤离"
|
||||
}
|
||||
label {
|
||||
en: "Local evacuation status"
|
||||
zh: "本地疏散状态"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_process {
|
||||
desc {
|
||||
en: "The process that is being performed on the node: evacuation or rebalance"
|
||||
zh: "正在节点上执行的过程:疏散或重新平衡"
|
||||
}
|
||||
label {
|
||||
en: "Node process"
|
||||
zh: "节点进程"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_state {
|
||||
desc {
|
||||
en: "The state of the process that is being performed on the node"
|
||||
zh: "正在节点上执行的进程的状态"
|
||||
}
|
||||
label {
|
||||
en: "Rebalance/evacuation current state"
|
||||
zh: "重新平衡/疏散当前状态"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_coordinator_node {
|
||||
desc {
|
||||
en: "The node that is coordinating rebalance process"
|
||||
zh: "协调再平衡过程的节点"
|
||||
}
|
||||
label {
|
||||
en: "Coordinator node"
|
||||
zh: "协调节点"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_connection_eviction_rate {
|
||||
desc {
|
||||
en: "The rate of evicting connections, in connections per second"
|
||||
zh: "逐出连接的速率,以每秒连接数表示"
|
||||
}
|
||||
label {
|
||||
en: "Connection eviction rate"
|
||||
zh: "连接驱逐率"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_session_eviction_rate {
|
||||
desc {
|
||||
en: "The rate of evicting sessions, in sessions per second"
|
||||
zh: "逐出会话的速率,以每秒会话为单位"
|
||||
}
|
||||
label {
|
||||
en: "Session eviction rate"
|
||||
zh: "会话驱逐率"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_connection_goal {
|
||||
desc {
|
||||
en: "The number of connections that the node should have after the rebalance/evacuation process"
|
||||
zh: "节点在重新平衡/疏散过程后应该拥有的连接数"
|
||||
}
|
||||
label {
|
||||
en: "Connection goal"
|
||||
zh: "连接目标"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_session_goal {
|
||||
desc {
|
||||
en: "The number of sessions that the node should have after the evacuation process"
|
||||
zh: "疏散过程后节点应有的会话数"
|
||||
}
|
||||
label {
|
||||
en: "Session goal"
|
||||
zh: "会话目标"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_disconnected_session_goal {
|
||||
desc {
|
||||
en: "The number of disconnected sessions that the node should have after the rebalance process"
|
||||
zh: "重新平衡过程后节点应具有的断开连接的会话数"
|
||||
}
|
||||
label {
|
||||
en: "Disconnected session goal"
|
||||
zh: "断开连接的会话目标"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_session_recipients {
|
||||
desc {
|
||||
en: "List of nodes to which sessions are being evacuated"
|
||||
zh: "会话被疏散到的节点列表"
|
||||
}
|
||||
label {
|
||||
en: "Session recipients"
|
||||
zh: "会话接收节点"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_recipients {
|
||||
desc {
|
||||
en: "List of nodes to which connections/sessions are being evacuated during rebalance"
|
||||
zh: "在重新平衡期间连接/会话被疏散到的节点列表"
|
||||
}
|
||||
label {
|
||||
en: "Recipients"
|
||||
zh: "接收节点"
|
||||
}
|
||||
}
|
||||
|
||||
local_status_stats {
|
||||
desc {
|
||||
en: "Statistics of the evacuation/rebalance process"
|
||||
zh: "疏散/再平衡过程的统计"
|
||||
}
|
||||
label {
|
||||
en: "Statistics"
|
||||
zh: "统计数据"
|
||||
}
|
||||
}
|
||||
|
||||
status_stats_initial_connected {
|
||||
desc {
|
||||
en: "The number of connections on the node before the evacuation/rebalance process"
|
||||
zh: "疏散/重新平衡过程之前节点上的连接数"
|
||||
}
|
||||
label {
|
||||
en: "Initial connected"
|
||||
zh: "初始连接"
|
||||
}
|
||||
}
|
||||
|
||||
status_stats_current_connected {
|
||||
desc {
|
||||
en: "Current number of connections on the node"
|
||||
zh: "节点上的当前连接数"
|
||||
}
|
||||
label {
|
||||
en: "Current connections"
|
||||
zh: "当前连接"
|
||||
}
|
||||
}
|
||||
|
||||
status_stats_initial_sessions {
|
||||
desc {
|
||||
en: "The number of sessions on the node before the evacuation/rebalance process"
|
||||
zh: "疏散/重新平衡过程之前节点上的会话数"
|
||||
}
|
||||
label {
|
||||
en: "Initial sessions"
|
||||
zh: "初始会话"
|
||||
}
|
||||
}
|
||||
|
||||
status_stats_current_sessions {
|
||||
desc {
|
||||
en: "Current number of sessions on the node"
|
||||
zh: "节点上的当前会话数"
|
||||
}
|
||||
label {
|
||||
en: "Current sessions"
|
||||
zh: "当前会话"
|
||||
}
|
||||
}
|
||||
|
||||
status_stats_current_disconnected_sessions {
|
||||
desc {
|
||||
en: "Current number of disconnected sessions on the node"
|
||||
zh: "节点上当前断开连接的会话数"
|
||||
}
|
||||
label {
|
||||
en: "Current disconnected sessions"
|
||||
zh: "当前断开连接的会话"
|
||||
}
|
||||
}
|
||||
|
||||
coordinator_status_donors {
|
||||
desc {
|
||||
en: "List of nodes from which connections/sessions are being evacuated"
|
||||
zh: "正在疏散连接/会话的节点列表"
|
||||
}
|
||||
label {
|
||||
en: "Donors"
|
||||
zh: "捐助者"
|
||||
}
|
||||
}
|
||||
|
||||
coordinator_status_donor_conn_avg {
|
||||
desc {
|
||||
en: "Average number of connections per donor node"
|
||||
zh: "每个供体节点的平均连接数"
|
||||
}
|
||||
label {
|
||||
en: "Donor connections average"
|
||||
zh: "捐助者连接平均值"
|
||||
}
|
||||
}
|
||||
|
||||
coordinator_status_donor_sess_avg {
|
||||
desc {
|
||||
en: "Average number of sessions per donor node"
|
||||
zh: "每个供体节点的平均会话数"
|
||||
}
|
||||
label {
|
||||
en: "Donor sessions average"
|
||||
zh: "捐助者会话平均值"
|
||||
}
|
||||
}
|
||||
|
||||
coordinator_status_node {
|
||||
desc {
|
||||
en: "The node that is coordinating the evacuation/rebalance process"
|
||||
zh: "协调疏散/再平衡过程的节点"
|
||||
}
|
||||
label {
|
||||
en: "Coordinator node"
|
||||
zh: "协调节点"
|
||||
}
|
||||
}
|
||||
|
||||
evacuation_status_node {
|
||||
desc {
|
||||
en: "The node that is being evacuated"
|
||||
zh: "正在撤离的节点"
|
||||
}
|
||||
label {
|
||||
en: "Evacuated node"
|
||||
zh: "疏散节点"
|
||||
}
|
||||
}
|
||||
|
||||
global_status_evacuations {
|
||||
desc {
|
||||
en: "List of nodes that are being evacuated"
|
||||
zh: "正在撤离的节点列表"
|
||||
}
|
||||
label {
|
||||
en: "Evacuations"
|
||||
zh: "疏散"
|
||||
}
|
||||
}
|
||||
|
||||
global_status_rebalances {
|
||||
desc {
|
||||
en: "List of nodes that coordinate a rebalance"
|
||||
zh: "协调再平衡的节点列表"
|
||||
}
|
||||
label {
|
||||
en: "Rebalances"
|
||||
zh: "再平衡"
|
||||
}
|
||||
}
|
||||
|
||||
empty_response {
|
||||
desc {
|
||||
en: "The response is empty"
|
||||
zh: "响应为空"
|
||||
}
|
||||
label {
|
||||
en: "Empty response"
|
||||
zh: "空响应"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-define(DEFAULT_CONN_EVICT_RATE, 500).
|
||||
-define(DEFAULT_SESS_EVICT_RATE, 500).
|
||||
|
||||
%% sec
|
||||
-define(DEFAULT_WAIT_HEALTH_CHECK, 60).
|
||||
%% sec
|
||||
-define(DEFAULT_WAIT_TAKEOVER, 60).
|
||||
|
||||
-define(DEFAULT_ABS_CONN_THRESHOLD, 1000).
|
||||
-define(DEFAULT_ABS_SESS_THRESHOLD, 1000).
|
||||
|
||||
-define(DEFAULT_REL_CONN_THRESHOLD, 1.1).
|
||||
-define(DEFAULT_REL_SESS_THRESHOLD, 1.1).
|
||||
|
||||
-define(EVICT_INTERVAL, 1000).
|
||||
|
||||
-define(EVACUATION_FILENAME, <<".evacuation">>).
|
|
@ -0,0 +1,2 @@
|
|||
{deps, [{emqx, {path, "../../apps/emqx"}}]}.
|
||||
{project_plugins, [erlfmt]}.
|
|
@ -0,0 +1,22 @@
|
|||
{application, emqx_node_rebalance, [
    {description, "EMQX Node Rebalance"},
    {vsn, "5.0.0"},
    {registered, [
        emqx_node_rebalance_sup,
        emqx_node_rebalance,
        emqx_node_rebalance_agent,
        emqx_node_rebalance_evacuation
    ]},
    %% emqx and emqx_eviction_agent are listed because this application's
    %% modules call into them at runtime (e.g. emqx_eviction_agent:status/0),
    %% so they have to be started first.
    {applications, [
        kernel,
        stdlib,
        emqx,
        emqx_eviction_agent
    ]},
    {mod, {emqx_node_rebalance_app, []}},
    {env, []},
    {modules, []},
    {maintainers, ["EMQX Team <contact@emqx.io>"]},
    {links, [
        {"Homepage", "https://emqx.io/"},
        {"Github", "https://github.com/emqx"}
    ]}
]}.
|
|
@ -0,0 +1,3 @@
|
|||
%% -*- mode: erlang -*-
|
||||
%% Unless you know what you are doing, DO NOT edit manually!!
|
||||
{VSN, [{<<".*">>, []}], [{<<".*">>, []}]}.
|
|
@ -0,0 +1,438 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance).
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start/1,
|
||||
status/0,
|
||||
status/1,
|
||||
stop/0
|
||||
]).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-behaviour(gen_statem).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
callback_mode/0,
|
||||
handle_event/4,
|
||||
code_change/4
|
||||
]).
|
||||
|
||||
-export([
|
||||
is_node_available/0,
|
||||
available_nodes/1,
|
||||
connection_count/0,
|
||||
session_count/0,
|
||||
disconnected_session_count/0
|
||||
]).
|
||||
|
||||
-export_type([
|
||||
start_opts/0,
|
||||
start_error/0
|
||||
]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Options accepted by start/1. Every key is optional; missing keys are
%% filled in from default_opts/0.
-type start_opts() :: #{
    conn_evict_rate => pos_integer(),
    sess_evict_rate => pos_integer(),
    wait_health_check => pos_integer(),
    wait_takeover => pos_integer(),
    abs_conn_threshold => pos_integer(),
    rel_conn_threshold => number(),
    abs_sess_threshold => pos_integer(),
    rel_sess_threshold => number(),
    %% delay between two eviction rounds, used as a state_timeout value
    evict_interval => pos_integer(),
    nodes => [node()]
}.
%% `nothing_to_balance' is returned when the selected nodes are already
%% within the configured thresholds (or there are no donors/recipients).
-type start_error() :: already_started | nothing_to_balance | [{node(), term()}].
|
||||
|
||||
%% Ask the local coordinator process to start a rebalance. StartOpts
%% override the values of default_opts/0.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    Defaults = default_opts(),
    gen_statem:call(?MODULE, {start, maps:merge(Defaults, StartOpts)}).
|
||||
|
||||
%% Ask the local coordinator process to stop the running rebalance.
-spec stop() -> ok_or_error(not_started).
stop() ->
    Request = stop,
    gen_statem:call(?MODULE, Request).
|
||||
|
||||
%% Status of the locally registered coordinator process.
-spec status() -> disabled | {enabled, map()}.
status() ->
    status_of(?MODULE).

%% Status of the coordinator process identified by Pid.
-spec status(pid()) -> disabled | {enabled, map()}.
status(Pid) ->
    status_of(Pid).

%% Shared implementation of status/0 and status/1.
status_of(ServerRef) ->
    gen_statem:call(ServerRef, status).
|
||||
|
||||
%% Start the coordinator state machine, registered locally under the
%% module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_statem:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% Query Nodes through emqx_node_rebalance_proto_v1 and keep only the atom
%% results, i.e. the node names returned by is_node_available/0.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _BadNodes} = emqx_node_rebalance_proto_v1:available_nodes(Nodes),
    [Reply || Reply <- Replies, is_atom(Reply)].
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_statem callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_statem callback: all events are dispatched to handle_event/4.
callback_mode() ->
    handle_event_function.
|
||||
|
||||
%% states: disabled, wait_health_check, evicting_conns, wait_takeover, evicting_sessions
|
||||
|
||||
%% gen_statem callback: emits the emqx_node_rebalance_started trace point and
%% starts in state `disabled' with empty data.
init([]) ->
    ?tp(debug, emqx_node_rebalance_started, #{}),
    InitialData = #{},
    {ok, disabled, InitialData}.
|
||||
|
||||
%% gen_statem event handler. State flow of a successful run:
%%   disabled -> wait_health_check -> evicting_conns -> wait_takeover
%%            -> evicting_sessions -> disabled
%% Eviction is repeated every `evict_interval' (a state_timeout) until
%% evict_conns/1 or evict_sessions/1 returns `ok'.

%% start: only accepted in state `disabled'
handle_event(
    {call, Caller},
    {start, #{wait_health_check := HealthCheckSec} = Opts},
    disabled,
    #{} = Data
) ->
    case enable_rebalance(Data#{opts => Opts}) of
        {error, Reason} ->
            ?SLOG(warning, #{
                msg => "node_rebalance_enable_failed",
                reason => Reason
            }),
            {keep_state_and_data, [{reply, Caller, {error, Reason}}]};
        {ok, EnabledData} ->
            ?SLOG(warning, #{msg => "node_rebalance_enabled", opts => Opts}),
            Actions = [
                {state_timeout, seconds(HealthCheckSec), evict_conns},
                {reply, Caller, ok}
            ],
            {next_state, wait_health_check, EnabledData, Actions}
    end;
handle_event({call, Caller}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, already_started}}]};
%% stop
handle_event({call, Caller}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, {error, not_started}}]};
handle_event({call, Caller}, stop, _ActiveState, Data) ->
    ok = disable_rebalance(Data),
    ?SLOG(warning, #{msg => "node_rebalance_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, Caller, ok}]};
%% status
handle_event({call, Caller}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, Caller, disabled}]};
handle_event({call, Caller}, status, ActiveState, Data) ->
    Stats = get_stats(ActiveState, Data),
    Status = Stats#{state => ActiveState, coordinator_node => node()},
    {keep_state_and_data, [{reply, Caller, {enabled, Status}}]};
%% conn eviction
handle_event(state_timeout, evict_conns, wait_health_check, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_health_check_over"}),
    %% a zero timeout triggers the first eviction round immediately
    {next_state, evicting_conns, Data, [{state_timeout, 0, evict_conns}]};
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{opts := #{wait_takeover := TakeoverSec, evict_interval := Interval}} = Data
) ->
    case evict_conns(Data) of
        {continue, UpdatedData} ->
            {keep_state, UpdatedData, [{state_timeout, Interval, evict_conns}]};
        ok ->
            ?SLOG(warning, #{msg => "node_rebalance_evict_conns_over"}),
            Actions = [{state_timeout, seconds(TakeoverSec), evict_sessions}],
            {next_state, wait_takeover, Data, Actions}
    end;
%% session eviction
handle_event(state_timeout, evict_sessions, wait_takeover, Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_wait_takeover_over"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{opts := #{evict_interval := Interval}} = Data
) ->
    case evict_sessions(Data) of
        {continue, UpdatedData} ->
            {keep_state, UpdatedData, [{state_timeout, Interval, evict_sessions}]};
        ok ->
            ?tp(debug, emqx_node_rebalance_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_rebalance_evict_sessions_over"}),
            ok = disable_rebalance(Data),
            ?SLOG(warning, #{msg => "node_rebalance_finished_successfully"}),
            {next_state, disabled, deinit(Data)}
    end;
%% anything else is logged and otherwise ignored
handle_event({call, Caller}, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_call", call => Msg}),
    {keep_state_and_data, [{reply, Caller, ignored}]};
handle_event(info, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_info", info => Msg}),
    keep_state_and_data;
handle_event(cast, Msg, _State, _Data) ->
    ?SLOG(warning, #{msg => "node_rebalance_unknown_cast", cast => Msg}),
    keep_state_and_data.
|
||||
|
||||
%% gen_statem callback: state and data are carried over unchanged.
code_change(_OldVsn, State, Data, _Extra) ->
    Unchanged = {ok, State, Data},
    Unchanged.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% internal funs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Collect connection and session counts from the configured nodes and split
%% the nodes by connection count: at or above the average are donors, below
%% are recipients. If need_rebalance/5 agrees, the rebalance agent is enabled
%% on the donors and the node sets plus initial counts are stored in Data;
%% otherwise {error, nothing_to_balance} is returned.
enable_rebalance(#{opts := Opts} = Data) ->
    Nodes = maps:get(nodes, Opts),
    ConnCounts = multicall(Nodes, connection_counts, []),
    SessCounts = multicall(Nodes, session_counts, []),
    Avg = avg([Count || {_Node, Count} <- ConnCounts]),
    IsDonor = fun({_Node, Count}) -> Count >= Avg end,
    {DonorCounts, RecipientCounts} = lists:partition(IsDonor, ConnCounts),
    ?SLOG(warning, #{
        msg => "node_rebalance_enabling",
        conn_counts => ConnCounts,
        donor_counts => DonorCounts,
        recipient_counts => RecipientCounts
    }),
    DonorNodes = [Node || {Node, _Count} <- DonorCounts],
    RecipientNodes = [Node || {Node, _Count} <- RecipientCounts],
    case need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) of
        true ->
            _ = multicall(DonorNodes, enable_rebalance_agent, [self()]),
            {ok, Data#{
                donors => DonorNodes,
                recipients => RecipientNodes,
                initial_conn_counts => maps:from_list(ConnCounts),
                initial_sess_counts => maps:from_list(SessCounts)
            }};
        false ->
            {error, nothing_to_balance}
    end.
|
||||
|
||||
%% Disable the rebalance agent on every donor node on behalf of this process.
disable_rebalance(#{donors := Donors}) ->
    Coordinator = self(),
    _ = multicall(Donors, disable_rebalance_agent, [Coordinator]),
    ok.
|
||||
|
||||
%% One connection-eviction round. Returns `ok' when the donors' average
%% connection count is within the `conn' thresholds of the recipients'
%% average. Otherwise asks every donor above the recipient average to evict
%% `conn_evict_rate' connections and returns {continue, Data} extended with
%% the averages and per-node counts just measured.
evict_conns(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, connection_counts, []),
    RecipientNodeCounts = multicall(RecipientNodes, connection_counts, []),
    DonorAvg = avg([Count || {_Node, Count} <- DonorNodeCounts]),
    RecipientAvg = avg([Count || {_Node, Count} <- RecipientNodeCounts]),
    case within_thresholds(DonorAvg, RecipientAvg, thresholds(conn, Opts)) of
        false ->
            ConnEvictRate = maps:get(conn_evict_rate, Opts),
            NodesToEvict = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_conns",
                nodes => NodesToEvict,
                counts => ConnEvictRate
            }),
            _ = multicall(NodesToEvict, evict_connections, [ConnEvictRate]),
            {continue, Data#{
                donor_conn_avg => DonorAvg,
                recipient_conn_avg => RecipientAvg,
                donor_conn_counts => maps:from_list(DonorNodeCounts),
                recipient_conn_counts => maps:from_list(RecipientNodeCounts)
            }};
        true ->
            ok
    end.
|
||||
|
||||
%% One session-eviction round, based on disconnected session counts. Returns
%% `ok' when the donors' average is within the `sess' thresholds of the
%% recipients' average. Otherwise asks every donor above the recipient
%% average to evict `sess_evict_rate' disconnected sessions to the recipient
%% nodes and returns {continue, Data} extended with the measured values.
evict_sessions(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, disconnected_session_counts, []),
    RecipientNodeCounts = multicall(RecipientNodes, disconnected_session_counts, []),
    DonorAvg = avg([Count || {_Node, Count} <- DonorNodeCounts]),
    RecipientAvg = avg([Count || {_Node, Count} <- RecipientNodeCounts]),
    case within_thresholds(DonorAvg, RecipientAvg, thresholds(sess, Opts)) of
        false ->
            SessEvictRate = maps:get(sess_evict_rate, Opts),
            NodesToEvict = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?SLOG(warning, #{
                msg => "node_rebalance_evict_sessions",
                nodes => NodesToEvict,
                counts => SessEvictRate
            }),
            EvictArgs = [SessEvictRate, RecipientNodes, disconnected],
            _ = multicall(NodesToEvict, evict_sessions, EvictArgs),
            {continue, Data#{
                donor_sess_avg => DonorAvg,
                recipient_sess_avg => RecipientAvg,
                donor_sess_counts => maps:from_list(DonorNodeCounts),
                recipient_sess_counts => maps:from_list(RecipientNodeCounts)
            }};
        true ->
            ok
    end.
|
||||
|
||||
%% Decide whether a rebalance is worthwhile: never without donors or without
%% recipients; otherwise when the donor average exceeds the thresholds for
%% connections or for sessions. The decision is also emitted as the
%% emqx_node_rebalance_need_rebalance trace point.
need_rebalance([], _RecipientNodes, _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(_DonorNodes, [], _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) ->
    DonorConnAvg = avg_for_nodes(DonorNodes, ConnCounts),
    RecipientConnAvg = avg_for_nodes(RecipientNodes, ConnCounts),
    DonorSessAvg = avg_for_nodes(DonorNodes, SessCounts),
    RecipientSessAvg = avg_for_nodes(RecipientNodes, SessCounts),
    %% balanced only if both metrics are within their thresholds; the session
    %% check is skipped when the connection check already fails
    Balanced =
        within_thresholds(DonorConnAvg, RecipientConnAvg, thresholds(conn, Opts)) andalso
            within_thresholds(DonorSessAvg, RecipientSessAvg, thresholds(sess, Opts)),
    Result = not Balanced,
    ?tp(
        debug,
        emqx_node_rebalance_need_rebalance,
        #{
            donors => DonorNodes,
            recipients => RecipientNodes,
            conn_counts => ConnCounts,
            sess_counts => SessCounts,
            opts => Opts,
            result => Result
        }
    ),
    Result.
|
||||
|
||||
%% Average of the counts that belong to Nodes; Counts is a list of
%% {Node, Count} pairs.
avg_for_nodes(Nodes, Counts) ->
    CountByNode = maps:from_list(Counts),
    Selected = maps:with(Nodes, CountByNode),
    avg(maps:values(Selected)).
|
||||
|
||||
%% True when Value does not exceed GoalValue by more than the absolute
%% threshold, or does not exceed GoalValue scaled by the relative threshold.
within_thresholds(Value, GoalValue, {AbsThres, RelThres}) ->
    case Value =< GoalValue + AbsThres of
        true -> true;
        %% the relative limit is only computed when the absolute check fails
        false -> Value =< GoalValue * RelThres
    end.
|
||||
|
||||
%% Pick the {Absolute, Relative} threshold pair for connections (`conn') or
%% sessions (`sess') out of the rebalance options.
thresholds(conn, #{abs_conn_threshold := AbsThres, rel_conn_threshold := RelThres}) ->
    {AbsThres, RelThres};
thresholds(sess, #{abs_sess_threshold := AbsThres, rel_sess_threshold := RelThres}) ->
    {AbsThres, RelThres}.
|
||||
|
||||
%% Nodes whose count is strictly above Goal, in the order of NodeCounts.
nodes_to_evict(Goal, NodeCounts) ->
    [Node || {Node, Count} <- NodeCounts, Count > Goal].
|
||||
|
||||
%% Statistics exposed through the status call: nothing while disabled, the
%% complete state machine data otherwise.
get_stats(State, Data) ->
    case State of
        disabled -> #{};
        _Active -> Data
    end.
|
||||
|
||||
%% Arithmetic mean of a non-empty list of numbers, always a float.
%% An empty list is rejected with function_clause; the non-empty check is a
%% pattern match instead of a length/1 guard, which walked the whole list
%% just to test for emptiness.
avg([_ | _] = List) ->
    lists:sum(List) / length(List).
|
||||
|
||||
%% Invoke emqx_node_rebalance_proto_v1:F(Nodes, A...) and return
%% [{Node, Value}] where Value is the payload of an {ok, Value} reply or `ok'.
%% Raises error({bad_nodes, _}) if any node is reported as bad or any reply
%% is neither `ok' nor {ok, _}.
multicall(Nodes, F, A) ->
    case apply(emqx_node_rebalance_proto_v1, F, [Nodes | A]) of
        {_, [_BadNode | _] = BadNodes} ->
            error({bad_nodes, BadNodes});
        {Replies, []} ->
            Tagged = lists:zip(Nodes, Replies),
            case lists:partition(fun is_ok/1, Tagged) of
                {Succeeded, []} ->
                    [{Node, ok_result(Reply)} || {Node, Reply} <- Succeeded];
                {_, Failed} ->
                    error({bad_nodes, Failed})
            end
    end.
|
||||
|
||||
%% True for a {Node, Reply} pair whose reply is `ok' or {ok, _}.
is_ok({_Node, ok}) ->
    true;
is_ok({_Node, {ok, _Value}}) ->
    true;
is_ok(_Other) ->
    false.
|
||||
|
||||
%% Unwrap a successful reply: `ok' stays `ok', {ok, Value} becomes Value.
ok_result(ok) ->
    ok;
ok_result({ok, Value}) ->
    Value.
|
||||
|
||||
%% {ok, Count} with the connection count reported by the local eviction agent.
connection_count() ->
    Count = emqx_eviction_agent:connection_count(),
    {ok, Count}.
|
||||
|
||||
%% {ok, Count} with the session count reported by the local eviction agent.
session_count() ->
    Count = emqx_eviction_agent:session_count(),
    {ok, Count}.
|
||||
|
||||
%% {ok, Count} with the eviction agent's session count for `disconnected'.
disconnected_session_count() ->
    Count = emqx_eviction_agent:session_count(disconnected),
    {ok, Count}.
|
||||
|
||||
%% Defaults merged underneath the options passed to start/1. The node list
%% defaults to all_nodes/0, evaluated at call time.
default_opts() ->
    Nodes = all_nodes(),
    #{
        %% participating nodes
        nodes => Nodes,

        %% timing
        wait_health_check => ?DEFAULT_WAIT_HEALTH_CHECK,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER,
        evict_interval => ?EVICT_INTERVAL,

        %% connections
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
        abs_conn_threshold => ?DEFAULT_ABS_CONN_THRESHOLD,
        rel_conn_threshold => ?DEFAULT_REL_CONN_THRESHOLD,

        %% sessions
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        abs_sess_threshold => ?DEFAULT_ABS_SESS_THRESHOLD,
        rel_sess_threshold => ?DEFAULT_REL_SESS_THRESHOLD
    }.
|
||||
|
||||
%% Drop the per-run statistics and options from the state machine data when
%% a run ends. Keys not listed here (such as `donors' and `recipients') are
%% kept.
deinit(Data) ->
    PerRunKeys = [
        opts,
        initial_conn_counts,
        initial_sess_counts,
        donor_conn_avg,
        donor_sess_avg,
        donor_conn_counts,
        donor_sess_counts,
        recipient_conn_avg,
        recipient_sess_avg,
        recipient_conn_counts,
        recipient_sess_counts
    ],
    lists:foldl(fun maps:remove/2, Data, PerRunKeys).
|
||||
|
||||
%% Returns node() when the emqx_node_rebalance_agent process is registered
%% locally and the eviction agent status is `disabled'; otherwise crashes
%% with badmatch.
is_node_available() ->
    AgentPid = whereis(emqx_node_rebalance_agent),
    true = is_pid(AgentPid),
    disabled = emqx_eviction_agent:status(),
    node().
|
||||
|
||||
%% Running nodes as reported by mria_mnesia.
all_nodes() ->
    Running = mria_mnesia:running_nodes(),
    Running.
|
||||
|
||||
%% Convert seconds (possibly fractional) to a whole number of milliseconds.
seconds(Sec) ->
    Millis = timer:seconds(Sec),
    round(Millis).
|
|
@ -0,0 +1,131 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_agent).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
-include_lib("stdlib/include/qlc.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start_link/0,
|
||||
enable/1,
|
||||
disable/1,
|
||||
status/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
handle_call/3,
|
||||
handle_info/2,
|
||||
handle_cast/2,
|
||||
code_change/3
|
||||
]).
|
||||
|
||||
-define(ENABLE_KIND, emqx_node_rebalance).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-type status() :: {enabled, pid()} | disabled.
|
||||
|
||||
%% Starts the agent as a gen_server registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
|
||||
|
||||
%% Synchronously asks the locally registered agent to enable itself on behalf
%% of CoordinatorPid.
-spec enable(pid()) -> ok_or_error(already_enabled | eviction_agent_busy).
enable(CoordinatorPid) ->
    Request = {enable, CoordinatorPid},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% Synchronously asks the locally registered agent to disable itself; the
%% agent only honours the request when CoordinatorPid is the pid that
%% enabled it.
-spec disable(pid()) -> ok_or_error(already_disabled | invalid_coordinator).
disable(CoordinatorPid) ->
    Request = {disable, CoordinatorPid},
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%% Queries the locally registered agent: `{enabled, CoordinatorPid}' or
%% `disabled'.
-spec status() -> status().
status() ->
    Request = status,
    gen_server:call(?MODULE, Request).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_server callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% The agent starts with an empty state map, i.e. disabled: no
%% `coordinator_pid' key is present.
init([]) ->
    InitialState = #{},
    {ok, InitialState}.
|
||||
|
||||
%% Synchronous requests understood by the agent:
%%   {enable, Pid}  - see do_enable/2
%%   {disable, Pid} - see do_disable/2
%%   status         - see do_status/1
%% Anything else is logged and answered with `ignored'.
handle_call({enable, CoordinatorPid}, _From, St) ->
    do_enable(CoordinatorPid, St);
handle_call({disable, CoordinatorPid}, _From, St) ->
    do_disable(CoordinatorPid, St);
handle_call(status, _From, St) ->
    do_status(St);
handle_call(Msg, _From, St) ->
    ?SLOG(warning, #{
        msg => "unknown_call",
        call => Msg,
        state => St
    }),
    {reply, ignored, St}.

%% Enabling is refused while a coordinator is already recorded. Otherwise the
%% agent links to the coordinator and to the process registered as
%% `emqx_eviction_agent', then enables the eviction agent. When the eviction
%% agent is busy both links are removed again and the state is left untouched.
do_enable(_CoordinatorPid, #{coordinator_pid := _Pid} = St) ->
    {reply, {error, already_enabled}, St};
do_enable(CoordinatorPid, St) ->
    true = link(CoordinatorPid),
    EvictionAgentPid = whereis(emqx_eviction_agent),
    true = link(EvictionAgentPid),
    case emqx_eviction_agent:enable(?ENABLE_KIND, undefined) of
        ok ->
            EnabledSt = #{
                coordinator_pid => CoordinatorPid,
                eviction_agent_pid => EvictionAgentPid
            },
            {reply, ok, EnabledSt};
        {error, eviction_agent_busy} ->
            true = unlink(EvictionAgentPid),
            true = unlink(CoordinatorPid),
            {reply, {error, eviction_agent_busy}, St}
    end.

%% Disabling succeeds only for the pid recorded as coordinator: the eviction
%% agent is disabled (its reply is ignored), both links are dropped and the
%% two pid keys are removed from the state.
do_disable(
    CoordinatorPid,
    #{coordinator_pid := CoordinatorPid, eviction_agent_pid := EvictionAgentPid} = St
) ->
    _ = emqx_eviction_agent:disable(?ENABLE_KIND),
    true = unlink(EvictionAgentPid),
    true = unlink(CoordinatorPid),
    DisabledSt = maps:without([coordinator_pid, eviction_agent_pid], St),
    {reply, ok, DisabledSt};
do_disable(_OtherPid, #{coordinator_pid := _CoordinatorPid} = St) ->
    {reply, {error, invalid_coordinator}, St};
do_disable(_AnyPid, #{} = St) ->
    {reply, {error, already_disabled}, St}.

%% Reports `{enabled, CoordinatorPid}' while a coordinator is recorded,
%% `disabled' otherwise.
do_status(#{coordinator_pid := Pid} = St) ->
    {reply, {enabled, Pid}, St};
do_status(St) ->
    {reply, disabled, St}.
|
||||
|
||||
%% No out-of-band messages are expected: log whatever arrives together with
%% the current state and carry on unchanged.
handle_info(Info, State) ->
    ?SLOG(warning, #{
        state => State,
        info => Info,
        msg => "unknown_info"
    }),
    {noreply, State}.
|
||||
|
||||
%% No casts are expected: log whatever arrives together with the current
%% state and carry on unchanged.
handle_cast(Cast, State) ->
    ?SLOG(warning, #{
        state => State,
        cast => Cast,
        msg => "unknown_cast"
    }),
    {noreply, State}.
|
||||
|
||||
%% Code upgrades keep the state as is.
code_change(_Vsn, St, _Extra) ->
    Unchanged = St,
    {ok, Unchanged}.
|
|
@ -0,0 +1,738 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_node_rebalance_api).
|
||||
|
||||
-behaviour(minirest_api).
|
||||
|
||||
-include_lib("typerefl/include/types.hrl").
|
||||
-include_lib("hocon/include/hoconsc.hrl").
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
|
||||
%% Swagger specs from hocon schema
|
||||
-export([
|
||||
api_spec/0,
|
||||
paths/0,
|
||||
schema/1,
|
||||
namespace/0
|
||||
]).
|
||||
|
||||
-export([
|
||||
fields/1,
|
||||
roots/0
|
||||
]).
|
||||
|
||||
%% API callbacks
|
||||
-export([
|
||||
'/load_rebalance/status'/2,
|
||||
'/load_rebalance/global_status'/2,
|
||||
'/load_rebalance/availability_check'/2,
|
||||
'/load_rebalance/:node/start'/2,
|
||||
'/load_rebalance/:node/stop'/2,
|
||||
'/load_rebalance/:node/evacuation/start'/2,
|
||||
'/load_rebalance/:node/evacuation/stop'/2
|
||||
]).
|
||||
|
||||
%% Schema examples
|
||||
-export([
|
||||
rebalance_example/0,
|
||||
rebalance_evacuation_example/0,
|
||||
translate/2
|
||||
]).
|
||||
|
||||
-import(hoconsc, [mk/2, ref/1, ref/2]).
|
||||
-import(emqx_dashboard_swagger, [error_codes/2]).
|
||||
|
||||
-define(BAD_REQUEST, 'BAD_REQUEST').
|
||||
-define(NODE_UNAVAILABLE, 'NODE_UNAVAILABLE').
|
||||
-define(NODE_EVACUATING, 'NODE_EVACUATING').
|
||||
-define(RPC_ERROR, 'RPC_ERROR').
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% API Spec
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Namespace string returned for this API schema module.
namespace() -> "load_rebalance".
|
||||
|
||||
%% Builds the API spec for this module through emqx_dashboard_swagger with
%% `check_schema' switched on.
api_spec() ->
    SpecOpts = #{check_schema => true},
    emqx_dashboard_swagger:spec(?MODULE, SpecOpts).
|
||||
|
||||
%% All endpoint paths served by this module; each one has a matching
%% schema/1 clause and an exported handler of the same name.
paths() ->
    Prefix = "/load_rebalance/",
    [
        Prefix ++ Suffix
     || Suffix <- [
            "status",
            "global_status",
            "availability_check",
            ":node/start",
            ":node/stop",
            ":node/evacuation/start",
            ":node/evacuation/stop"
        ]
    ].
|
||||
|
||||
%% Endpoint schema per path. The three read-only endpoints are GET
%% operations; the four per-node actions are POST operations that take the
%% `node' path parameter, and the two `start' actions also carry a request
%% body with examples.
schema("/load_rebalance/status") ->
    get_operation(
        '/load_rebalance/status',
        <<"Get rebalance status">>,
        ?DESC("load_rebalance_status"),
        #{200 => local_status_response_schema()}
    );
schema("/load_rebalance/global_status") ->
    get_operation(
        '/load_rebalance/global_status',
        <<"Get global rebalance status">>,
        ?DESC("load_rebalance_global_status"),
        #{200 => response_schema()}
    );
schema("/load_rebalance/availability_check") ->
    get_operation(
        '/load_rebalance/availability_check',
        <<"Node rebalance availability check">>,
        ?DESC("load_rebalance_availability_check"),
        #{
            200 => response_schema(),
            503 => error_codes([?NODE_EVACUATING], <<"Node Evacuating">>)
        }
    );
schema("/load_rebalance/:node/start") ->
    node_post_operation(
        '/load_rebalance/:node/start',
        <<"Start rebalancing with the node as coordinator">>,
        ?DESC("load_rebalance_start"),
        emqx_dashboard_swagger:schema_with_examples(
            ref(rebalance_start),
            rebalance_example()
        )
    );
schema("/load_rebalance/:node/stop") ->
    node_post_operation(
        '/load_rebalance/:node/stop',
        <<"Stop rebalancing coordinated by the node">>,
        ?DESC("load_rebalance_stop")
    );
schema("/load_rebalance/:node/evacuation/start") ->
    node_post_operation(
        '/load_rebalance/:node/evacuation/start',
        <<"Start evacuation on a node">>,
        ?DESC("load_rebalance_evacuation_start"),
        emqx_dashboard_swagger:schema_with_examples(
            ref(rebalance_evacuation_start),
            rebalance_evacuation_example()
        )
    );
schema("/load_rebalance/:node/evacuation/stop") ->
    node_post_operation(
        '/load_rebalance/:node/evacuation/stop',
        <<"Stop evacuation on a node">>,
        ?DESC("load_rebalance_evacuation_stop")
    ).

%% GET operation tagged `load_rebalance' with the given responses.
get_operation(OperationId, Summary, Description, Responses) ->
    #{
        'operationId' => OperationId,
        get => #{
            tags => [<<"load_rebalance">>],
            summary => Summary,
            description => Description,
            responses => Responses
        }
    }.

%% POST operation on a node, without a request body.
node_post_operation(OperationId, Summary, Description) ->
    #{
        'operationId' => OperationId,
        post => node_post_spec(Summary, Description)
    }.

%% POST operation on a node, with a request body.
node_post_operation(OperationId, Summary, Description, RequestBody) ->
    PostSpec = node_post_spec(Summary, Description),
    #{
        'operationId' => OperationId,
        post => PostSpec#{'requestBody' => RequestBody}
    }.

%% Parts shared by every per-node POST: the `node' path parameter and the
%% 200 / 400 responses.
node_post_spec(Summary, Description) ->
    #{
        tags => [<<"load_rebalance">>],
        summary => Summary,
        description => Description,
        parameters => [param_node()],
        responses => #{
            200 => response_schema(),
            400 => error_codes([?BAD_REQUEST, ?NODE_UNAVAILABLE], <<"Bad Request">>)
        }
    }.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Handlers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% GET handler: reports the local status. `disabled' becomes
%% `#{status => disabled}'; a running rebalance or evacuation is rendered by
%% format_status/2.
'/load_rebalance/status'(get, #{}) ->
    Body =
        case emqx_node_rebalance_status:local_status() of
            disabled ->
                #{status => disabled};
            {Process, Stats} when Process =:= rebalance; Process =:= evacuation ->
                format_status(Process, Stats)
        end,
    {200, Body}.
|
||||
|
||||
%% GET handler: fetches the global status and turns each `{Node, Info}'
%% entry of both lists into a map carrying a `node' key.
'/load_rebalance/global_status'(get, #{}) ->
    GlobalStatus = emqx_node_rebalance_status:global_status(),
    #{evacuations := Evacuations, rebalances := Rebalances} = GlobalStatus,
    Body = #{
        evacuations => format_as_map_list(Evacuations),
        rebalances => format_as_map_list(Rebalances)
    },
    {200, Body}.
|
||||
|
||||
%% GET handler: 200 with an empty body while the eviction agent is disabled,
%% 503 NODE_EVACUATING while it is enabled.
'/load_rebalance/availability_check'(get, #{}) ->
    AgentStatus = emqx_eviction_agent:status(),
    case AgentStatus of
        {enabled, _Stats} ->
            error_response(503, ?NODE_EVACUATING, <<"Node Evacuating">>);
        disabled ->
            {200, #{}}
    end.
|
||||
|
||||
%% POST handler: parses the node binding, translates the body against the
%% `rebalance_start' schema, validates the optional `nodes' list and then
%% starts rebalancing on the target node over RPC.
'/load_rebalance/:node/start'(post, #{bindings := #{node := NodeBin}, body := Params0}) ->
    StartOn = fun(Node) ->
        Translated = translate(rebalance_start, Params0),
        with_nodes_at_key(nodes, Translated, fun(Validated) ->
            RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_start(Node, Validated),
            wrap_rpc(Node, RpcResult)
        end)
    end,
    with_node(NodeBin, StartOn).
|
||||
|
||||
%% POST handler: parses the node binding and stops rebalancing on that node
%% over RPC.
'/load_rebalance/:node/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOn = fun(Node) ->
        RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_stop(Node),
        wrap_rpc(Node, RpcResult)
    end,
    with_node(NodeBin, StopOn).
|
||||
|
||||
%% POST handler: parses the node binding, translates the body against the
%% `rebalance_evacuation_start' schema, validates the optional `migrate_to'
%% list and then starts evacuation on the target node over RPC.
'/load_rebalance/:node/evacuation/start'(post, #{
    bindings := #{node := NodeBin}, body := Params0
}) ->
    StartOn = fun(Node) ->
        Translated = translate(rebalance_evacuation_start, Params0),
        with_nodes_at_key(migrate_to, Translated, fun(Validated) ->
            RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_start(
                Node, Validated
            ),
            wrap_rpc(Node, RpcResult)
        end)
    end,
    with_node(NodeBin, StartOn).
|
||||
|
||||
%% POST handler: parses the node binding and stops evacuation on that node
%% over RPC.
'/load_rebalance/:node/evacuation/stop'(post, #{bindings := #{node := NodeBin}}) ->
    StopOn = fun(Node) ->
        RpcResult = emqx_node_rebalance_api_proto_v1:node_rebalance_evacuation_stop(Node),
        wrap_rpc(Node, RpcResult)
    end,
    with_node(NodeBin, StopOn).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Maps an RPC outcome to an HTTP reply:
%%   ok              -> 200 with an empty body
%%   {error, Reason} -> 400 BAD_REQUEST
%%   {badrpc, Reason} -> 503 RPC_ERROR
%% Any other outcome has no clause and crashes.
wrap_rpc(_Node, ok) ->
    {200, #{}};
wrap_rpc(Node, {error, Reason}) ->
    Message = io_lib:format("error on node ~p: ~p", [Node, Reason]),
    error_response(400, ?BAD_REQUEST, Message);
wrap_rpc(Node, {badrpc, Reason}) ->
    Message = io_lib:format("RPC error on node ~p: ~p", [Node, Reason]),
    error_response(503, ?RPC_ERROR, Message).
|
||||
|
||||
%% Adds `process => Process' and `status => enabled' to the stats map
%% (overriding those keys if they are already present).
format_status(Process, Stats) ->
    Marker = #{process => Process, status => enabled},
    maps:merge(Stats, Marker).
|
||||
|
||||
%% Validates the list of node names stored under Key in Params.
%%
%% When Key is absent, Params is returned unchanged as `{ok, Params}'.
%% Otherwise every entry is parsed with parse_node/1 and the result is:
%%   {ok, Params1}                 - all names parsed and all nodes were
%%                                   reported available; the binaries under
%%                                   Key are replaced by node atoms
%%   {error, {invalid, Binaries}}  - names that failed to parse
%%   {error, {unavailable, Nodes}} - parsed nodes missing from the list
%%                                   returned by available_nodes/1
%%
%% Nodes are kept in the order the caller supplied them, both in the
%% validated params and in the error payloads. (Accumulating with a left
%% fold used to reverse them.)
validate_nodes(Key, Params) when is_map_key(Key, Params) ->
    BinNodes = maps:get(Key, Params),
    Parsed = [{BinNode, parse_node(BinNode)} || BinNode <- BinNodes],
    case [BinNode || {BinNode, {error, _}} <- Parsed] of
        [] ->
            ValidNodes = [Node || {_BinNode, {ok, Node}} <- Parsed],
            %% The available list is matched against ValidNodes as a whole,
            %% so any difference is reported as unavailable nodes.
            case emqx_node_rebalance_evacuation:available_nodes(ValidNodes) of
                ValidNodes -> {ok, Params#{Key => ValidNodes}};
                OtherNodes -> {error, {unavailable, ValidNodes -- OtherNodes}}
            end;
        InvalidNodes ->
            {error, {invalid, InvalidNodes}}
    end;
validate_nodes(_Key, Params) ->
    {ok, Params}.
|
||||
|
||||
%% Parses the node name and runs Fun with the resulting atom; a name that
%% does not parse is answered with 400 BAD_REQUEST without calling Fun.
with_node(BinNode, Fun) ->
    Parsed = parse_node(BinNode),
    case Parsed of
        {error, _} ->
            Message = [<<"Invalid node: ">>, BinNode],
            error_response(400, ?BAD_REQUEST, Message);
        {ok, Node} ->
            Fun(Node)
    end.
|
||||
|
||||
%% Validates the node list stored under Key (see validate_nodes/2) and runs
%% Fun with the validated params. Unavailable nodes yield
%% 400 NODE_UNAVAILABLE, unparsable names yield 400 BAD_REQUEST.
with_nodes_at_key(Key, Params, Fun) ->
    case validate_nodes(Key, Params) of
        {ok, Validated} ->
            Fun(Validated);
        {error, {unavailable, Nodes}} ->
            Message = io_lib:format("Nodes unavailable: ~p", [Nodes]),
            error_response(400, ?NODE_UNAVAILABLE, Message);
        {error, {invalid, Nodes}} ->
            Message = io_lib:format("Invalid nodes: ~p", [Nodes]),
            error_response(400, ?BAD_REQUEST, Message)
    end.
|
||||
|
||||
%% Converts a binary to an atom that already exists in the VM, so input can
%% never create new atoms. Returns `{ok, Atom}' or `{error, {unknown, Bin}}'.
parse_node(Bin) when is_binary(Bin) ->
    try binary_to_existing_atom(Bin) of
        Node -> {ok, Node}
    catch
        error:badarg -> {error, {unknown, Bin}}
    end.
|
||||
|
||||
%% Turns a list of `{Node, InfoMap}' pairs into a list of maps, each with the
%% node stored under the `node' key.
format_as_map_list(List) ->
    [put_node(Entry) || Entry <- List].

%% Folds the node of one `{Node, InfoMap}' pair into its info map; any other
%% element shape crashes.
put_node({Node, Info}) ->
    Info#{node => Node}.
|
||||
|
||||
%% Builds an `{HttpCode, Body}' error reply whose body carries the error
%% code atom as a binary and the message iodata flattened to a binary.
error_response(HttpCode, Code, Message) ->
    CodeBin = atom_to_binary(Code),
    MessageBin = iolist_to_binary(Message),
    {HttpCode, #{code => CodeBin, message => MessageBin}}.
|
||||
|
||||
%% Drops every `{Key, Value}' pair whose key is listed in Keys, keeping the
%% remaining pairs in their original order.
without(Keys, Props) ->
    lists:foldr(
        fun({Key, _Value} = Prop, Kept) ->
            case lists:member(Key, Keys) of
                true -> Kept;
                false -> [Prop | Kept]
            end
        end,
        [],
        Props
    ).
|
||||
|
||||
%%------------------------------------------------------------------------------
|
||||
%% Schema
|
||||
%%------------------------------------------------------------------------------
|
||||
|
||||
%% Checks Conf against this module's `Ref' schema with hocon_tconf and
%% returns the checked config with atom keys.
translate(Ref, Conf) ->
    RawConf = #{atom_to_binary(Ref) => Conf},
    CheckOpts = #{atom_key => true},
    Checked = hocon_tconf:check_plain(?MODULE, RawConf, CheckOpts, [Ref]),
    #{Ref := Translated} = Checked,
    Translated.
|
||||
|
||||
%% Schema entry for the required `node' path parameter.
param_node() ->
    ParamOpts = #{
        in => path,
        desc => ?DESC(param_node),
        required => true
    },
    {node, mk(binary(), ParamOpts)}.
|
||||
|
||||
%% Field definitions for every schema reference used by this API.
%% Request bodies: `rebalance_start', `rebalance_evacuation_start'.
%% Responses: `local_status_disabled', `local_status_enabled',
%% `status_stats', `global_coordinator_status', `global_evacuation_status'
%% and `global_status'.
fields(rebalance_start) ->
    [
        opt_field("wait_health_check", emqx_schema:duration_s(), ?DESC(wait_health_check)),
        opt_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate)),
        opt_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate)),
        opt_field("abs_conn_threshold", pos_integer(), ?DESC(abs_conn_threshold)),
        opt_field(
            "rel_conn_threshold",
            number(),
            ?DESC(rel_conn_threshold),
            fun(Value) -> Value > 1.0 end
        ),
        opt_field("abs_sess_threshold", pos_integer(), ?DESC(abs_sess_threshold)),
        opt_field(
            "rel_sess_threshold",
            number(),
            ?DESC(rel_sess_threshold),
            fun(Value) -> Value > 1.0 end
        ),
        opt_field("wait_takeover", emqx_schema:duration_s(), ?DESC(wait_takeover)),
        opt_field(
            "nodes",
            list(binary()),
            ?DESC(rebalance_nodes),
            fun(Values) -> length(Values) > 0 end
        )
    ];
fields(rebalance_evacuation_start) ->
    [
        opt_field("conn_evict_rate", pos_integer(), ?DESC(conn_evict_rate)),
        opt_field("sess_evict_rate", pos_integer(), ?DESC(sess_evict_rate)),
        opt_field("redirect_to", binary(), ?DESC(redirect_to)),
        opt_field("wait_takeover", pos_integer(), ?DESC(wait_takeover)),
        opt_field(
            "migrate_to",
            list(binary()),
            ?DESC(migrate_to),
            fun(Values) -> length(Values) > 0 end
        )
    ];
fields(local_status_disabled) ->
    [
        req_field("status", disabled, ?DESC(local_status_enabled))
    ];
fields(local_status_enabled) ->
    [
        req_field("status", enabled, ?DESC(local_status_enabled)),
        req_field(
            "process",
            hoconsc:union([rebalance, evacuation]),
            ?DESC(local_status_process)
        ),
        req_field("state", atom(), ?DESC(local_status_state)),
        opt_field("coordinator_node", binary(), ?DESC(local_status_coordinator_node)),
        opt_field(
            "connection_eviction_rate",
            pos_integer(),
            ?DESC(local_status_connection_eviction_rate)
        ),
        opt_field(
            "session_eviction_rate",
            pos_integer(),
            ?DESC(local_status_session_eviction_rate)
        ),
        opt_field("connection_goal", non_neg_integer(), ?DESC(local_status_connection_goal)),
        opt_field("session_goal", non_neg_integer(), ?DESC(local_status_session_goal)),
        opt_field(
            "disconnected_session_goal",
            non_neg_integer(),
            ?DESC(local_status_disconnected_session_goal)
        ),
        opt_field(
            "session_recipients",
            list(binary()),
            ?DESC(local_status_session_recipients)
        ),
        opt_field("recipients", list(binary()), ?DESC(local_status_recipients)),
        opt_field("stats", ref(status_stats), ?DESC(local_status_stats))
    ];
fields(status_stats) ->
    [
        req_field(
            "initial_connected",
            non_neg_integer(),
            ?DESC(status_stats_initial_connected)
        ),
        req_field(
            "current_connected",
            non_neg_integer(),
            ?DESC(status_stats_current_connected)
        ),
        req_field(
            "initial_sessions",
            non_neg_integer(),
            ?DESC(status_stats_initial_sessions)
        ),
        req_field(
            "current_sessions",
            non_neg_integer(),
            ?DESC(status_stats_current_sessions)
        ),
        opt_field(
            "current_disconnected_sessions",
            non_neg_integer(),
            ?DESC(status_stats_current_disconnected_sessions)
        )
    ];
fields(global_coordinator_status) ->
    %% The local "enabled" fields minus the listed ones, plus the
    %% coordinator-specific fields.
    Inherited = without(
        ["status", "process", "session_goal", "session_recipients", "stats"],
        fields(local_status_enabled)
    ),
    Inherited ++
        [
            opt_field("donors", list(binary()), ?DESC(coordinator_status_donors)),
            opt_field(
                "donor_conn_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_conn_avg)
            ),
            opt_field(
                "donor_sess_avg",
                non_neg_integer(),
                ?DESC(coordinator_status_donor_sess_avg)
            ),
            req_field("node", binary(), ?DESC(coordinator_status_node))
        ];
fields(global_evacuation_status) ->
    %% The local "enabled" fields minus `status' / `process', plus `node'.
    Inherited = without(["status", "process"], fields(local_status_enabled)),
    Inherited ++
        [
            req_field("node", binary(), ?DESC(evacuation_status_node))
        ];
fields(global_status) ->
    [
        opt_field(
            "evacuations",
            hoconsc:array(ref(global_evacuation_status)),
            ?DESC(global_status_evacuations)
        ),
        opt_field(
            "rebalances",
            hoconsc:array(ref(global_coordinator_status)),
            ?DESC(global_status_rebalances)
        )
    ].

%% `{Name, Schema}' entry for a field that may be omitted.
opt_field(Name, Type, Desc) ->
    {Name, mk(Type, #{desc => Desc, required => false})}.

%% Same as opt_field/3, with a single validator fun attached.
opt_field(Name, Type, Desc, Validator) ->
    {Name, mk(Type, #{desc => Desc, required => false, validator => [Validator]})}.

%% `{Name, Schema}' entry for a field that must be present.
req_field(Name, Type, Desc) ->
    {Name, mk(Type, #{desc => Desc, required => true})}.
|
||||
|
||||
%% Example request body for the rebalance start endpoint.
rebalance_example() ->
    maps:from_list([
        {nodes, [<<"othernode@127.0.0.1">>]},
        {wait_health_check, 10},
        {wait_takeover, 10},
        {conn_evict_rate, 10},
        {abs_conn_threshold, 10},
        {rel_conn_threshold, 1.5},
        {sess_evict_rate, 20},
        {abs_sess_threshold, 10},
        {rel_sess_threshold, 1.5}
    ]).
|
||||
|
||||
%% Example request body for the evacuation start endpoint.
rebalance_evacuation_example() ->
    maps:from_list([
        {migrate_to, [<<"othernode@127.0.0.1">>]},
        {redirect_to, <<"othernode:1883">>},
        {wait_takeover, 10},
        {conn_evict_rate, 100},
        {sess_evict_rate, 100}
    ]).
|
||||
|
||||
%% Response schema for the local status endpoint: a union of the disabled
%% and the enabled status shapes.
local_status_response_schema() ->
    Variants = [ref(local_status_disabled), ref(local_status_enabled)],
    hoconsc:union(Variants).
|
||||
|
||||
%% Generic response schema: a map described by the `empty_response' text.
response_schema() ->
    SchemaOpts = #{desc => ?DESC(empty_response)},
    mk(map(), SchemaOpts).
|
||||
|
||||
%% This schema module declares no root fields.
roots() -> [].
|
|
@ -0,0 +1,22 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_app).
|
||||
|
||||
-behaviour(application).
|
||||
|
||||
-emqx_plugin(?MODULE).
|
||||
|
||||
-export([
|
||||
start/2,
|
||||
stop/1
|
||||
]).
|
||||
|
||||
%% Application start: boots the supervisor, then loads the CLI command.
%% Either step failing crashes the start with badmatch.
start(_Type, _Args) ->
    {ok, _SupPid} = Started = emqx_node_rebalance_sup:start_link(),
    ok = emqx_node_rebalance_cli:load(),
    Started.
|
||||
|
||||
%% Application stop: unloads the CLI command and returns the result of the
%% unload call.
stop(_State) ->
    Unloaded = emqx_node_rebalance_cli:unload(),
    Unloaded.
|
|
@ -0,0 +1,305 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_cli).
|
||||
|
||||
%% APIs
|
||||
-export([
|
||||
load/0,
|
||||
unload/0,
|
||||
cli/1
|
||||
]).
|
||||
|
||||
%% Registers the `rebalance' command with emqx_ctl, handled by cli/1.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(rebalance, Handler, []).
|
||||
|
||||
%% Removes the `rebalance' command from emqx_ctl.
unload() ->
    Command = rebalance,
    emqx_ctl:unregister_command(Command).
|
||||
|
||||
%% Entry point of the `rebalance' CLI command.
%%
%%   start ...          - start an evacuation (with `--evacuation') or a
%%                        rebalance; prints the outcome, returns true/false
%%   node-status [Node] - print the status of this node or the named node
%%   status             - print every evacuation and rebalance reported by
%%                        the global status
%%   stop               - stop a running evacuation, else a running
%%                        rebalance; returns true/false
%%   anything else      - print usage
cli(["start" | StartArgs]) ->
    case start_args(StartArgs) of
        {evacuation, Opts} ->
            case emqx_node_rebalance_evacuation:status() of
                disabled ->
                    ok = emqx_node_rebalance_evacuation:start(Opts),
                    emqx_ctl:print("Rebalance(evacuation) started~n"),
                    true;
                {enabled, _} ->
                    emqx_ctl:print("Rebalance is already enabled~n"),
                    false
            end;
        {rebalance, Opts} ->
            case emqx_node_rebalance:start(Opts) of
                ok ->
                    emqx_ctl:print("Rebalance started~n"),
                    true;
                {error, Reason} ->
                    emqx_ctl:print("Rebalance start error: ~p~n", [Reason]),
                    false
            end;
        {error, Error} ->
            %% Argument errors are already human-readable chardata, hence ~s.
            emqx_ctl:print("Rebalance start error: ~s~n", [Error]),
            false
    end;
cli(["node-status", NodeStr]) ->
    %% Only atoms that already exist are accepted as node names.
    case emqx_misc:safe_to_existing_atom(NodeStr, utf8) of
        {ok, Node} ->
            node_status(emqx_node_rebalance_status:local_status(Node));
        {error, _} ->
            emqx_ctl:print("Node status error: invalid node~n"),
            false
    end;
cli(["node-status"]) ->
    node_status(emqx_node_rebalance_status:local_status());
cli(["status"]) ->
    #{
        evacuations := Evacuations,
        rebalances := Rebalances
    } = emqx_node_rebalance_status:global_status(),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: evacuation~n~s",
                [Node, emqx_node_rebalance_status:format_local_status(Status)]
            )
        end,
        Evacuations
    ),
    lists:foreach(
        fun({Node, Status}) ->
            emqx_ctl:print(
                "--------------------------------------------------------------------~n"
            ),
            emqx_ctl:print(
                "Node ~p: rebalance coordinator~n~s",
                [Node, emqx_node_rebalance_status:format_coordinator_status(Status)]
            )
        end,
        Rebalances
    );
cli(["stop"]) ->
    %% An evacuation takes precedence; a rebalance is only looked at when no
    %% evacuation is running.
    case emqx_node_rebalance_evacuation:status() of
        {enabled, _} ->
            ok = emqx_node_rebalance_evacuation:stop(),
            emqx_ctl:print("Rebalance(evacuation) stopped~n"),
            true;
        disabled ->
            case emqx_node_rebalance:status() of
                {enabled, _} ->
                    ok = emqx_node_rebalance:stop(),
                    emqx_ctl:print("Rebalance stopped~n"),
                    true;
                disabled ->
                    emqx_ctl:print("Rebalance is already disabled~n"),
                    false
            end
    end;
cli(_) ->
    emqx_ctl:usage(
        [
            {
                "rebalance start --evacuation \\\n"
                " [--redirect-to \"Host1:Port1 Host2:Port2 ...\"] \\\n"
                " [--conn-evict-rate CountPerSec] \\\n"
                " [--migrate-to \"node1@host1 node2@host2 ...\"] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec]",
                "Start current node evacuation with optional server redirect to the specified servers"
            },

            %% Each option is listed once (--conn-evict-rate used to appear
            %% twice in this synopsis).
            {
                "rebalance start \\\n"
                " [--nodes \"node1@host1 node2@host2\"] \\\n"
                " [--wait-health-check Secs] \\\n"
                " [--conn-evict-rate ConnPerSec] \\\n"
                " [--abs-conn-threshold Count] \\\n"
                " [--rel-conn-threshold Fraction] \\\n"
                " [--wait-takeover Secs] \\\n"
                " [--sess-evict-rate CountPerSec] \\\n"
                " [--abs-sess-threshold Count] \\\n"
                " [--rel-sess-threshold Fraction]",
                "Start rebalance on the specified nodes using the current node as the coordinator"
            },

            {"rebalance node-status", "Get current node rebalance status"},

            {"rebalance node-status \"node1@host1\"", "Get remote node rebalance status"},

            {"rebalance status",
                "Get statuses of all current rebalance/evacuation processes across the cluster"},

            {"rebalance stop", "Stop node rebalance"}
        ]
    ).
|
||||
|
||||
%% Prints a node status: the formatted stats for a running evacuation or
%% rebalance, a fixed line when disabled, and the raw term for anything else.
node_status({Process, Status}) when Process =:= evacuation; Process =:= rebalance ->
    Formatted = emqx_node_rebalance_status:format_local_status(Status),
    emqx_ctl:print("Rebalance type: ~p~n~s~n", [Process, Formatted]);
node_status(disabled) ->
    emqx_ctl:print("Rebalance disabled~n");
node_status(Other) ->
    emqx_ctl:print("Error detecting rebalance status: ~p~n", [Other]).
|
||||
|
||||
%% Parses the arguments of `rebalance start'. Returns `{evacuation, Opts}'
%% when `--evacuation' was given, `{rebalance, Opts}' otherwise, or the
%% `{error, _}' produced while collecting or validating the arguments.
start_args(Args) ->
    %% Tags a successful validation with the operation kind; errors pass
    %% through untouched.
    Tag = fun
        (Kind, {ok, Validated}) -> {Kind, Validated};
        (_Kind, {error, _} = Error) -> Error
    end,
    case collect_args(Args, #{}) of
        {error, _} = Error ->
            Error;
        {ok, #{"--evacuation" := true} = Collected} ->
            Tag(evacuation, validate_evacuation(maps:to_list(Collected), #{}));
        {ok, #{} = Collected} ->
            Tag(rebalance, validate_rebalance(maps:to_list(Collected), #{}))
    end.
|
||||
|
||||
%% Collects raw command-line arguments into a map keyed by the flag string.
%% `--evacuation' is a bare switch stored as `true'; every other known flag
%% consumes the argument that follows it. An unknown flag, or a known flag
%% with no value after it, aborts with an error describing the remaining
%% arguments.
collect_args([], Map) ->
    {ok, Map};
collect_args(["--evacuation" | Args], Map) ->
    collect_args(Args, Map#{"--evacuation" => true});
collect_args([Flag, Value | Args] = Remaining, Map) ->
    ValuedFlags = [
        %% evacuation
        "--redirect-to",
        "--migrate-to",
        %% rebalance
        "--nodes",
        "--wait-health-check",
        "--abs-conn-threshold",
        "--rel-conn-threshold",
        "--abs-sess-threshold",
        "--rel-sess-threshold",
        %% common
        "--conn-evict-rate",
        "--wait-takeover",
        "--sess-evict-rate"
    ],
    case lists:member(Flag, ValuedFlags) of
        true ->
            collect_args(Args, Map#{Flag => Value});
        false ->
            {error, io_lib:format("unknown arguments: ~p", [Remaining])}
    end;
collect_args(Args, _Map) ->
    {error, io_lib:format("unknown arguments: ~p", [Args])}.
|
||||
|
||||
%% Validates collected `{Flag, Value}' pairs for an evacuation start and
%% accumulates the resulting options in Map.
%%
%%   --evacuation       - the switch itself, contributes nothing
%%   --redirect-to      - stored as a binary under `server_reference'
%%   --conn-evict-rate, --sess-evict-rate, --wait-takeover
%%                      - handed to validate_pos_int/4, which is given this
%%                        function to continue with
%%   --migrate-to       - node names separated by commas and/or spaces; all
%%                        must convert to atoms and all must be reported
%%                        available
%%
%% Returns `{ok, Opts}' or `{error, Chardata}'. Any other flag is reported
%% as an unknown evacuation argument.
validate_evacuation([], Map) ->
    {ok, Map};
validate_evacuation([{"--evacuation", _} | Rest], Map) ->
    validate_evacuation(Rest, Map);
validate_evacuation([{"--redirect-to", ServerReference} | Rest], Map) ->
    validate_evacuation(Rest, Map#{server_reference => list_to_binary(ServerReference)});
validate_evacuation([{"--conn-evict-rate", _} | _] = Opts, Map) ->
    validate_pos_int(conn_evict_rate, Opts, Map, fun validate_evacuation/2);
validate_evacuation([{"--sess-evict-rate", _} | _] = Opts, Map) ->
    validate_pos_int(sess_evict_rate, Opts, Map, fun validate_evacuation/2);
validate_evacuation([{"--wait-takeover", _} | _] = Opts, Map) ->
    validate_pos_int(wait_takeover, Opts, Map, fun validate_evacuation/2);
validate_evacuation([{"--migrate-to", MigrateTo} | Rest], Map) ->
    %% string:lexemes/2 replaces the obsolete string:tokens/2; with the
    %% separators "," and " " it splits the input the same way.
    case strings_to_atoms(string:lexemes(MigrateTo, ", ")) of
        {_, Invalid} when Invalid =/= [] ->
            {error, io_lib:format("invalid --migrate-to, invalid nodes: ~p", [Invalid])};
        {Nodes, []} ->
            case emqx_node_rebalance_evacuation:available_nodes(Nodes) of
                [] ->
                    {error, "invalid --migrate-to, no nodes"};
                Nodes ->
                    validate_evacuation(Rest, Map#{migrate_to => Nodes});
                OtherNodes ->
                    {error,
                        io_lib:format(
                            "invalid --migrate-to, unavailable nodes: ~p",
                            [Nodes -- OtherNodes]
                        )}
            end
    end;
validate_evacuation(Rest, _Map) ->
    {error, io_lib:format("unknown evacuation arguments: ~p", [Rest])}.
|
||||
|
||||
%% Walk the list of collected `{Flag, Value}' rebalance options and fold
%% the validated values into the accumulator map.
%% Returns `{ok, Map}' once the list is exhausted, or `{error, Message}'
%% for the first option that is invalid or unknown.
validate_rebalance([], Acc) ->
    {ok, Acc};
validate_rebalance([{"--wait-health-check", _} | _] = Opts, Acc) ->
    validate_pos_int(wait_health_check, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--conn-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(conn_evict_rate, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--sess-evict-rate", _} | _] = Opts, Acc) ->
    validate_pos_int(sess_evict_rate, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--abs-conn-threshold", _} | _] = Opts, Acc) ->
    validate_pos_int(abs_conn_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-conn-threshold", _} | _] = Opts, Acc) ->
    validate_fraction(rel_conn_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--abs-sess-threshold", _} | _] = Opts, Acc) ->
    validate_pos_int(abs_sess_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-sess-threshold", _} | _] = Opts, Acc) ->
    validate_fraction(rel_sess_threshold, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--wait-takeover", _} | _] = Opts, Acc) ->
    validate_pos_int(wait_takeover, Opts, Acc, fun validate_rebalance/2);
validate_rebalance([{"--nodes", NodeStr} | Tail], Acc) ->
    case rebalance_nodes(NodeStr) of
        {ok, Nodes} ->
            validate_rebalance(Tail, Acc#{nodes => Nodes});
        {error, _} = Error ->
            Error
    end;
validate_rebalance(Unknown, _Acc) ->
    {error, io_lib:format("unknown rebalance arguments: ~p", [Unknown])}.

%% Turn the comma/space separated `--nodes' value into a list of node atoms
%% and make sure every one of them is reported as available.
rebalance_nodes(NodeStr) ->
    case strings_to_atoms(string:tokens(NodeStr, ", ")) of
        {Nodes, []} ->
            check_rebalance_nodes_availability(
                Nodes, emqx_node_rebalance:available_nodes(Nodes)
            );
        {_, Invalid} ->
            {error, io_lib:format("invalid --nodes, invalid nodes: ~p", [Invalid])}
    end.

%% Compare the requested nodes with the ones that answered as available.
check_rebalance_nodes_availability(_Nodes, []) ->
    {error, "invalid --nodes, no nodes"};
check_rebalance_nodes_availability(Nodes, Nodes) ->
    {ok, Nodes};
check_rebalance_nodes_availability(Nodes, Available) ->
    {error,
        io_lib:format(
            "invalid --nodes, unavailable nodes: ~p",
            [Nodes -- Available]
        )}.
|
||||
|
||||
%% Validate the value of the option at the head of the list as a relative
%% threshold: a number strictly greater than 1.0.
%%
%% On success the value is stored (as a float) under `Name' and validation
%% continues with `Next(Rest, Map)'. Otherwise `{error, Message}' is
%% returned, where the message names the offending option.
%%
%% Both float ("1.5") and integer ("2") notations are accepted;
%% `string:to_float/1' alone rejects integer literals, which made perfectly
%% valid thresholds such as "2" fail.
validate_fraction(Name, [{OptionName, Value} | Rest], Map, Next) ->
    case parse_fraction_value(Value) of
        {ok, Num} when Num > 1.0 ->
            Next(Rest, Map#{Name => Num});
        _ ->
            {error, "invalid " ++ OptionName ++ " value"}
    end.

%% Parse a complete string as a float, falling back to an integer literal.
%% Trailing garbage makes the parse fail.
parse_fraction_value(Value) ->
    case string:to_float(Value) of
        {Float, ""} when is_float(Float) ->
            {ok, Float};
        _ ->
            case string:to_integer(Value) of
                {Int, ""} when is_integer(Int) ->
                    {ok, float(Int)};
                _ ->
                    error
            end
    end.
|
||||
|
||||
%% Validate the value of the option at the head of the list as a strictly
%% positive integer written in decimal notation with no trailing characters.
%%
%% On success the integer is stored under `Key' and validation continues
%% with `Continue(Tail, Map)'. Otherwise `{error, Message}' is returned,
%% where the message names the offending option.
validate_pos_int(Key, [{Flag, Raw} | Tail], Acc, Continue) ->
    case string:to_integer(Raw) of
        {N, []} when N > 0 ->
            Continue(Tail, Acc#{Key => N});
        _Invalid ->
            {error, "invalid " ++ Flag ++ " value"}
    end.
|
||||
|
||||
%% Convert a list of strings into already-existing atoms.
%% Returns `{Atoms, Invalid}': the atoms that could be resolved and the
%% strings that could not, each in input order.
strings_to_atoms(Strings) ->
    strings_to_atoms(Strings, [], []).

%% Accumulator-based worker for `strings_to_atoms/1'. Both accumulators are
%% built in reverse and flipped once the input is exhausted.
strings_to_atoms([], Resolved, Rejected) ->
    {lists:reverse(Resolved), lists:reverse(Rejected)};
strings_to_atoms([Name | Names], Resolved, Rejected) ->
    case emqx_misc:safe_to_existing_atom(Name, utf8) of
        {error, _} ->
            strings_to_atoms(Names, Resolved, [Name | Rejected]);
        {ok, Atom} ->
            strings_to_atoms(Names, [Atom | Resolved], Rejected)
    end.
|
|
@ -0,0 +1,308 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation).
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
|
||||
-include_lib("emqx/include/logger.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-export([
|
||||
start/1,
|
||||
status/0,
|
||||
stop/0
|
||||
]).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-behaviour(gen_statem).
|
||||
|
||||
-export([
|
||||
init/1,
|
||||
callback_mode/0,
|
||||
handle_event/4,
|
||||
code_change/4
|
||||
]).
|
||||
|
||||
-export([
|
||||
is_node_available/0,
|
||||
available_nodes/1
|
||||
]).
|
||||
|
||||
%% `migrate_to/0' is exported as well: it is referenced as a remote type
%% (`emqx_node_rebalance_evacuation:migrate_to()') by the persistence module.
-export_type([
    start_opts/0,
    start_error/0,
    migrate_to/0
]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-export([migrate_to/1]).
|
||||
-endif.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-define(EVICT_INTERVAL_NO_NODES, 30000).
|
||||
|
||||
-type migrate_to() :: [node()] | undefined.
|
||||
|
||||
-type start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer(),
|
||||
migrate_to => migrate_to()
|
||||
}.
|
||||
-type start_error() :: already_started | eviction_agent_busy.
|
||||
%% Payload of an `{enabled, Stats}' status reply.
%% Besides the counters and the effective options it carries the current
%% state of the state machine under the `state' key (the status handler
%% adds it together with the resolved `migrate_to' list).
-type stats() :: #{
    state := evicting_conns | waiting_takeover | evicting_sessions | prohibiting,
    initial_conns := non_neg_integer(),
    initial_sessions := non_neg_integer(),
    current_conns := non_neg_integer(),
    current_sessions := non_neg_integer(),
    conn_evict_rate := pos_integer(),
    sess_evict_rate := pos_integer(),
    server_reference := emqx_eviction_agent:server_reference(),
    migrate_to := migrate_to()
}.
|
||||
-type status() :: {enabled, stats()} | disabled.
|
||||
|
||||
%% Ask the locally registered evacuation state machine to start evacuating.
%% Options missing from `StartOpts' are filled in from `default_opts/0'.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    gen_statem:call(?MODULE, {start, maps:merge(default_opts(), StartOpts)}).
|
||||
|
||||
%% Ask the evacuation state machine to stop; the state machine replies
%% `{error, not_started}' when it is in the `disabled' state.
-spec stop() -> ok_or_error(not_started).
stop() ->
    gen_statem:call(?MODULE, stop).

%% Query the evacuation state machine for its current status.
-spec status() -> status().
status() ->
    gen_statem:call(?MODULE, status).

%% Start the state machine and register it locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    gen_statem:start_link({local, ?MODULE}, ?MODULE, [], []).
|
||||
|
||||
%% Return the subset of `Nodes' that answered the availability RPC with a
%% node name. Any non-atom reply (for instance an error tuple from a failed
%% call) is dropped.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _BadNodes} = emqx_node_rebalance_evacuation_proto_v1:available_nodes(Nodes),
    [Reply || Reply <- Replies, is_atom(Reply)].
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% gen_statem callbacks
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% gen_statem callback: every event is routed through `handle_event/4'.
callback_mode() ->
    handle_event_function.
|
||||
|
||||
%% states: disabled, evicting_conns, waiting_takeover, evicting_sessions, prohibiting
|
||||
|
||||
%% gen_statem callback.
%% When persisted evacuation options are found, evacuation is resumed right
%% away (provided the eviction agent can be enabled); otherwise the state
%% machine starts in the `disabled' state with empty data.
init([]) ->
    case emqx_node_rebalance_evacuation_persist:read(default_opts()) of
        none ->
            {ok, disabled, #{}};
        {ok, #{server_reference := ServerReference} = PersistedOpts} ->
            ?SLOG(warning, #{msg => "restoring_evacuation_state", opts => PersistedOpts}),
            case emqx_eviction_agent:enable(?MODULE, ServerReference) of
                {error, eviction_agent_busy} ->
                    %% Somebody else owns the agent: drop the persisted
                    %% options and stay disabled.
                    emqx_node_rebalance_evacuation_persist:clear(),
                    {ok, disabled, #{}};
                ok ->
                    InitialData = init_data(#{}, PersistedOpts),
                    ok = warn_enabled(),
                    %% Zero timeout: begin evicting connections immediately.
                    {ok, evicting_conns, InitialData, [{state_timeout, 0, evict_conns}]}
            end
    end.
|
||||
|
||||
%% start
|
||||
%% gen_statem event handler (`handle_event_function' callback mode).
%%
%% State progression while evacuating:
%%   disabled -> evicting_conns -> waiting_takeover -> evicting_sessions
%%   -> prohibiting
%% `stop' returns to `disabled' from any other state.

%% start: only accepted while disabled
handle_event(
    {call, From},
    {start, #{server_reference := ServerReference} = Opts},
    disabled,
    #{} = Data
) ->
    case emqx_eviction_agent:enable(?MODULE, ServerReference) of
        ok ->
            NewData = init_data(Data, Opts),
            ok = emqx_node_rebalance_evacuation_persist:save(Opts),
            ?SLOG(warning, #{
                msg => "node_evacuation_started",
                opts => Opts
            }),
            {next_state, evicting_conns, NewData, [
                {state_timeout, 0, evict_conns},
                {reply, From, ok}
            ]};
        {error, eviction_agent_busy} ->
            {keep_state_and_data, [{reply, From, {error, eviction_agent_busy}}]}
    end;
handle_event({call, From}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data, [{reply, From, {error, already_started}}]};
%% stop
handle_event({call, From}, stop, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, {error, not_started}}]};
handle_event({call, From}, stop, _State, Data) ->
    ok = emqx_node_rebalance_evacuation_persist:clear(),
    _ = emqx_eviction_agent:disable(?MODULE),
    ?SLOG(warning, #{msg => "node_evacuation_stopped"}),
    {next_state, disabled, deinit(Data), [{reply, From, ok}]};
%% status
handle_event({call, From}, status, disabled, #{}) ->
    {keep_state_and_data, [{reply, From, disabled}]};
handle_event({call, From}, status, State, #{migrate_to := MigrateTo} = Data) ->
    Stats = maps:with(
        [
            initial_conns,
            current_conns,
            initial_sessions,
            current_sessions,
            server_reference,
            conn_evict_rate,
            sess_evict_rate
        ],
        Data
    ),
    {keep_state_and_data, [
        {reply, From, {enabled, Stats#{state => State, migrate_to => migrate_to(MigrateTo)}}}
    ]};
%% conn eviction
handle_event(
    state_timeout,
    evict_conns,
    evicting_conns,
    #{
        conn_evict_rate := ConnEvictRate,
        wait_takeover := WaitTakeover
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{connections := Conns}} when Conns > 0 ->
            ok = emqx_eviction_agent:evict_connections(ConnEvictRate),
            ?tp(debug, node_evacuation_evict_conn, #{conn_evict_rate => ConnEvictRate}),
            ?SLOG(
                warning,
                #{
                    msg => "node_evacuation_evict_conns",
                    count => Conns,
                    conn_evict_rate => ConnEvictRate
                }
            ),
            NewData = Data#{current_conns => Conns},
            {keep_state, NewData, [{state_timeout, ?EVICT_INTERVAL, evict_conns}]};
        {enabled, #{connections := 0}} ->
            NewData = Data#{current_conns => 0},
            ?SLOG(warning, #{msg => "node_evacuation_evict_conns_done"}),
            {next_state, waiting_takeover, NewData, [
                {state_timeout, timer:seconds(WaitTakeover), evict_sessions}
            ]}
    end;
handle_event(
    state_timeout,
    evict_sessions,
    waiting_takeover,
    Data
) ->
    ?SLOG(warning, #{msg => "node_evacuation_waiting_takeover_done"}),
    {next_state, evicting_sessions, Data, [{state_timeout, 0, evict_sessions}]};
%% session eviction
handle_event(
    state_timeout,
    evict_sessions,
    evicting_sessions,
    #{
        sess_evict_rate := SessEvictRate,
        migrate_to := MigrateTo
    } = Data
) ->
    case emqx_eviction_agent:status() of
        {enabled, #{sessions := SessCount}} when SessCount > 0 ->
            %% Always record the freshly read count, so that both the log
            %% entries and the status reply reflect the current number of
            %% sessions even while no recipient node is available.
            NewData = Data#{current_sessions => SessCount},
            case migrate_to(MigrateTo) of
                [] ->
                    ?SLOG(warning, #{
                        msg => "no_nodes_to_evacuate_sessions", session_count => SessCount
                    }),
                    {keep_state, NewData, [
                        {state_timeout, ?EVICT_INTERVAL_NO_NODES, evict_sessions}
                    ]};
                Nodes ->
                    ok = emqx_eviction_agent:evict_sessions(SessEvictRate, Nodes),
                    ?SLOG(
                        warning,
                        #{
                            msg => "node_evacuation_evict_sessions",
                            session_count => SessCount,
                            session_evict_rate => SessEvictRate,
                            target_nodes => Nodes
                        }
                    ),
                    {keep_state, NewData, [{state_timeout, ?EVICT_INTERVAL, evict_sessions}]}
            end;
        {enabled, #{sessions := 0}} ->
            ?tp(debug, node_evacuation_evict_sess_over, #{}),
            ?SLOG(warning, #{msg => "node_evacuation_evict_sessions_over"}),
            NewData = Data#{current_sessions => 0},
            {next_state, prohibiting, NewData}
    end;
%% anything else is logged and ignored
handle_event({call, From}, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_call", call => Msg, state => State, data => Data}),
    {keep_state_and_data, [{reply, From, ignored}]};
handle_event(info, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_info", info => Msg, state => State, data => Data}),
    keep_state_and_data;
handle_event(cast, Msg, State, Data) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast => Msg, state => State, data => Data}),
    keep_state_and_data.
|
||||
|
||||
%% gen_statem callback: state and data are carried over unchanged.
code_change(_OldVsn, CurrentState, CurrentData, _Extra) ->
    {ok, CurrentState, CurrentData}.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% internal funs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Default evacuation options. The rates and the takeover wait come from
%% the `?DEFAULT_*' macros; no server reference and no explicit recipient
%% nodes are set.
default_opts() ->
    #{
        conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
        sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
        wait_takeover => ?DEFAULT_WAIT_TAKEOVER,
        server_reference => undefined,
        migrate_to => undefined
    }.
|
||||
|
||||
%% Build the state data for a starting evacuation: the options are merged
%% over the existing data, and the initial/current connection and session
%% counters are seeded from the eviction agent status (which must be
%% `enabled' at this point).
init_data(Data0, Opts) ->
    Merged = maps:merge(Data0, Opts),
    {enabled, #{connections := Conns, sessions := Sessions}} = emqx_eviction_agent:status(),
    Counters = #{
        initial_conns => Conns,
        current_conns => Conns,
        initial_sessions => Sessions,
        current_sessions => Sessions
    },
    maps:merge(Merged, Counters).
|
||||
|
||||
%% Strip everything evacuation-specific from the state data: the four
%% counters and every key that appears in `default_opts/0'.
deinit(Data) ->
    CounterKeys = [initial_conns, current_conns, initial_sessions, current_sessions],
    OptionKeys = maps:keys(default_opts()),
    maps:without(CounterKeys ++ OptionKeys, Data).
|
||||
|
||||
%% Announce that evacuation is active, both in the log and on stderr.
warn_enabled() ->
    ?SLOG(warning, #{msg => "node_evacuation_enabled"}),
    Notice = "Node evacuation is enabled. The node will not receive connections.~n",
    io:format(standard_error, Notice, []).
|
||||
|
||||
%% Resolve the session recipients: with no explicit list every other
%% running node is a candidate; either way only the nodes that are
%% currently available are returned.
migrate_to(undefined) ->
    available_nodes(all_nodes());
migrate_to(Nodes) when is_list(Nodes) ->
    available_nodes(Nodes).

%% RPC target: returns the local node name when the local eviction agent is
%% disabled and fails with `badmatch' otherwise.
is_node_available() ->
    disabled = emqx_eviction_agent:status(),
    node().

%% Every running cluster node except the local one.
all_nodes() ->
    mria_mnesia:running_nodes() -- [node()].
|
|
@ -0,0 +1,120 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_persist).
|
||||
|
||||
-export([
|
||||
save/1,
|
||||
clear/0,
|
||||
read/1
|
||||
]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-export([evacuation_filepath/0]).
|
||||
-endif.
|
||||
|
||||
-include("emqx_node_rebalance.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% do not persist `migrate_to`:
|
||||
%% * after restart there is nothing to migrate
|
||||
%% * this value may be invalid after node was offline
|
||||
-type persisted_start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer()
|
||||
}.
|
||||
-type start_opts() :: #{
|
||||
server_reference => emqx_eviction_agent:server_reference(),
|
||||
conn_evict_rate => pos_integer(),
|
||||
sess_evict_rate => pos_integer(),
|
||||
wait_takeover => pos_integer(),
|
||||
migrate_to => emqx_node_rebalance_evacuation:migrate_to()
|
||||
}.
|
||||
|
||||
%% Persist the evacuation options as pretty-printed JSON in the evacuation
%% file, creating the parent directory if needed. Only the keys listed in
%% `persist_keys/0' are written. The guard rejects (with a
%% `function_clause' error) any option map whose values are not of the
%% expected shape.
-spec save(persisted_start_opts()) -> ok_or_error(term()).
save(
    #{
        server_reference := ServerReference,
        conn_evict_rate := ConnEvictRate,
        sess_evict_rate := SessEvictRate,
        wait_takeover := WaitTakeover
    } = Opts
) when
    is_binary(ServerReference) orelse ServerReference =:= undefined,
    is_integer(ConnEvictRate),
    ConnEvictRate > 0,
    is_integer(SessEvictRate),
    SessEvictRate > 0,
    is_integer(WaitTakeover),
    WaitTakeover >= 0
->
    Path = evacuation_filepath(),
    case filelib:ensure_dir(Path) of
        {error, _} = Error ->
            Error;
        ok ->
            Persisted = maps:with(persist_keys(), Opts),
            Json = emqx_json:encode(prepare_for_encode(Persisted), [pretty]),
            file:write_file(Path, Json)
    end.
|
||||
|
||||
%% Remove the persisted evacuation options file.
%%
%% The operation is idempotent: a file that is already gone counts as
%% success, so callers asserting `ok = clear()' do not crash merely because
%% there was nothing to delete. Any other failure is returned as
%% `{error, Reason}' (previously this could happen too, contradicting the
%% `ok'-only spec).
-spec clear() -> ok_or_error(file:posix() | badarg).
clear() ->
    case file:delete(evacuation_filepath()) of
        ok ->
            ok;
        {error, enoent} ->
            ok;
        {error, _} = Error ->
            Error
    end.
|
||||
|
||||
%% Load the persisted evacuation options.
%% Returns `none' when the file cannot be read. When the file exists but
%% does not decode to a JSON object, the defaults are returned as is;
%% otherwise each default is overridden by the persisted value, if present.
-spec read(start_opts()) -> {ok, start_opts()} | none.
read(DefaultOpts) ->
    case file:read_file(evacuation_filepath()) of
        {error, _} ->
            none;
        {ok, Content} ->
            case emqx_json:safe_decode(Content, [return_maps]) of
                {ok, Decoded} when is_map(Decoded) ->
                    {ok, map_to_opts(DefaultOpts, Decoded)};
                _NotAMap ->
                    {ok, DefaultOpts}
            end
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal funcs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Option keys that are written to the evacuation file.
persist_keys() ->
    [server_reference, conn_evict_rate, sess_evict_rate, wait_takeover].
|
||||
|
||||
%% JSON has no `undefined': an absent server reference is written as `null'.
prepare_for_encode(#{server_reference := undefined} = Opts) ->
    Opts#{server_reference => null};
prepare_for_encode(Opts) ->
    Opts.

%% Inverse of `prepare_for_encode/1': `null' becomes `undefined' again.
format_after_decode(#{server_reference := null} = Opts) ->
    Opts#{server_reference => undefined};
format_after_decode(Opts) ->
    Opts.

%% Build an options map holding exactly the keys of `DefaultOpts'. Each
%% value is taken from the decoded JSON object (looked up by the binary
%% form of the key) and falls back to the default when absent.
map_to_opts(DefaultOpts, Map) ->
    Defaults = maps:to_list(DefaultOpts),
    format_after_decode(map_to_opts(Defaults, Map, #{})).

%% Fold worker for `map_to_opts/2'.
map_to_opts(Defaults, Map, Opts0) ->
    lists:foldl(
        fun({Key, Default}, Acc) ->
            Acc#{Key => maps:get(atom_to_binary(Key), Map, Default)}
        end,
        Opts0,
        Defaults
    ).
|
||||
|
||||
%% Full path of the evacuation options file inside the EMQX data directory.
evacuation_filepath() ->
    filename:join(emqx:data_dir(), ?EVACUATION_FILENAME).
|
|
@ -0,0 +1,238 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_status).
|
||||
|
||||
-export([
|
||||
local_status/0,
|
||||
local_status/1,
|
||||
global_status/0,
|
||||
format_local_status/1,
|
||||
format_coordinator_status/1
|
||||
]).
|
||||
|
||||
%% For RPC
|
||||
-export([
|
||||
evacuation_status/0,
|
||||
rebalance_status/0
|
||||
]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% APIs
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Status of the local node: an active evacuation takes precedence;
%% otherwise the node reports a rebalance only if its rebalance agent is
%% enabled, the coordinator is running one, and the node is among its donors.
-spec local_status() -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status() ->
    case emqx_node_rebalance_evacuation:status() of
        disabled ->
            local_rebalance_status();
        {enabled, Status} ->
            {evacuation, evacuation(Status)}
    end.

%% Rebalance part of `local_status/0': ask the local agent for the
%% coordinator and the coordinator for the rebalance status.
local_rebalance_status() ->
    case emqx_node_rebalance_agent:status() of
        disabled ->
            disabled;
        {enabled, CoordinatorPid} ->
            case emqx_node_rebalance:status(CoordinatorPid) of
                disabled ->
                    disabled;
                {enabled, Status} ->
                    local_rebalance(Status, node())
            end
    end.
|
||||
|
||||
%% Fetch the local status of another node through the versioned RPC module.
-spec local_status(node()) -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status(Node) ->
    emqx_node_rebalance_status_proto_v1:local_status(Node).

%% Render a local status map as printable text, one entry per field, in the
%% order given by `local_status_field_format_order/0'.
-spec format_local_status(map()) -> iodata().
format_local_status(Status) ->
    FieldOrder = local_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||
|
||||
%% Collect the rebalance and evacuation status of every running node.
%% Only nodes that report `{enabled, Status}' are included in the result.
-spec global_status() -> #{rebalances := [{node(), map()}], evacuations := [{node(), map()}]}.
global_status() ->
    Nodes = mria_mnesia:running_nodes(),
    {RebalanceReplies, _} = emqx_node_rebalance_status_proto_v1:rebalance_status(Nodes),
    Rebalances = [
        {Node, coordinator_rebalance(Status)}
     || {Node, {enabled, Status}} <- RebalanceReplies
    ],
    {EvacuationReplies, _} = emqx_node_rebalance_status_proto_v1:evacuation_status(Nodes),
    Evacuations = [
        {Node, evacuation(Status)}
     || {Node, {enabled, Status}} <- EvacuationReplies
    ],
    #{rebalances => Rebalances, evacuations => Evacuations}.
|
||||
|
||||
%% Render a coordinator status map as printable text, one entry per field,
%% in the order given by `coordinator_status_field_format_order/0'.
-spec format_coordinator_status(map()) -> iodata().
format_coordinator_status(Status) ->
    FieldOrder = coordinator_status_field_format_order(),
    format_status(Status, FieldOrder).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Translate the raw evacuation status map into the presentation form.
%% Both goals are the constant 0.
evacuation(Status) ->
    State = maps:get(state, Status),
    ConnEvictRate = maps:get(conn_evict_rate, Status),
    SessEvictRate = maps:get(sess_evict_rate, Status),
    Recipients = maps:get(migrate_to, Status),
    ChannelStats = #{
        initial_connected => maps:get(initial_conns, Status),
        current_connected => maps:get(current_conns, Status),
        initial_sessions => maps:get(initial_sessions, Status),
        current_sessions => maps:get(current_sessions, Status)
    },
    #{
        state => State,
        connection_eviction_rate => ConnEvictRate,
        session_eviction_rate => SessEvictRate,
        connection_goal => 0,
        session_goal => 0,
        session_recipients => Recipients,
        stats => ChannelStats
    }.
|
||||
|
||||
%% A node takes part in a rebalance (from its own point of view) only when
%% it is listed among the donors; any other node reports `disabled'.
local_rebalance(#{donors := DonorNodes} = Status, Node) ->
    case lists:member(Node, DonorNodes) of
        false -> disabled;
        true -> {rebalance, donor_rebalance(Status, Node)}
    end.
|
||||
|
||||
%% Presentation form of a rebalance status as seen from donor node `Node'.
%% The channel statistics combine the initial counts recorded for the node
%% with the counts currently reported by the eviction agent. The two goal
%% fields are only present when the corresponding recipient averages are
%% part of the status.
donor_rebalance(Status, Node) ->
    Opts = maps:get(opts, Status),
    InitialConnCounts = maps:get(initial_conn_counts, Status),
    InitialSessCounts = maps:get(initial_sess_counts, Status),

    ChannelStats = #{
        initial_connected => maps:get(Node, InitialConnCounts),
        initial_sessions => maps:get(Node, InitialSessCounts),
        current_connected => emqx_eviction_agent:connection_count(),
        current_sessions => emqx_eviction_agent:session_count(),
        current_disconnected_sessions => emqx_eviction_agent:session_count(disconnected)
    },
    Required = [
        {state, maps:get(state, Status)},
        {coordinator_node, maps:get(coordinator_node, Status)},
        {connection_eviction_rate, maps:get(conn_evict_rate, Opts)},
        {session_eviction_rate, maps:get(sess_evict_rate, Opts)},
        {recipients, maps:get(recipients, Status)},
        {stats, ChannelStats}
    ],
    %% Optional fields: {output key, status key}.
    Optional = [
        {Target, maps:get(Source, Status)}
     || {Target, Source} <- [
            {connection_goal, recipient_conn_avg},
            {disconnected_session_goal, recipient_sess_avg}
        ],
        maps:is_key(Source, Status)
    ],
    maps:from_list(Required ++ Optional).
|
||||
|
||||
%% Presentation form of a rebalance status as seen from the coordinator.
%% The goal and donor-average fields are only present when the
%% corresponding averages are part of the status.
coordinator_rebalance(Status) ->
    Opts = maps:get(opts, Status),
    Required = [
        {state, maps:get(state, Status)},
        {coordinator_node, maps:get(coordinator_node, Status)},
        {connection_eviction_rate, maps:get(conn_evict_rate, Opts)},
        {session_eviction_rate, maps:get(sess_evict_rate, Opts)},
        {recipients, maps:get(recipients, Status)},
        {donors, maps:get(donors, Status)}
    ],
    %% Optional fields: {output key, status key}.
    Optional = [
        {Target, maps:get(Source, Status)}
     || {Target, Source} <- [
            {connection_goal, recipient_conn_avg},
            {disconnected_session_goal, recipient_sess_avg},
            {donor_conn_avg, donor_conn_avg},
            {donor_sess_avg, donor_sess_avg}
        ],
        maps:is_key(Source, Status)
    ],
    maps:from_list(Required ++ Optional).
|
||||
|
||||
%% Order in which the fields of a local status are printed.
local_status_field_format_order() ->
    [
        state,
        coordinator_node,
        connection_eviction_rate,
        session_eviction_rate,
        connection_goal,
        session_goal,
        disconnected_session_goal,
        session_recipients,
        recipients,
        stats
    ].

%% Order in which the fields of a coordinator status are printed.
coordinator_status_field_format_order() ->
    [
        state,
        coordinator_node,
        donors,
        recipients,
        connection_eviction_rate,
        session_eviction_rate,
        connection_goal,
        disconnected_session_goal,
        donor_conn_avg,
        donor_sess_avg
    ].
|
||||
|
||||
%% Format the fields of `Status' in the order given by `FieldOrder'.
%% Fields that are not present in the status map are skipped.
format_status(Status, FieldOrder) ->
    Present = [
        {Field, maps:get(Field, Status)}
     || Field <- FieldOrder, maps:is_key(Field, Status)
    ],
    [format_local_status_field(Entry) || Entry <- Present].
|
||||
|
||||
%% Format a single `{Field, Value}' status entry as one line of text.
%% The `stats' entry expands into a multi-line block instead.
format_local_status_field({stats, Stats}) ->
    format_local_stats(Stats);
format_local_status_field({Field, Value}) ->
    io_lib:format(status_field_format(Field), [Value]).

%% Format string used for each single-line status field.
status_field_format(state) ->
    "Rebalance state: ~p~n";
status_field_format(coordinator_node) ->
    "Coordinator node: ~p~n";
status_field_format(connection_eviction_rate) ->
    "Connection eviction rate: ~p connections/second~n";
status_field_format(session_eviction_rate) ->
    "Session eviction rate: ~p sessions/second~n";
status_field_format(connection_goal) ->
    "Connection goal: ~p~n";
status_field_format(session_goal) ->
    "Session goal: ~p~n";
status_field_format(disconnected_session_goal) ->
    "Disconnected session goal: ~p~n";
status_field_format(session_recipients) ->
    "Session recipient nodes: ~p~n";
status_field_format(recipients) ->
    "Recipient nodes: ~p~n";
status_field_format(donors) ->
    "Donor nodes: ~p~n";
status_field_format(donor_conn_avg) ->
    "Current average donor node connection count: ~p~n";
status_field_format(donor_sess_avg) ->
    "Current average donor node disconnected session count: ~p~n".

%% Format the channel statistics map: a heading followed by one line per
%% entry, in the order produced by `maps:to_list/1'.
format_local_stats(Stats) ->
    Lines = [
        io_lib:format(" ~p: ~p~n", [Name, Value])
     || {Name, Value} <- maps:to_list(Stats)
    ],
    ["Channel statistics:\n" | Lines].
|
||||
|
||||
%% RPC target: the local node name paired with its evacuation status.
evacuation_status() ->
    Status = emqx_node_rebalance_evacuation:status(),
    {node(), Status}.

%% RPC target: the local node name paired with its rebalance status.
rebalance_status() ->
    Status = emqx_node_rebalance:status(),
    {node(), Status}.
|
|
@ -0,0 +1,35 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
%% Start the supervisor and register it locally under the module name.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% supervisor callback: three permanent workers under a one_for_one
%% strategy, tolerating at most 10 restarts within 3600 seconds.
init([]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 3600},
    Workers = [
        emqx_node_rebalance_evacuation,
        emqx_node_rebalance_agent,
        emqx_node_rebalance
    ],
    {ok, {SupFlags, [child_spec(Worker, []) || Worker <- Workers]}}.

%% Child specification of a permanent worker started via
%% `Mod:start_link(Args...)' with a 5000 ms shutdown timeout.
child_spec(Mod, Args) ->
    #{
        id => Mod,
        start => {Mod, start_link, Args},
        type => worker,
        restart => permanent,
        shutdown => 5000,
        modules => [Mod]
    }.
|
|
@ -0,0 +1,43 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_api_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
node_rebalance_evacuation_start/2,
|
||||
node_rebalance_evacuation_stop/1,
|
||||
|
||||
node_rebalance_start/2,
|
||||
node_rebalance_stop/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% Release in which this protocol version was introduced.
introduced_in() ->
    "5.0.22".

%% Start an evacuation on `Node' with the given options.
-spec node_rebalance_evacuation_start(node(), emqx_node_rebalance_evacuation:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance_evacuation:start_error()).
node_rebalance_evacuation_start(Node, #{} = Opts) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, start, [Opts]).

%% Stop the evacuation running on `Node'.
-spec node_rebalance_evacuation_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_evacuation_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance_evacuation, stop, []).

%% Start a rebalance on `Node' with the given options.
-spec node_rebalance_start(node(), emqx_node_rebalance:start_opts()) ->
    emqx_rpc:badrpc() | ok_or_error(emqx_node_rebalance:start_error()).
node_rebalance_start(Node, Opts) ->
    rpc:call(Node, emqx_node_rebalance, start, [Opts]).

%% Stop the rebalance running on `Node'.
-spec node_rebalance_stop(node()) ->
    emqx_rpc:badrpc() | ok_or_error(not_started).
node_rebalance_stop(Node) ->
    rpc:call(Node, emqx_node_rebalance, stop, []).
|
|
@ -0,0 +1,22 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
available_nodes/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
|
||||
%% Release in which this protocol version was introduced.
introduced_in() ->
    "5.0.22".

%% Call `emqx_node_rebalance_evacuation:is_node_available/0' on every node
%% in `Nodes' and return the raw multicall result.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(Nodes) ->
    rpc:multicall(Nodes, emqx_node_rebalance_evacuation, is_node_available, []).
|
|
@ -0,0 +1,62 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
available_nodes/1,
|
||||
evict_connections/2,
|
||||
evict_sessions/4,
|
||||
connection_counts/1,
|
||||
session_counts/1,
|
||||
enable_rebalance_agent/2,
|
||||
disable_rebalance_agent/2,
|
||||
disconnected_session_counts/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% emqx_bpapi callback: reports the version string "5.0.22" for this
%% protocol module.
introduced_in() -> "5.0.22".
|
||||
|
||||
%% Runs `emqx_node_rebalance:is_node_available/0' on every node in `NodeList'
%% through `rpc:multicall/4' and returns the multicall result.
-spec available_nodes([node()]) -> emqx_rpc:multicall_result(node()).
available_nodes(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance, is_node_available, []).
|
||||
|
||||
%% Runs `emqx_eviction_agent:evict_connections/1' with `ConnCount' on every
%% node in `NodeList' through `rpc:multicall/4'.
-spec evict_connections([node()], non_neg_integer()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_connections(NodeList, ConnCount) ->
    rpc:multicall(NodeList, emqx_eviction_agent, evict_connections, [ConnCount]).
|
||||
|
||||
%% Runs `emqx_eviction_agent:evict_sessions/3' on every node in `NodeList'
%% through `rpc:multicall/4', forwarding the session count, the recipient
%% node list and the connection state in that order.
-spec evict_sessions([node()], non_neg_integer(), [node()], emqx_channel:conn_state()) ->
    emqx_rpc:multicall_result(ok_or_error(disabled)).
evict_sessions(NodeList, SessCount, Recipients, State) ->
    rpc:multicall(NodeList, emqx_eviction_agent, evict_sessions, [SessCount, Recipients, State]).
|
||||
|
||||
%% Runs `emqx_node_rebalance:connection_count/0' on every node in `NodeList'
%% through `rpc:multicall/4'.
-spec connection_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
connection_counts(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance, connection_count, []).
|
||||
|
||||
%% Runs `emqx_node_rebalance:session_count/0' on every node in `NodeList'
%% through `rpc:multicall/4'.
-spec session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
session_counts(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance, session_count, []).
|
||||
|
||||
%% Runs `emqx_node_rebalance_agent:enable/1' with `Owner' on every node in
%% `NodeList' through `rpc:multicall/4'.
-spec enable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_enabled | eviction_agent_busy)).
enable_rebalance_agent(NodeList, Owner) ->
    rpc:multicall(NodeList, emqx_node_rebalance_agent, enable, [Owner]).
|
||||
|
||||
%% Runs `emqx_node_rebalance_agent:disable/1' with `Owner' on every node in
%% `NodeList' through `rpc:multicall/4'.
-spec disable_rebalance_agent([node()], pid()) ->
    emqx_rpc:multicall_result(ok_or_error(already_disabled | invalid_coordinator)).
disable_rebalance_agent(NodeList, Owner) ->
    rpc:multicall(NodeList, emqx_node_rebalance_agent, disable, [Owner]).
|
||||
|
||||
%% Runs `emqx_node_rebalance:disconnected_session_count/0' on every node in
%% `NodeList' through `rpc:multicall/4'.
-spec disconnected_session_counts([node()]) -> emqx_rpc:multicall_result({ok, non_neg_integer()}).
disconnected_session_counts(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance, disconnected_session_count, []).
|
|
@ -0,0 +1,36 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_status_proto_v1).
|
||||
|
||||
-behaviour(emqx_bpapi).
|
||||
|
||||
-export([
|
||||
introduced_in/0,
|
||||
|
||||
local_status/1,
|
||||
rebalance_status/1,
|
||||
evacuation_status/1
|
||||
]).
|
||||
|
||||
-include_lib("emqx/include/bpapi.hrl").
|
||||
-include_lib("emqx/include/types.hrl").
|
||||
|
||||
%% emqx_bpapi callback: reports the version string "5.0.22" for this
%% protocol module.
introduced_in() -> "5.0.22".
|
||||
|
||||
%% Invokes `emqx_node_rebalance_status:local_status/0' on `TargetNode'
%% through `rpc:call/4' and returns whatever the remote call yields.
-spec local_status(node()) ->
    emqx_rpc:badrpc() | disabled | {evacuation, map()} | {rebalance, map()}.
local_status(TargetNode) ->
    rpc:call(TargetNode, emqx_node_rebalance_status, local_status, []).
|
||||
|
||||
%% Runs `emqx_node_rebalance_status:rebalance_status/0' on every node in
%% `NodeList' through `rpc:multicall/4'.
-spec rebalance_status([node()]) ->
    emqx_rpc:multicall_result({node(), map()}).
rebalance_status(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance_status, rebalance_status, []).
|
||||
|
||||
%% Runs `emqx_node_rebalance_status:evacuation_status/0' on every node in
%% `NodeList' through `rpc:multicall/4'.
-spec evacuation_status([node()]) ->
    emqx_rpc:multicall_result({node(), map()}).
evacuation_status(NodeList) ->
    rpc:multicall(NodeList, emqx_node_rebalance_status, evacuation_status, []).
|
|
@ -0,0 +1,229 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/1, emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
%% Common Test callback: the list of test cases is produced by
%% `emqx_common_test_helpers:all/1' for this suite module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Common Test callback: calls the shared start helper with an empty
%% application list and hands Config back unchanged.
init_per_suite(Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:start_apps(NoExtraApps),
    Config.
|
||||
|
||||
%% Common Test callback: mirrors init_per_suite/1 by calling the shared stop
%% helper with an empty application list.
end_per_suite(_Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:stop_apps(NoExtraApps),
    ok.
|
||||
|
||||
%% Common Test callback: starts a two-node cluster (a donor with port 2883
%% and a recipient with port 3883, both named after the test case) running
%% ?START_APPS, starts a snabbkaffe trace and stores the cluster description
%% under `cluster_nodes' in Config.
init_per_testcase(Case, Config) ->
    DonorSpec = {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
    RecipientSpec = {case_specific_node_name(?MODULE, Case, '_recipient'), 3883},
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(
        [DonorSpec, RecipientSpec],
        ?START_APPS
    ),
    ok = snabbkaffe:start_trace(),
    [{cluster_nodes, ClusterNodes} | Config].
|
||||
|
||||
%% Common Test callback: stops snabbkaffe first, then stops the cluster that
%% init_per_testcase/2 recorded under `cluster_nodes'.
end_per_testcase(_Case, Config) ->
    ok = snabbkaffe:stop(),
    ClusterNodes = ?config(cluster_nodes, Config),
    ok = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Connects 500 clients to the donor node, starts a rebalance from it and,
%% once the `emqx_node_rebalance_evict_sess_over' trace event has been
%% observed, checks that the donor's connection and disconnected-session
%% counts exceed the recipient's by at most 50.
t_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),

    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },

    ?assertWaitEvent(
        ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),
        #{?snk_kind := emqx_node_rebalance_evict_sess_over},
        10000
    ),

    %% Tuple elements are evaluated left to right, so each node is asked for
    %% connections, sessions and disconnected sessions in that order.
    AgentCounts = fun(Node) ->
        {
            rpc:call(Node, emqx_eviction_agent, connection_count, []),
            rpc:call(Node, emqx_eviction_agent, session_count, []),
            rpc:call(Node, emqx_eviction_agent, session_count, [disconnected])
        }
    end,
    {DonorConns, DonorSess, DonorDSess} = AgentCounts(Donor),
    {RecipientConns, RecipientSess, RecipientDSess} = AgentCounts(Recipient),

    ct:pal(
        "Donor: conn=~p, sess=~p, dsess=~p",
        [DonorConns, DonorSess, DonorDSess]
    ),
    ct:pal(
        "Recipient: conn=~p, sess=~p, dsess=~p",
        [RecipientConns, RecipientSess, RecipientDSess]
    ),

    ?assert(DonorConns - 50 =< RecipientConns),
    ?assert(DonorDSess - 50 =< RecipientDSess),

    ok = stop_many(Clients).
|
||||
|
||||
%% Starts a rebalance from the donor node and immediately stops the recipient
%% node; after the `emqx_node_rebalance_started' trace event the donor must
%% report the rebalance status `disabled'.
t_rebalance_node_crash(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),

    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },

    ?assertWaitEvent(
        begin
            ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),
            emqx_common_test_helpers:stop_slave(Recipient)
        end,
        #{?snk_kind := emqx_node_rebalance_started},
        1000
    ),

    DonorStatus = rpc:call(Donor, emqx_node_rebalance, status, []),
    ?assertEqual(disabled, DonorStatus),

    ok = stop_many(Clients).
|
||||
|
||||
%% Starting a rebalance must be refused with `{error, nothing_to_balance}'
%% both with no clients at all and with 50 clients connected to the donor.
t_no_need_to_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),

    RebalanceOpts = #{
        nodes => [Donor, Recipient],
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        evict_interval => 10,
        abs_conn_threshold => 50,
        abs_sess_threshold => 50,
        rel_conn_threshold => 1.0,
        rel_sess_threshold => 1.0,
        wait_health_check => 0.01,
        wait_takeover => 0.01
    },
    StartRebalance = fun() ->
        rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts])
    end,

    %% No clients connected yet.
    ?assertEqual({error, nothing_to_balance}, StartRebalance()),

    Clients = emqtt_connect_many(DonorPort, 50),

    %% 50 clients on the donor.
    ?assertEqual({error, nothing_to_balance}, StartRebalance()),

    ok = stop_many(Clients).
|
||||
|
||||
%% Sends an unknown info message, cast and call to the donor's
%% `emqx_node_rebalance' process, first before and then after a rebalance has
%% been started; the call must be answered with `ignored' both times.
t_unknown_mesages(Config) ->
    process_flag(trap_exit, true),
    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),

    Clients = emqtt_connect_many(DonorPort, 500),

    RebalanceOpts = #{
        wait_health_check => 100,
        abs_conn_threshold => 50,
        nodes => [Donor, Recipient]
    },

    Pid = rpc:call(Donor, erlang, whereis, [emqx_node_rebalance]),

    PokeWithUnknown = fun() ->
        Pid ! unknown,
        ok = gen_server:cast(Pid, unknown),
        ?assertEqual(ignored, gen_server:call(Pid, unknown))
    end,

    PokeWithUnknown(),

    ok = rpc:call(Donor, emqx_node_rebalance, start, [RebalanceOpts]),

    PokeWithUnknown(),

    ok = stop_many(Clients).
|
||||
|
||||
%% With the eviction agent already enabled on the recipient node,
%% `emqx_node_rebalance:available_nodes/1' evaluated on the donor must return
%% only the donor.
t_available_nodes(Config) ->
    [{Donor, _DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),

    %% Start the eviction agent on the recipient so that it is "occupied"
    %% and not available for rebalance.
    ok = rpc:call(Recipient, emqx_eviction_agent, enable, [test_rebalance, undefined]),

    %% Only the donor should be available for rebalance, since the recipient
    %% is "occupied".
    Available = rpc:call(Donor, emqx_node_rebalance, available_nodes, [[Donor, Recipient]]),
    ?assertEqual([Donor], Available).
|
|
@ -0,0 +1,214 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_agent_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[case_specific_node_name/2]
|
||||
).
|
||||
|
||||
%% Common Test callback: the suite consists of the `local' group followed by
%% the `cluster' group.
all() ->
    [{group, GroupName} || GroupName <- [local, cluster]].
|
||||
|
||||
%% Common Test callback: lists the test cases of each group, both with an
%% empty property list.
groups() ->
    LocalCases = [
        t_enable_disable,
        t_enable_egent_busy,
        t_unknown_messages
    ],
    ClusterCases = [
        t_rebalance_agent_coordinator_fail,
        t_rebalance_agent_fail
    ],
    [
        {local, [], LocalCases},
        {cluster, [], ClusterCases}
    ].
|
||||
|
||||
%% Common Test callback: starts emqx_eviction_agent and emqx_node_rebalance
%% through the shared helper and hands Config back unchanged.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Common Test callback: stops the applications that init_per_suite/1 started.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_common_test_helpers:stop_apps(Apps),
    ok.
|
||||
|
||||
%% Common Test callback: records in Config, under the `cluster' key, whether
%% the group being entered is the `cluster' group. Only the `local' and
%% `cluster' groups are accepted.
init_per_group(Group, Config) when Group =:= local; Group =:= cluster ->
    [{cluster, Group =:= cluster} | Config].
|
||||
|
||||
%% Common Test callback: groups need no teardown.
end_per_group(_Group, _Config) -> ok.
|
||||
|
||||
%% Common Test callback: for cases in the `cluster' group, starts a one-node
%% cluster (port 2883, node named after the test case) running both
%% applications and stores it under `cluster_nodes'; `local' cases get Config
%% back untouched.
init_per_testcase(Case, Config) ->
    NeedsCluster = ?config(cluster, Config),
    case NeedsCluster of
        false ->
            Config;
        true ->
            NodeSpec = {case_specific_node_name(?MODULE, Case), 2883},
            Apps = [emqx_eviction_agent, emqx_node_rebalance],
            ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster([NodeSpec], Apps),
            [{cluster_nodes, ClusterNodes} | Config]
    end.
|
||||
|
||||
%% Common Test callback: stops the cluster recorded under `cluster_nodes' for
%% cases in the `cluster' group; `local' cases need no teardown.
end_per_testcase(_Case, Config) ->
    NeedsCluster = ?config(cluster, Config),
    case NeedsCluster of
        false ->
            ok;
        true ->
            ClusterNodes = ?config(cluster_nodes, Config),
            Apps = [emqx_eviction_agent, emqx_node_rebalance],
            emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, Apps)
    end.
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Local tests
|
||||
|
||||
%% Walks the local rebalance agent through enable/disable and checks the
%% status and the error returned at each step: double enable, disable by a
%% pid other than the one that enabled it, and double disable.
t_enable_disable(_Config) ->
    Self = self(),

    ?assertEqual(disabled, emqx_node_rebalance_agent:status()),

    ?assertEqual(ok, emqx_node_rebalance_agent:enable(Self)),
    ?assertEqual({error, already_enabled}, emqx_node_rebalance_agent:enable(Self)),
    ?assertEqual({enabled, Self}, emqx_node_rebalance_agent:status()),

    %% A pid different from the one used for enable/1 must be rejected.
    Stranger = spawn_link(fun() -> ok end),
    ?assertEqual({error, invalid_coordinator}, emqx_node_rebalance_agent:disable(Stranger)),

    ?assertEqual(ok, emqx_node_rebalance_agent:disable(Self)),
    ?assertEqual({error, already_disabled}, emqx_node_rebalance_agent:disable(Self)),

    ?assertEqual(disabled, emqx_node_rebalance_agent:status()).
|
||||
|
||||
%% While the eviction agent is enabled under the `rebalance_test' kind,
%% enabling the rebalance agent must fail with `eviction_agent_busy'.
t_enable_egent_busy(_Config) ->
    Kind = rebalance_test,
    ok = emqx_eviction_agent:enable(Kind, undefined),

    EnableResult = emqx_node_rebalance_agent:enable(self()),
    ?assertEqual({error, eviction_agent_busy}, EnableResult),

    ok = emqx_eviction_agent:disable(Kind).
|
||||
|
||||
%% Sends an unknown cast, info message and call to the registered
%% `emqx_node_rebalance_agent' process; the call must be answered with
%% `ignored'.
t_unknown_messages(_Config) ->
    Agent = whereis(emqx_node_rebalance_agent),
    Unknown = unknown,

    ok = gen_server:cast(Agent, Unknown),
    Agent ! Unknown,
    ignored = gen_server:call(Agent, Unknown).
|
||||
|
||||
%% Cluster tests
|
||||
|
||||
%% The following tests verify that emqx_node_rebalance_agent correctly links
%% the coordinator process with the emqx_eviction_agent processes.
|
||||
|
||||
%% Enables the rebalance agent on the cluster node on behalf of a local
%% coordinator process, then kills that coordinator and expects the remote
%% `emqx_eviction_agent' process to exit within one second.
t_rebalance_agent_coordinator_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),

    %% The coordinator just blocks until it is told `done' (or is killed).
    WaitForDone = fun() ->
        receive
            done -> ok
        end
    end,
    Coordinator = spawn_link(WaitForDone),

    ?assertEqual(disabled, rpc:call(Node, emqx_eviction_agent, status, [])),
    ?assertEqual(ok, rpc:call(Node, emqx_node_rebalance_agent, enable, [Coordinator])),
    ?assertMatch({enabled, _}, rpc:call(Node, emqx_eviction_agent, status, [])),

    %% Link to the remote eviction agent so that its exit is delivered here
    %% as an 'EXIT' message (exits are trapped above).
    EvictionAgent = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = link(EvictionAgent),

    true = exit(Coordinator, kill),

    receive
        {'EXIT', EvictionAgent, _} -> true
    after 1000 ->
        ct:fail("emqx_eviction_agent did not exit")
    end.
|
||||
|
||||
%% Enables the rebalance agent on the cluster node on behalf of a local
%% coordinator process, then kills the remote `emqx_eviction_agent' process
%% and expects the coordinator to exit within one second.
%%
%% The timeout message names the coordinator, because that is the process
%% whose 'EXIT' the receive waits for.
t_rebalance_agent_fail(Config) ->
    process_flag(trap_exit, true),

    [{Node, _}] = ?config(cluster_nodes, Config),

    %% The coordinator is linked to this process; with exits trapped above,
    %% its termination arrives here as an 'EXIT' message.
    CoordinatorPid = spawn_link(
        fun() ->
            receive
                done -> ok
            end
        end
    ),

    ?assertEqual(
        ok,
        rpc:call(Node, emqx_node_rebalance_agent, enable, [CoordinatorPid])
    ),

    EvictionAgentPid = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = exit(EvictionAgentPid, kill),

    receive
        {'EXIT', CoordinatorPid, _} -> true
    after 1000 ->
        ct:fail("coordinator process did not exit")
    end.
|
|
@ -0,0 +1,444 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_api_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_mgmt_api_test_util,
|
||||
[
|
||||
request/2,
|
||||
request/3,
|
||||
uri/1
|
||||
]
|
||||
).
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
%% Common Test callback: the list of test cases is produced by
%% `emqx_common_test_helpers:all/1' for this suite module.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Common Test callback: starts the applications listed in ?START_APPS and
%% hands Config back unchanged.
init_per_suite(Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Common Test callback: stops the applications listed in ?START_APPS.
end_per_suite(_Config) ->
    Apps = ?START_APPS,
    ok = emqx_common_test_helpers:stop_apps(Apps),
    ok.
|
||||
|
||||
%% Common Test callback: starts a donor (port 2883) and a recipient
%% (port 3883) node with a case-specific emqx data_dir, runs
%% `emqx_mgmt_api_test_util:init_suite/0' on the donor, redirects the default
%% auth header lookup to the donor and stores the cluster under
%% `cluster_nodes'.
init_per_testcase(Case, Config) ->
    NodeSpecs = [
        {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
        {case_specific_node_name(?MODULE, Case, '_recipient'), 3883}
    ],
    Env = [{emqx, data_dir, case_specific_data_dir(Case, Config)}],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodeSpecs, ?START_APPS, Env),
    [{DonorNode, _} | _] = ClusterNodes,

    ok = rpc:call(DonorNode, emqx_mgmt_api_test_util, init_suite, []),
    ok = take_auth_header_from(DonorNode),

    [{cluster_nodes, ClusterNodes} | Config].
|
||||
%% Common Test callback: stops the cluster recorded under `cluster_nodes';
%% the helper's result is returned unchecked.
end_per_testcase(_Case, Config) ->
    ClusterNodes = ?config(cluster_nodes, Config),
    _ = emqx_eviction_agent_test_helpers:stop_cluster(ClusterNodes, ?START_APPS).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Posts a series of invalid bodies (and an unknown node name) to the
%% evacuation start endpoint and expects HTTP 400 for each; then posts a
%% valid body, expects HTTP 200 and checks that global_status lists the donor
%% under "evacuations".
t_start_evacuation_validation(Config) ->
    [{DonorNode, _}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    StartPath = ["load_rebalance", atom_to_list(DonorNode), "evacuation", "start"],

    InvalidBodies = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{redirect_to => 123},
        #{wait_takeover => <<"wait">>},
        #{migrate_to => []},
        #{migrate_to => <<"migrate_to">>},
        #{migrate_to => [<<"bad_node">>]},
        #{migrate_to => [<<"bad_node">>, DonorNodeBin]},
        #{unknown => <<"Value">>}
    ],
    AssertRejected = fun(Body) ->
        ?assertMatch({ok, 400, #{}}, api_post(StartPath, Body))
    end,
    lists:foreach(AssertRejected, InvalidBodies),

    %% A node name that is not part of the cluster is rejected as well.
    BadNodePath = ["load_rebalance", "bad@node", "evacuation", "start"],
    ?assertMatch({ok, 400, #{}}, api_post(BadNodePath, #{})),

    ValidBody = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        redirect_to => <<"srv">>,
        migrate_to => [atom_to_binary(RecipientNode)]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidBody)),

    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [#{<<"node">> := DonorNodeBin}]}},
        api_get(["load_rebalance", "global_status"])
    ).
|
||||
|
||||
%% Posts a series of invalid bodies (and an unknown node name) to the
%% rebalance start endpoint and expects HTTP 400 for each; then, with 50
%% clients connected to the donor, posts a valid body, expects HTTP 200 and
%% checks that global_status lists the donor under "rebalances".
t_start_rebalance_validation(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    StartPath = ["load_rebalance", atom_to_list(DonorNode), "start"],

    InvalidBodies = [
        #{conn_evict_rate => <<"conn">>},
        #{sess_evict_rate => <<"sess">>},
        #{abs_conn_threshold => <<"act">>},
        #{rel_conn_threshold => <<"rct">>},
        #{abs_sess_threshold => <<"act">>},
        #{rel_sess_threshold => <<"rct">>},
        #{wait_takeover => <<"wait">>},
        #{wait_health_check => <<"wait">>},
        #{nodes => <<"nodes">>},
        #{nodes => []},
        #{nodes => [<<"bad_node">>]},
        #{nodes => [<<"bad_node">>, DonorNodeBin]},
        #{unknown => <<"Value">>}
    ],
    AssertRejected = fun(Body) ->
        ?assertMatch({ok, 400, #{}}, api_post(StartPath, Body))
    end,
    lists:foreach(AssertRejected, InvalidBodies),

    %% A node name that is not part of the cluster is rejected as well.
    BadNodePath = ["load_rebalance", "bad@node", "start"],
    ?assertMatch({ok, 400, #{}}, api_post(BadNodePath, #{})),

    Clients = emqtt_connect_many(DonorPort, 50),

    ValidBody = #{
        conn_evict_rate => 10,
        sess_evict_rate => 10,
        wait_takeover => 10,
        wait_health_check => 10,
        abs_conn_threshold => 10,
        rel_conn_threshold => 1.001,
        abs_sess_threshold => 10,
        rel_sess_threshold => 1.001,
        nodes => [DonorNodeBin, atom_to_binary(RecipientNode)]
    },
    ?assertMatch({ok, 200, #{}}, api_post(StartPath, ValidBody)),

    ?assertMatch(
        {ok, 200, #{<<"rebalances">> := [#{<<"node">> := DonorNodeBin}]}},
        api_get(["load_rebalance", "global_status"])
    ),

    ok = stop_many(Clients).
|
||||
|
||||
%% Starts an evacuation on the donor through the REST API using the API
%% module's example options (with `migrate_to' pointed at the recipient),
%% checks the local and global status responses after translation, stops the
%% evacuation and checks that both status endpoints report nothing running.
t_start_stop_evacuation(Config) ->
    [{DonorNode, _}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    EvacuationPath = ["load_rebalance", atom_to_list(DonorNode), "evacuation"],
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    Overrides = #{migrate_to => [atom_to_binary(RecipientNode)]},
    StartOpts = maps:merge(emqx_node_rebalance_api:rebalance_evacuation_example(), Overrides),

    ?assertMatch({ok, 200, #{}}, api_post(EvacuationPath ++ ["start"], StartOpts)),

    %% Local status of the running evacuation.
    StatusResponse = api_get(StatusPath),
    ?assertMatch({ok, 200, _}, StatusResponse),
    {ok, 200, Status} = StatusResponse,

    LocalStatus = emqx_node_rebalance_api:translate(local_status_enabled, Status),
    ?assertMatch(
        #{
            process := evacuation,
            connection_eviction_rate := 100,
            session_eviction_rate := 100,
            connection_goal := 0,
            session_goal := 0,
            stats := #{
                initial_connected := _,
                current_connected := _,
                initial_sessions := _,
                current_sessions := _
            }
        },
        LocalStatus
    ),

    %% Global status must list exactly this evacuation and no rebalances.
    GlobalStatusResponse = api_get(GlobalStatusPath),
    ?assertMatch({ok, 200, _}, GlobalStatusResponse),
    {ok, 200, GlobalStatus} = GlobalStatusResponse,

    TranslatedGlobalStatus = emqx_node_rebalance_api:translate(global_status, GlobalStatus),
    ?assertMatch(
        #{
            rebalances := [],
            evacuations := [
                #{
                    node := DonorNodeBin,
                    connection_eviction_rate := 100,
                    session_eviction_rate := 100,
                    connection_goal := 0,
                    session_goal := 0,
                    stats := #{
                        initial_connected := _,
                        current_connected := _,
                        initial_sessions := _,
                        current_sessions := _
                    }
                }
            ]
        },
        TranslatedGlobalStatus
    ),

    ?assertMatch({ok, 200, #{}}, api_post(EvacuationPath ++ ["stop"], #{})),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),
    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(GlobalStatusPath)
    ).
|
||||
|
||||
%% With 100 clients connected to the donor, starts a rebalance through the
%% REST API using the API module's example options minus `nodes', checks the
%% local and global status (both raw JSON and translated), stops the
%% rebalance and checks that both status endpoints report nothing running.
t_start_stop_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    DonorNodeBin = atom_to_binary(DonorNode),
    RecipientNodeBin = atom_to_binary(RecipientNode),
    DonorPath = ["load_rebalance", atom_to_list(DonorNode)],
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),

    Clients = emqtt_connect_many(DonorPort, 100),

    StartOpts = maps:without([nodes], emqx_node_rebalance_api:rebalance_example()),
    ?assertMatch({ok, 200, #{}}, api_post(DonorPath ++ ["start"], StartOpts)),

    %% Local status of the running rebalance.
    StatusResponse = api_get(StatusPath),
    ?assertMatch({ok, 200, _}, StatusResponse),
    {ok, 200, Status} = StatusResponse,

    LocalStatus = emqx_node_rebalance_api:translate(local_status_enabled, Status),
    ?assertMatch(
        #{process := rebalance, connection_eviction_rate := 10, session_eviction_rate := 20},
        LocalStatus
    ),

    %% Global status: fetched once for translation below and once more for
    %% the raw JSON check.
    GlobalStatusResponse = api_get(GlobalStatusPath),
    ?assertMatch({ok, 200, _}, GlobalStatusResponse),
    {ok, 200, GlobalStatus} = GlobalStatusResponse,

    ?assertMatch(
        {ok, 200, #{
            <<"evacuations">> := [],
            <<"rebalances">> :=
                [
                    #{
                        <<"state">> := _,
                        <<"node">> := DonorNodeBin,
                        <<"coordinator_node">> := _,
                        <<"connection_eviction_rate">> := 10,
                        <<"session_eviction_rate">> := 20,
                        <<"donors">> := [DonorNodeBin],
                        <<"recipients">> := [RecipientNodeBin]
                    }
                ]
        }},
        api_get(GlobalStatusPath)
    ),

    TranslatedGlobalStatus = emqx_node_rebalance_api:translate(global_status, GlobalStatus),
    ?assertMatch(
        #{
            evacuations := [],
            rebalances := [
                #{
                    state := _,
                    node := DonorNodeBin,
                    coordinator_node := _,
                    connection_eviction_rate := 10,
                    session_eviction_rate := 20,
                    donors := [DonorNodeBin],
                    recipients := [RecipientNodeBin]
                }
            ]
        },
        TranslatedGlobalStatus
    ),

    ?assertMatch({ok, 200, #{}}, api_post(DonorPath ++ ["stop"], #{})),

    ?assertMatch(
        {ok, 200, #{<<"status">> := <<"disabled">>}},
        api_get(StatusPath)
    ),
    ?assertMatch(
        {ok, 200, #{<<"evacuations">> := [], <<"rebalances">> := []}},
        api_get(GlobalStatusPath)
    ),

    ok = stop_many(Clients).
|
||||
|
||||
%% The availability_check endpoint must answer 200 while nothing is running,
%% 503 while an evacuation is active on the donor, and 200 again once the
%% evacuation has been stopped.
t_availability_check(Config) ->
    [{DonorNode, _} | _] = ?config(cluster_nodes, Config),
    CheckPath = ["load_rebalance", "availability_check"],

    ?assertMatch({ok, 200, #{}}, api_get(CheckPath)),

    ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [#{}]),
    ?assertMatch({ok, 503, _}, api_get(CheckPath)),

    ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, stop, []),
    ?assertMatch({ok, 200, #{}}, api_get(CheckPath)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Issues a GET for `Path' via the imported request/2 and uri/1 helpers.
%% A response body is decoded with jiffy into maps; `{error, _}' results are
%% returned untouched.
api_get(Path) ->
    Url = uri(Path),
    case request(get, Url) of
        {error, _} = Error ->
            Error;
        {ok, Code, ResponseBody} ->
            Decoded = jiffy:decode(ResponseBody, [return_maps]),
            {ok, Code, Decoded}
    end.
|
||||
|
||||
%% Issues a POST of `Data' to `Path' via the imported request/3 and uri/1
%% helpers. A response body is decoded with jiffy into maps; `{error, _}'
%% results are returned untouched.
api_post(Path, Data) ->
    Url = uri(Path),
    case request(post, Url, Data) of
        {error, _} = Error ->
            Error;
        {ok, Code, ResponseBody} ->
            Decoded = jiffy:decode(ResponseBody, [return_maps]),
            {ok, Code, Decoded}
    end.
|
||||
|
||||
%% Mocks `emqx_common_test_http' (passthrough) so that
%% default_auth_header/0 is answered by calling the same function on `Node'
%% over rpc. Always returns `ok'.
take_auth_header_from(Node) ->
    Mod = emqx_common_test_http,
    RemoteAuthHeader = fun() ->
        rpc:call(Node, emqx_common_test_http, default_auth_header, [])
    end,
    meck:new(Mod, [passthrough]),
    meck:expect(Mod, default_auth_header, RemoteAuthHeader),
    ok.
|
||||
|
||||
%% Builds a per-test-case directory path by joining the `priv_dir' entry of
%% Config with the case name; yields `undefined' when the `priv_dir' lookup
%% yields `undefined'.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    case PrivDir =:= undefined of
        true -> undefined;
        false -> filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,291 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_cli_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect_many/2, stop_many/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
-define(START_APPS, [emqx_eviction_agent, emqx_node_rebalance]).
|
||||
|
||||
%% Common Test callback: the list of test cases for this suite is obtained
%% from emqx_common_test_helpers:all/1.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% Starts the applications listed in ?START_APPS on the local node and hands
%% the configuration back untouched.
init_per_suite(Config) ->
    Apps = ?START_APPS,
    emqx_common_test_helpers:start_apps(Apps),
    Config.
|
||||
|
||||
%% Stops the ?START_APPS applications in the reverse of their start order and
%% returns the configuration unchanged.
end_per_suite(Config) ->
    StopOrder = lists:reverse(?START_APPS),
    emqx_common_test_helpers:stop_apps(StopOrder),
    Config.
|
||||
|
||||
%% Per-test-case setup.
%% For t_rebalance a two-node cluster (donor on port 2883, recipient on port
%% 3883) is started and stored under `cluster_nodes'. Every other case only
%% makes sure no evacuation or rebalance is left running on the local node.
init_per_testcase(t_rebalance = Case, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    Donor = {case_specific_node_name(?MODULE, Case, '_donor'), 2883},
    Recipient = {case_specific_node_name(?MODULE, Case, '_recipient'), 3883},
    Nodes = emqx_eviction_agent_test_helpers:start_cluster([Donor, Recipient], ?START_APPS),
    [{cluster_nodes, Nodes} | Config];
init_per_testcase(_Case, Config) ->
    %% Stop results are ignored on purpose: nothing may be running at all.
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    Config.
|
||||
|
||||
%% Per-test-case teardown: stops any evacuation/rebalance on the local node
%% (results ignored) and, for t_rebalance, additionally shuts down the cluster
%% that init_per_testcase/2 stored under `cluster_nodes'.
end_per_testcase(t_rebalance, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    Nodes = ?config(cluster_nodes, Config),
    _ = emqx_eviction_agent_test_helpers:stop_cluster(Nodes, ?START_APPS);
end_per_testcase(_Case, _Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop().
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Drives the evacuation-related commands of emqx_node_rebalance_cli:cli/1 on
%% the local node: usage and status output, starts that must be rejected
%% because of invalid arguments, a valid start, a rejected second start while
%% evacuation is enabled, and finally stop (which succeeds only once).
t_evacuation(_Config) ->
    ThisNode = atom_to_list(node()),
    StartEvacuation = fun(Opts) ->
        emqx_node_rebalance_cli:cli(["start", "--evacuation" | Opts])
    end,
    ShowStatus = fun() ->
        ok = emqx_node_rebalance_cli:cli(["status"]),
        ok = emqx_node_rebalance_cli:cli(["node-status"]),
        ok = emqx_node_rebalance_cli:cli(["node-status", ThisNode])
    end,

    %% usage
    ok = emqx_node_rebalance_cli:cli(["foobar"]),

    %% status
    ShowStatus(),

    %% start with invalid args
    InvalidOpts = [
        ["--foo-bar"],
        ["--conn-evict-rate", "foobar"],
        ["--sess-evict-rate", "foobar"],
        ["--wait-takeover", "foobar"],
        ["--migrate-to", "nonexistent@node"],
        ["--migrate-to", ""],
        ["--unknown-arg"]
    ],
    lists:foreach(
        fun(Opts) -> ?assertNot(StartEvacuation(Opts)) end,
        InvalidOpts
    ),

    %% start with valid args
    ?assert(
        StartEvacuation([
            "--conn-evict-rate",
            "10",
            "--sess-evict-rate",
            "10",
            "--wait-takeover",
            "10",
            "--migrate-to",
            ThisNode,
            "--redirect-to",
            "srv"
        ])
    ),

    %% status
    ShowStatus(),
    ?assertMatch({enabled, #{}}, emqx_node_rebalance_evacuation:status()),

    %% already enabled
    ?assertNot(StartEvacuation(["--conn-evict-rate", "10", "--redirect-to", "srv"])),

    %% stop: the first call succeeds, the second has nothing left to stop
    true = emqx_node_rebalance_cli:cli(["stop"]),
    false = emqx_node_rebalance_cli:cli(["stop"]),

    ?assertEqual(disabled, emqx_node_rebalance_evacuation:status()).
|
||||
|
||||
%% Drives the rebalance commands of the CLI on the donor node of a two-node
%% cluster: starts that must be rejected because of invalid arguments, a
%% valid start with 20 clients connected to the donor, status output, a
%% rejected second start while rebalance is enabled, and stop (which succeeds
%% only once).
t_rebalance(Config) ->
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, {RecipientNode, _}] = ?config(cluster_nodes, Config),
    Donor = atom_to_list(DonorNode),
    Recipient = atom_to_list(RecipientNode),
    Start = fun(Opts) -> emqx_node_rebalance_cli(DonorNode, ["start" | Opts]) end,

    %% start with invalid args
    InvalidOpts = [
        ["--foo-bar"],
        ["--conn-evict-rate", "foobar"],
        ["--abs-conn-threshold", "foobar"],
        ["--rel-conn-threshold", "foobar"],
        ["--sess-evict-rate", "foobar"],
        ["--abs-sess-threshold", "foobar"],
        ["--rel-sess-threshold", "foobar"],
        ["--wait-takeover", "foobar"],
        ["--wait-health-check", "foobar"],
        ["--nodes", "nonexistent@node"],
        ["--nodes", ""],
        ["--nodes", Recipient],
        ["--unknown-arg"]
    ],
    lists:foreach(
        fun(Opts) -> ?assertNot(Start(Opts)) end,
        InvalidOpts
    ),

    Conns = emqtt_connect_many(DonorPort, 20),

    %% start with valid args
    NodeList = Donor ++ "," ++ Recipient,
    ?assert(
        Start([
            "--conn-evict-rate",
            "10",
            "--abs-conn-threshold",
            "10",
            "--rel-conn-threshold",
            "1.1",
            "--sess-evict-rate",
            "10",
            "--abs-sess-threshold",
            "10",
            "--rel-sess-threshold",
            "1.1",
            "--wait-takeover",
            "10",
            "--nodes",
            NodeList
        ])
    ),

    %% status
    ok = emqx_node_rebalance_cli(DonorNode, ["status"]),
    ok = emqx_node_rebalance_cli(DonorNode, ["node-status"]),
    ok = emqx_node_rebalance_cli(DonorNode, ["node-status", Donor]),

    ?assertMatch(
        {enabled, #{}},
        rpc:call(DonorNode, emqx_node_rebalance, status, [])
    ),

    %% already enabled
    ?assertNot(Start([])),

    %% stop: the first call succeeds, the second has nothing left to stop
    true = emqx_node_rebalance_cli(DonorNode, ["stop"]),
    false = emqx_node_rebalance_cli(DonorNode, ["stop"]),

    ?assertEqual(
        disabled,
        rpc:call(DonorNode, emqx_node_rebalance, status, [])
    ),

    ok = stop_many(Conns).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Runs emqx_node_rebalance_cli:cli(Args) on `Node' via RPC and returns its
%% result. An RPC failure (`{badrpc, Reason}') is turned into error(Reason) so
%% it cannot be mistaken for a CLI return value.
emqx_node_rebalance_cli(Node, Args) ->
    Reply = rpc:call(Node, emqx_node_rebalance_cli, cli, [Args]),
    case Reply of
        {badrpc, Why} -> error(Why);
        _ -> Reply
    end.
|
|
@ -0,0 +1,271 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx_mqtt.hrl").
|
||||
-include_lib("emqx/include/asserts.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
-import(
|
||||
emqx_eviction_agent_test_helpers,
|
||||
[emqtt_connect/1, emqtt_try_connect/1, case_specific_node_name/3]
|
||||
).
|
||||
|
||||
%% Common Test callback: the suite consists of the single-node group followed
%% by the two-node group.
all() ->
    [{group, Group} || Group <- [one_node, two_node]].
|
||||
|
||||
%% Common Test callback: test cases of each group, with no group properties.
groups() ->
    OneNodeCases = [
        t_agent_busy,
        t_already_started,
        t_not_started,
        t_start,
        t_persistence,
        t_unknown_messages
    ],
    TwoNodeCases = [
        t_conn_evicted,
        t_migrate_to,
        t_session_evicted
    ],
    [
        {one_node, [], OneNodeCases},
        {two_node, [], TwoNodeCases}
    ].
|
||||
|
||||
%% Calls emqx_common_test_helpers:start_apps/1 with an empty application list
%% (the suite's own applications are started on cluster nodes per test case)
%% and returns the configuration unchanged.
init_per_suite(Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:start_apps(NoExtraApps),
    Config.
|
||||
|
||||
%% Counterpart of init_per_suite/1: calls stop_apps/1 with an empty
%% application list and returns `ok'.
end_per_suite(_Config) ->
    NoExtraApps = [],
    ok = emqx_common_test_helpers:stop_apps(NoExtraApps).
|
||||
|
||||
%% Records the group name as `cluster_type' so that init_per_testcase/2 and
%% the option helpers know how many nodes the test case runs on.
init_per_group(Group, Config) when Group =:= one_node; Group =:= two_node ->
    [{cluster_type, Group} | Config].
|
||||
|
||||
%% Groups hold no resources of their own, so there is nothing to release.
end_per_group(_Group, _Config) -> ok.
|
||||
|
||||
%% Starts a fresh cluster for every test case: one node (port 2883) for the
%% `one_node' group, plus a recipient node (port 3883) for the `two_node'
%% group. Each cluster gets a case-specific emqx data_dir. A snabbkaffe trace
%% is started afterwards and the nodes are stored under `cluster_nodes'.
init_per_testcase(Case, Config) ->
    Evacuated = {case_specific_node_name(?MODULE, Case, '_evacuated'), 2883},
    NodesWithPorts =
        case ?config(cluster_type, Config) of
            one_node ->
                [Evacuated];
            two_node ->
                [Evacuated, {case_specific_node_name(?MODULE, Case, '_recipient'), 3883}]
        end,
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    Env = [{emqx, data_dir, case_specific_data_dir(Case, Config)}],
    ClusterNodes = emqx_eviction_agent_test_helpers:start_cluster(NodesWithPorts, Apps, Env),
    ok = snabbkaffe:start_trace(),
    [{cluster_nodes, ClusterNodes} | Config].
|
||||
|
||||
%% Stops snabbkaffe and then shuts down the cluster that init_per_testcase/2
%% stored under `cluster_nodes'.
end_per_testcase(_Case, Config) ->
    ok = snabbkaffe:stop(),
    Nodes = ?config(cluster_nodes, Config),
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    ok = emqx_eviction_agent_test_helpers:stop_cluster(Nodes, Apps).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% One node tests
|
||||
|
||||
%% Evacuation must refuse to start with `eviction_agent_busy' when the
%% eviction agent on the node is already enabled for another rebalance kind.
t_agent_busy(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_eviction_agent, enable, [other_rebalance, undefined]),

    StartResult = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertEqual({error, eviction_agent_busy}, StartResult).
|
||||
|
||||
%% A second start while evacuation is running must fail with
%% `already_started'.
t_already_started(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    SecondStart = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertEqual({error, already_started}, SecondStart).
|
||||
|
||||
%% Stopping an evacuation that was never started must fail with `not_started'.
t_not_started(Config) ->
    [{Node, _Port}] = ?config(cluster_nodes, Config),

    StopResult = rpc:call(Node, emqx_node_rebalance_evacuation, stop, []),
    ?assertEqual({error, not_started}, StopResult).
|
||||
|
||||
%% Once evacuation is started, a new connection attempt to the node must be
%% refused with `use_another_server'.
t_start(Config) ->
    %% Keep the test process alive if a linked client process exits.
    process_flag(trap_exit, true),

    [{Node, Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    ConnectResult = emqtt_try_connect([{port, Port}]),
    ?assertMatch({error, {use_another_server, #{}}}, ConnectResult).
|
||||
|
||||
%% The evacuation must survive a restart of the emqx_node_rebalance_evacuation
%% child of emqx_node_rebalance_sup: connections are still refused afterwards
%% and the reported status still carries the original options.
t_persistence(Config) ->
    process_flag(trap_exit, true),

    [{Node, Port}] = ?config(cluster_nodes, Config),
    Sup = emqx_node_rebalance_sup,
    Child = emqx_node_rebalance_evacuation,

    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),
    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect([{port, Port}])
    ),

    %% Restart the evacuation process under its supervisor.
    ok = rpc:call(Node, supervisor, terminate_child, [Sup, Child]),
    {ok, _} = rpc:call(Node, supervisor, restart_child, [Sup, Child]),

    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect([{port, Port}])
    ),
    ?assertMatch(
        {enabled, #{conn_evict_rate := 10}},
        rpc:call(Node, emqx_node_rebalance_evacuation, status, [])
    ).
|
||||
|
||||
%% The evacuation process must tolerate unexpected input: a raw message and a
%% cast are sent to it, and an unknown call must be answered with `ignored'.
t_unknown_messages(Config) ->
    process_flag(trap_exit, true),

    [{Node, _Port}] = ?config(cluster_nodes, Config),
    ok = rpc:call(Node, emqx_node_rebalance_evacuation, start, [opts(Config)]),

    Server = rpc:call(Node, erlang, whereis, [emqx_node_rebalance_evacuation]),

    %% raw message
    Server ! unknown,
    %% cast
    ok = gen_server:cast(Server, unknown),
    %% call
    ?assertEqual(ignored, gen_server:call(Server, unknown)).
|
||||
|
||||
%% Two node tests
|
||||
|
||||
%% An already connected client must be disconnected once evacuation starts on
%% its node, and new connection attempts to that node must be refused.
%%
%% The test waits (up to 1s) for the `node_evacuation_evict_conn' trace event
%% after starting the evacuation, then expects the linked client process to
%% exit with the MQTT "use another server" reason code within another second.
t_conn_evicted(Config) ->
    %% Client processes are linked to the test process; trap exits so that the
    %% eviction shows up as an 'EXIT' message instead of killing the test.
    process_flag(trap_exit, true),

    [{DonorNode, DonorPort}, _] = ?config(cluster_nodes, Config),

    {ok, C} = emqtt_connect([{clientid, <<"evacuated">>}, {port, DonorPort}]),

    ?assertWaitEvent(
        ok = rpc:call(DonorNode, emqx_node_rebalance_evacuation, start, [opts(Config)]),
        #{?snk_kind := node_evacuation_evict_conn},
        1000
    ),

    ?assertMatch(
        {error, {use_another_server, #{}}},
        emqtt_try_connect([{clientid, <<"connecting">>}, {port, DonorPort}])
    ),

    receive
        %% Named reason code instead of the bare number 156, consistent with
        %% t_session_evicted.
        {'EXIT', C, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
    after 1000 ->
        ct:fail("Connection not evicted")
    end.
|
||||
|
||||
%% Checks what emqx_node_rebalance_evacuation:migrate_to/1 returns on the
%% donor node: the recipient node when no explicit list is given, nothing for
%% an unknown node, and nothing once the eviction agent has been enabled on
%% the recipient.
t_migrate_to(Config) ->
    [{Donor, _DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),
    MigrateTo = fun(Requested) ->
        rpc:call(Donor, emqx_node_rebalance_evacuation, migrate_to, [Requested])
    end,

    ?assertEqual([Recipient], MigrateTo(undefined)),
    ?assertEqual([], MigrateTo(['unknown@node'])),

    ok = rpc:call(Recipient, emqx_eviction_agent, enable, [test_rebalance, undefined]),

    ?assertEqual([], MigrateTo(undefined)).
|
||||
|
||||
%% A client connected with clean_start = false must be disconnected by the
%% evacuation, and the single channel registered for its client id afterwards
%% must live on the recipient node.
t_session_evicted(Config) ->
    process_flag(trap_exit, true),

    [{Donor, DonorPort}, {Recipient, _RecipientPort}] = ?config(cluster_nodes, Config),
    ClientId = <<"client_with_sess">>,

    {ok, Client} = emqtt_connect([
        {port, DonorPort}, {clientid, ClientId}, {clean_start, false}
    ]),

    ?assertWaitEvent(
        ok = rpc:call(Donor, emqx_node_rebalance_evacuation, start, [opts(Config)]),
        #{?snk_kind := node_evacuation_evict_sess_over},
        5000
    ),

    receive
        {'EXIT', Client, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
    after 1000 ->
        ct:fail("Connection not evicted")
    end,

    [Channel] = rpc:call(Donor, emqx_cm_registry, lookup_channels, [ClientId]),

    ?assertEqual(Recipient, node(Channel)).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Evacuation options shared by the test cases; the migration targets depend
%% on the cluster type of the running group (see migrate_to/1).
opts(Config) ->
    Targets = migrate_to(Config),
    #{
        migrate_to => Targets,
        wait_takeover => 1,
        sess_evict_rate => 10,
        conn_evict_rate => 10,
        server_reference => <<"srv">>
    }.
|
||||
|
||||
%% Nodes that sessions should be migrated to: none in a single-node cluster,
%% the second (recipient) node of `cluster_nodes' in a two-node cluster.
migrate_to(Config) ->
    ClusterType = ?config(cluster_type, Config),
    case ClusterType of
        two_node ->
            [_Evacuated, {Recipient, _Port}] = ?config(cluster_nodes, Config),
            [Recipient];
        one_node ->
            []
    end.
|
||||
|
||||
%% Returns a per-test-case directory located under the `priv_dir' entry of
%% `Config', or `undefined' when `Config' has no `priv_dir'.
case_specific_data_dir(Case, Config) ->
    PrivDir = ?config(priv_dir, Config),
    case PrivDir =:= undefined of
        true -> undefined;
        false -> filename:join(PrivDir, atom_to_list(Case))
    end.
|
|
@ -0,0 +1,108 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_node_rebalance_evacuation_persist_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
%% Common Test callback: the list of test cases for this suite is obtained
%% from emqx_common_test_helpers:all/1.
all() ->
    Suite = ?MODULE,
    emqx_common_test_helpers:all(Suite).
|
||||
|
||||
%% No suite-wide setup is needed; the configuration is passed through as is.
init_per_suite(Config) -> Config.
|
||||
|
||||
%% No suite-wide teardown is needed.
end_per_suite(_Config) -> ok.
|
||||
|
||||
%% Clears any persisted evacuation state before the test case runs; the
%% result of clear/0 is ignored since there may be nothing to clear.
init_per_testcase(_Case, Config) ->
    _Cleared = emqx_node_rebalance_evacuation_persist:clear(),
    Config.
|
||||
|
||||
%% Clears whatever evacuation state the test case persisted and returns the
%% result of clear/0.
end_per_testcase(_Case, _Config) ->
    _Cleared = emqx_node_rebalance_evacuation_persist:clear().
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Tests
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Options written with save/1 must be returned unchanged by read/1, i.e. the
%% defaults passed to read/1 must not override saved values. This also holds
%% for a `server_reference' saved as `undefined'.
t_save_read(_Config) ->
    Defaults = #{
        server_reference => <<"default_ref">>,
        conn_evict_rate => 2001,
        sess_evict_rate => 2002,
        wait_takeover => 2003
    },
    Saved = #{
        server_reference => <<"ref">>,
        conn_evict_rate => 1001,
        sess_evict_rate => 1002,
        wait_takeover => 1003
    },

    %% round trip with every option set
    ok = emqx_node_rebalance_evacuation_persist:save(Saved),
    {ok, ReadBack} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Saved, ReadBack),

    %% round trip with an undefined server reference
    SavedNoRef = Saved#{server_reference => undefined},
    ok = emqx_node_rebalance_evacuation_persist:save(SavedNoRef),
    {ok, ReadBackNoRef} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(SavedNoRef, ReadBackNoRef).
|
||||
|
||||
%% When the evacuation file holds an empty JSON object, read/1 must return
%% exactly the defaults it was given.
t_read_default(_Config) ->
    Defaults = #{
        server_reference => <<"ref">>,
        conn_evict_rate => 1001,
        sess_evict_rate => 1002,
        wait_takeover => 1003
    },
    ok = write_evacuation_file(<<"{}">>),

    {ok, Read} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, Read).
|
||||
|
||||
%% When the evacuation file holds malformed JSON, read/1 must still succeed
%% and return exactly the defaults it was given.
t_read_bad_data(_Config) ->
    Defaults = #{
        server_reference => <<"ref">>,
        conn_evict_rate => 1001,
        sess_evict_rate => 1002,
        wait_takeover => 1003
    },
    ok = write_evacuation_file(<<"{bad json">>),

    {ok, Read} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, Read).
|
||||
|
||||
%% read/1 returns `{ok, _}' while the evacuation file exists and `none' after
%% clear/0 has been called.
t_clear(_Config) ->
    ok = write_evacuation_file(<<"{}">>),

    BeforeClear = emqx_node_rebalance_evacuation_persist:read(#{}),
    ?assertMatch({ok, _}, BeforeClear),

    ok = emqx_node_rebalance_evacuation_persist:clear(),

    AfterClear = emqx_node_rebalance_evacuation_persist:read(#{}),
    ?assertEqual(none, AfterClear).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Helpers
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% Writes `Json' verbatim to the path reported by
%% emqx_node_rebalance_evacuation_persist:evacuation_filepath/0, creating the
%% parent directory first if needed.
write_evacuation_file(Json) ->
    Path = emqx_node_rebalance_evacuation_persist:evacuation_filepath(),
    ok = filelib:ensure_dir(Path),
    ok = file:write_file(Path, Json).
|
4
mix.exs
4
mix.exs
|
@ -402,7 +402,9 @@ defmodule EMQXUmbrella.MixProject do
|
|||
emqx_oracle: :permanent,
|
||||
emqx_bridge_oracle: :permanent,
|
||||
emqx_bridge_rabbitmq: :permanent,
|
||||
emqx_ee_schema_registry: :permanent
|
||||
emqx_ee_schema_registry: :permanent,
|
||||
emqx_eviction_agent: :permanent,
|
||||
emqx_node_rebalance: :permanent
|
||||
],
|
||||
else: []
|
||||
)
|
||||
|
|
|
@ -478,7 +478,9 @@ relx_apps_per_edition(ee) ->
|
|||
emqx_oracle,
|
||||
emqx_bridge_oracle,
|
||||
emqx_bridge_rabbitmq,
|
||||
emqx_ee_schema_registry
|
||||
emqx_ee_schema_registry,
|
||||
emqx_eviction_agent,
|
||||
emqx_node_rebalance
|
||||
];
|
||||
relx_apps_per_edition(ce) ->
|
||||
[].
|
||||
|
|
Loading…
Reference in New Issue