feat(node_rebalance): implement node evacuation and rebalancing

This commit is contained in:
Ilya Averyanov 2022-04-28 15:58:17 +03:00
parent 9064b5acb8
commit a19fbe214f
56 changed files with 4668 additions and 130 deletions

View File

@ -21,6 +21,7 @@ File format:
- Improve error message for LwM2M plugin when object ID is not valid [#8654](https://github.com/emqx/emqx/pull/8654).
- Add tzdata apk package to alpine docker image. [#8671](https://github.com/emqx/emqx/pull/8671)
- Add node evacuation and cluster rebalancing features [#8597]
## v4.3.19
@ -55,6 +56,7 @@ File format:
- HTTP API(GET /rules/) support for pagination and fuzzy filtering. [#8450]
- Add check_conf cli to check config format. [#8486]
- Optimize performance of shared subscription
- Make it possible to debug-print the SSL handshake procedure by setting the listener config `log_level=debug` [#8553](https://github.com/emqx/emqx/pull/8553)
## v4.3.16

19
apps/emqx_eviction_agent/.gitignore vendored Normal file
View File

@ -0,0 +1,19 @@
.rebar3
_*
.eunit
*.o
*.beam
*.plt
*.swp
*.swo
.erlang.cookie
ebin
log
erl_crash.dump
.rebar
logs
_build
.idea
*.iml
rebar3.crashdump
*~

View File

@ -0,0 +1,9 @@
emqx_eviction_agent
=====
An OTP library
Build
-----
$ rebar3 compile

View File

@ -0,0 +1,3 @@
##--------------------------------------------------------------------
## EMQX Eviction Agent Plugin
##--------------------------------------------------------------------

View File

@ -0,0 +1,2 @@
%% rebar3 build configuration.
%% Compiler options: keep debug information in the generated beam files.
{erl_opts, [debug_info]}.
%% No dependencies are declared at this level.
{deps, []}.

View File

@ -0,0 +1,18 @@
%% OTP application resource file for the eviction agent.
%% `registered' lists the locally registered process names used by the
%% application; `mod' names the application callback module.
{application, emqx_eviction_agent,
[{description, "EMQX Eviction Agent"},
{vsn, "4.3.0"},
{registered, [emqx_eviction_agent_sup,
emqx_eviction_agent,
emqx_eviction_agent_conn_sup]},
{applications,
[kernel,
stdlib
]},
{mod, {emqx_eviction_agent_app,[]}},
{env,[]},
{modules, []},
{maintainers, ["EMQX Team <contact@emqx.io>"]},
{links, [{"Homepage", "https://emqx.io/"},
{"Github", "https://github.com/emqx"}
]}
]}.

View File

@ -0,0 +1,291 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent).
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("stdlib/include/qlc.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-export([start_link/0,
enable/2,
disable/1,
status/0,
connection_count/0,
session_count/0,
session_count/1,
evict_connections/1,
evict_sessions/2,
evict_sessions/3,
evict_session_channel/3
]).
-behaviour(gen_server).
-export([init/1,
handle_call/3,
handle_info/2,
handle_cast/2,
code_change/3
]).
-export([on_connect/2,
on_connack/3]).
-export([hook/0,
unhook/0]).
-export_type([server_reference/0]).
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
-type server_reference() :: binary() | undefined.
-type status() :: {enabled, conn_stats()} | disabled.
-type conn_stats() :: #{connections := non_neg_integer(),
sessions := non_neg_integer()}.
-type kind() :: atom().
%% @doc Start the eviction agent server, registered locally under the
%% module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
%% @doc Turn eviction on for `Kind', advertising `ServerReference' to
%% rejected or disconnected clients.
%%
%% The server replies `{error, eviction_agent_busy}' when eviction is
%% already enabled for a different kind.
-spec enable(kind(), server_reference()) -> ok_or_error(eviction_agent_busy).
enable(Kind, ServerReference) ->
    Request = {enable, Kind, ServerReference},
    gen_server:call(?MODULE, Request).
%% @doc Turn eviction off for `Kind'.
%%
%% Returns `ok' when eviction was enabled for the same `Kind',
%% `{error, disabled}' when eviction is not enabled at all, and
%% `{error, eviction_agent_busy}' when it is enabled for another kind.
-spec disable(kind()) -> ok_or_error(disabled | eviction_agent_busy).
disable(Kind) ->
    gen_server:call(?MODULE, {disable, Kind}).
%% @doc Report whether eviction is enabled on this node.
%% When it is enabled, the current connection and session counts are
%% returned alongside.
-spec status() -> status().
status() ->
    Current = enable_status(),
    case Current of
        disabled ->
            disabled;
        {enabled, _Kind, _ServerReference} ->
            {enabled, stats()}
    end.
%% @doc Ask up to `N' live connections to disconnect.
%% Only works while eviction is enabled; otherwise `{error, disabled}'
%% is returned and nothing is touched.
-spec evict_connections(pos_integer()) -> ok_or_error(disabled).
evict_connections(N) ->
    Current = enable_status(),
    case Current of
        disabled ->
            {error, disabled};
        {enabled, _Kind, ServerReference} ->
            ok = do_evict_connections(N, ServerReference)
    end.
%% @doc Evict up to `N' sessions to the given node (or to nodes picked
%% from the given non-empty list), regardless of their connection state.
%% Equivalent to `evict_sessions(N, Nodes, any)'.
-spec evict_sessions(pos_integer(), node() | [node()]) -> ok_or_error(disabled).
evict_sessions(N, Node) when is_atom(Node) ->
    evict_sessions(N, [Node]);
%% Match on the list shape instead of calling the O(n) length/1 in a guard.
evict_sessions(N, [_ | _] = Nodes) ->
    evict_sessions(N, Nodes, any).
%% @doc Evict up to `N' sessions whose connection state equals
%% `ConnState' (`any' selects every state) to the given node(s).
%% Returns `{error, disabled}' when eviction is not enabled.
-spec evict_sessions(pos_integer(), node() | [node()], atom()) -> ok_or_error(disabled).
evict_sessions(N, Node, ConnState) when is_atom(Node) ->
    evict_sessions(N, [Node], ConnState);
%% Match on the list shape instead of calling the O(n) length/1 in a guard.
evict_sessions(N, [_ | _] = Nodes, ConnState) ->
    case enable_status() of
        {enabled, _Kind, _ServerReference} ->
            ok = do_evict_sessions(N, Nodes, ConnState);
        disabled ->
            {error, disabled}
    end.
%%--------------------------------------------------------------------
%% gen_server callbacks
%%--------------------------------------------------------------------
%% Every (re)start of the server begins from the disabled state: whatever
%% status was stored in the persistent term before is erased.
init([]) ->
    _WasPresent = persistent_term:erase(?MODULE),
    InitialState = #{},
    {ok, InitialState}.
%% enable: allowed when disabled, or when re-enabling the same kind
%% (which refreshes the server reference); refused for a different kind.
handle_call({enable, Kind, ServerReference}, _From, State) ->
    NewStatus = {enabled, Kind, ServerReference},
    Reply =
        case enable_status() of
            {enabled, CurrentKind, _OldReference} when CurrentKind =/= Kind ->
                {error, eviction_agent_busy};
            {enabled, _SameKind, _OldReference} ->
                ok = persistent_term:put(?MODULE, NewStatus);
            disabled ->
                ok = persistent_term:put(?MODULE, NewStatus)
        end,
    {reply, Reply, State};
%% disable: only the kind that enabled eviction may switch it off.
handle_call({disable, Kind}, _From, State) ->
    Reply =
        case enable_status() of
            {enabled, CurrentKind, _Reference} when CurrentKind =/= Kind ->
                {error, eviction_agent_busy};
            {enabled, _SameKind, _Reference} ->
                _ = persistent_term:erase(?MODULE),
                ok;
            disabled ->
                {error, disabled}
        end,
    {reply, Reply, State}.
%% No raw messages are expected; log them and carry on.
handle_info(Unexpected, State) ->
    ?LOG(warning, "Unknown Msg: ~p, State: ~p", [Unexpected, State]),
    {noreply, State}.
%% No casts are expected; log them and carry on.
handle_cast(Unexpected, State) ->
    ?LOG(warning, "Unknown cast Msg: ~p, State: ~p", [Unexpected, State]),
    {noreply, State}.
%% The state needs no conversion on code upgrade.
code_change(_OldVsn, St, _Extra) ->
    {ok, St}.
%%--------------------------------------------------------------------
%% Hook callbacks
%%--------------------------------------------------------------------
%% 'client.connect' hook: while eviction is enabled every new connection
%% attempt is stopped with the "use another server" reason code.
on_connect(_ConnInfo, _Props) ->
    case enable_status() of
        disabled ->
            ignore;
        {enabled, _Kind, _ServerReference} ->
            {stop, {error, ?RC_USE_ANOTHER_SERVER}}
    end.
%% 'client.connack' hook: for MQTT v5 clients rejected with
%% `use_another_server', add the configured server reference to the
%% CONNACK properties while eviction is enabled. All other CONNACKs pass
%% through untouched.
on_connack(#{proto_name := <<"MQTT">>, proto_ver := ?MQTT_PROTO_V5},
           use_another_server,
           Props) ->
    NewProps =
        case enable_status() of
            disabled ->
                Props;
            {enabled, _Kind, ServerReference} ->
                Props#{'Server-Reference' => ServerReference}
        end,
    {ok, NewProps};
on_connack(_ClientInfo, _Reason, Props) ->
    {ok, Props}.
%%--------------------------------------------------------------------
%% Hook funcs
%%--------------------------------------------------------------------
%% Register the agent's callbacks: first 'client.connack', then
%% 'client.connect'.
hook() ->
    ?tp(debug, eviction_agent_hook, #{}),
    Hooks = [{'client.connack', on_connack},
             {'client.connect', on_connect}],
    lists:foreach(
      fun({HookPoint, Callback}) ->
              ok = emqx_hooks:put(HookPoint, {?MODULE, Callback, []})
      end,
      Hooks).
%% Remove the agent's callbacks: first 'client.connect', then
%% 'client.connack'.
unhook() ->
    ?tp(debug, eviction_agent_unhook, #{}),
    Hooks = [{'client.connect', on_connect},
             {'client.connack', on_connack}],
    lists:foreach(
      fun({HookPoint, Callback}) ->
              ok = emqx_hooks:del(HookPoint, {?MODULE, Callback})
      end,
      Hooks).
%% Read the current status from the persistent term keyed by this module;
%% a missing term means eviction is disabled.
enable_status() ->
    Default = disabled,
    persistent_term:get(?MODULE, Default).
% connection management
%% Snapshot of the current connection and session counts.
stats() ->
    Connections = connection_count(),
    Sessions = session_count(),
    #{connections => Connections, sessions => Sessions}.
%% Query handle over live connections, as provided by emqx_cm.
connection_table() ->
    QH = emqx_cm:live_connection_table(),
    QH.
%% Number of entries in the live connection table.
connection_count() ->
    Connections = connection_table(),
    table_count(Connections).
%% Query handle yielding `{ClientId, ConnInfo, ClientInfo}' for channels
%% that have a session. `any' selects all of them; any other value keeps
%% only the rows whose connection state is exactly that value.
channel_with_session_table(any) ->
    qlc:q([{Id, CInfo, ClInfo}
           || {Id, _State, CInfo, ClInfo} <- emqx_cm:channel_with_session_table()]);
channel_with_session_table(Wanted) ->
    qlc:q([{Id, CInfo, ClInfo}
           || {Id, State, CInfo, ClInfo} <- emqx_cm:channel_with_session_table(),
              Wanted =:= State]).
%% Number of channels with a session, in any connection state.
session_count() ->
    session_count(any).

%% Number of channels with a session in the given connection state
%% (`any' counts all of them).
session_count(ConnState) ->
    Sessions = channel_with_session_table(ConnState),
    table_count(Sessions).
%% Count the answers of a query handle by folding over it.
table_count(QH) ->
    Increment = fun(_Answer, Count) -> Count + 1 end,
    qlc:fold(Increment, 0, QH).
%% Fetch at most `N' channel pids from the live connection table.
%% A cursor is used so that only the first `N' answers are requested.
take_connections(N) ->
    Cursor = qlc:cursor(qlc:q([Pid || {_ClientId, Pid} <- connection_table()])),
    Pids = qlc:next_answers(Cursor, N),
    ok = qlc:delete_cursor(Cursor),
    Pids.
%% Fetch at most `N' `{ClientId, ConnInfo, ClientInfo}' rows for channels
%% with a session in the requested connection state.
take_channel_with_sessions(N, ConnState) ->
    QH = channel_with_session_table(ConnState),
    Cursor = qlc:cursor(QH),
    Rows = qlc:next_answers(Cursor, N),
    ok = qlc:delete_cursor(Cursor),
    Rows.
%% Send a disconnect request to at most `N' live connections.
do_evict_connections(N, ServerReference) when N > 0 ->
    _ = [disconnect_channel(Pid, ServerReference) || Pid <- take_connections(N)],
    ok.
%% Move at most `N' sessions in the requested connection state to one of
%% `Nodes'. Results of the individual evictions are not inspected here.
do_evict_sessions(N, Nodes, ConnState) when N > 0 ->
    EvictOne =
        fun({ClientId, ConnInfo, ClientInfo}) ->
                evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo)
        end,
    ok = lists:foreach(EvictOne, take_channel_with_sessions(N, ConnState)).
%% Pick a random target node and ask it, over RPC, to take the client's
%% session over. RPC failures are turned into `{error, Reason}'; errors
%% are logged and returned, anything else is passed through as is.
evict_session_channel(Nodes, ClientId, ConnInfo, ClientInfo) ->
    Target = select_random(Nodes),
    ?LOG(info, "Evicting client=~p to node=~p, conninfo=~p, clientinfo=~p",
         [ClientId, Target, ConnInfo, ClientInfo]),
    Args = [ClientId, ConnInfo, ClientInfo],
    Outcome = rpc:call(Target, ?MODULE, evict_session_channel, Args),
    case Outcome of
        {badrpc, RpcReason} ->
            ?LOG(error, "RPC error while evicting client=~p to node=~p: ~p",
                 [ClientId, Target, RpcReason]),
            {error, RpcReason};
        {error, Reason} ->
            ?LOG(error, "Error evicting client=~p to node=~p: ~p",
                 [ClientId, Target, Reason]),
            Outcome;
        _Other ->
            Outcome
    end.
%% Runs on the target node: start a supervised eviction channel for the
%% client and return whatever the start attempt returned.
evict_session_channel(ClientId, ConnInfo, ClientInfo) ->
    ?LOG(info, "Taking up client=~p, conninfo=~p, clientinfo=~p",
         [ClientId, ConnInfo, ClientInfo]),
    Opts = #{conninfo => ConnInfo, clientinfo => ClientInfo},
    StartResult = emqx_eviction_agent_channel:start_supervised(Opts),
    ?LOG(info, "Taking up client=~p, result=~p",
         [ClientId, StartResult]),
    StartResult.
%% Ask a channel process to disconnect its client with the "use another
%% server" reason code, passing the server reference as a property.
disconnect_channel(ChanPid, ServerReference) ->
    Props = #{'Server-Reference' => ServerReference},
    Request = {disconnect, ?RC_USE_ANOTHER_SERVER, use_another_server, Props},
    erlang:send(ChanPid, Request).
%% Pick a uniformly random element of a non-empty list.
%% The list shape is matched in the head rather than with a length/1
%% guard, so the list is measured only once.
select_random([_ | _] = List) ->
    Index = rand:uniform(length(List)),
    lists:nth(Index, List).

View File

@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_api).
-include_lib("emqx/include/logger.hrl").
-rest_api(#{name => node_eviction_status,
method => 'GET',
path => "/node_eviction/status",
func => status,
descr => "Get node eviction status"}).
-export([status/2]).
%% REST handler for GET /node_eviction/status: reports `disabled', or
%% `enabled' together with the agent's stats.
status(_Bindings, _Params) ->
    Body =
        case emqx_eviction_agent:status() of
            {enabled, Stats} ->
                #{status => enabled, stats => Stats};
            disabled ->
                #{status => disabled}
        end,
    {ok, Body}.

View File

@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_app).
-behaviour(application).
-emqx_plugin(?MODULE).
-export([ start/2
, stop/1
]).
%% Application start: install the hooks, start the supervision tree with
%% the application environment, then register the CLI command.
start(_Type, _Args) ->
    AppEnv = application:get_all_env(emqx_eviction_agent),
    ok = emqx_eviction_agent:hook(),
    {ok, _SupPid} = Started = emqx_eviction_agent_sup:start_link(AppEnv),
    ok = emqx_eviction_agent_cli:load(),
    Started.
%% Application stop: remove the hooks, then unregister the CLI command.
stop(_AppState) ->
    ok = emqx_eviction_agent:unhook(),
    ok = emqx_eviction_agent_cli:unload(),
    ok.

View File

@ -0,0 +1,299 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% MQTT Channel
-module(emqx_eviction_agent_channel).
-include_lib("emqx/include/emqx.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-logger_header("[Evicted Channel]").
-export([start_link/1,
start_supervised/1,
call/2,
call/3,
cast/2,
stop/1
]).
-export([init/1,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3
]).
-type opts() :: #{conninfo := emqx_types:conninfo(),
clientinfo := emqx_types:clientinfo()}.
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
%% @doc Start an eviction channel as a temporary child of
%% `emqx_eviction_agent_conn_sup'.
%% The child id is the client id suffixed with a unique positive integer,
%% so the same client id can be started more than once.
-spec start_supervised(opts()) -> startlink_ret().
start_supervised(#{clientinfo := #{clientid := ClientId}} = Opts) ->
    Suffix = integer_to_binary(erlang:unique_integer([positive])),
    ChildId = <<ClientId/binary, "-", Suffix/binary>>,
    supervisor:start_child(
      emqx_eviction_agent_conn_sup,
      #{id => ChildId,
        start => {?MODULE, start_link, [Opts]},
        restart => temporary,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]}).
%% @doc Start an unregistered eviction channel process linked to the caller.
-spec start_link(opts()) -> startlink_ret().
start_link(Opts) ->
    InitArgs = [Opts],
    gen_server:start_link(?MODULE, InitArgs, []).
%% @doc Send an asynchronous request to a channel process.
-spec cast(pid(), term()) -> ok.
cast(ChannelPid, Request) ->
    gen_server:cast(ChannelPid, Request).
%% @doc Synchronous request to a channel process, waiting indefinitely.
-spec call(pid(), term()) -> term().
call(ChannelPid, Request) ->
    call(ChannelPid, Request, infinity).

%% @doc Synchronous request to a channel process with an explicit timeout.
-spec call(pid(), term(), timeout()) -> term().
call(ChannelPid, Request, Timeout) ->
    gen_server:call(ChannelPid, Request, Timeout).
%% @doc Stop a channel process and wait for it to terminate.
-spec stop(pid()) -> ok.
stop(ChannelPid) ->
    gen_server:stop(ChannelPid).
%%--------------------------------------------------------------------
%% gen_server API
%%--------------------------------------------------------------------
%% Build the channel from the evicted client's conninfo/clientinfo: open
%% (take over) its session, then arm the session expiry timer. Either
%% step failing stops the process with the failure reason.
init([#{conninfo := OldConnInfo, clientinfo := #{clientid := ClientId} = OldClientInfo}]) ->
    process_flag(trap_exit, true),
    ClientInfo = clientinfo(OldClientInfo),
    ConnInfo = conninfo(OldConnInfo),
    case open_session(ConnInfo, ClientInfo) of
        {error, Reason} ->
            {stop, Reason};
        {ok, Opened} ->
            init_with_expiry(ClientId, Opened)
    end.

%% Second half of init/1: arm the expiry timer on a channel whose session
%% is already open, and hibernate until the first message arrives.
init_with_expiry(ClientId, Opened) ->
    case set_expiry_timer(Opened) of
        {error, Reason} ->
            {stop, Reason};
        {ok, Armed} ->
            ?LOG(
               info,
               "Channel initialized for client=~p on node=~p",
               [ClientId, node()]),
            {ok, Armed, hibernate}
    end.
%% kick / discard: acknowledge and stop with the matching reason.
handle_call(kick, _From, State) ->
    {stop, kicked, ok, State};
handle_call(discard, _From, State) ->
    {stop, discarded, ok, State};
%% Takeover, phase one: hand the session out and remember that a takeover
%% is in progress.
handle_call({takeover, 'begin'}, _From, #{session := Session} = State) ->
    TakingOver = State#{takeover => true},
    {reply, Session, TakingOver};
%% Takeover, phase two: finish the session takeover, reply with every
%% pending delivery (freshly drained ones first) and stop normally.
handle_call({takeover, 'end'}, _From, #{session := Session,
                                        clientinfo := #{clientid := ClientId},
                                        pendings := Pendings} = State) ->
    ok = emqx_session:takeover(Session),
    %% TODO: Should not drain deliver here (side effect)
    Drained = emqx_misc:drain_deliver(),
    Reply = lists:append(Drained, Pendings),
    ?tp(debug,
        emqx_channel_takeover_end,
        #{clientid => ClientId}),
    {stop, normal, Reply, State};
%% This channel keeps no ACL cache and applies no quota.
handle_call(list_acl_cache, _From, State) ->
    {reply, [], State};
handle_call({quota, _Policy}, _From, State) ->
    {reply, ok, State};
handle_call(Unexpected, _From, State) ->
    ?LOG(error, "Unexpected call: ~p", [Unexpected]),
    {reply, ignored, State}.
%% Deliveries are batched: the one just received plus everything that can
%% be drained from the mailbox is handled at once.
handle_info({deliver, _Topic, _Msg} = First, State) ->
    Rest = emqx_misc:drain_deliver(),
    NewState = handle_deliver([First | Rest], State),
    {noreply, NewState};
%% Sent by the expiry timer armed in set_expiry_timer/1.
handle_info(expire_session, State) ->
    {stop, expired, State};
handle_info(Unexpected, State) ->
    ?LOG(error, "Unexpected info: ~p", [Unexpected]),
    {noreply, State}.
%% No casts are expected; log them and carry on.
handle_cast(Unexpected, State) ->
    ?LOG(error, "Unexpected cast: ~p", [Unexpected]),
    {noreply, State}.
%% On shutdown cancel the pending expiry timer (if any), then let the
%% session module run its own termination logic.
terminate(Reason, #{clientinfo := ClientInfo, session := Session} = State) ->
    ok = cancel_expiry_timer(State),
    emqx_session:terminate(ClientInfo, Reason, Session).
%% The channel state needs no conversion on code upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------
%% Handle a batch of deliveries after dropping nacked and no-local ones.
handle_deliver(Delivers,
               #{takeover := true,
                 pendings := Pendings,
                 session := Session,
                 clientinfo := #{clientid := ClientId} = ClientInfo} = State) ->
    %% NOTE: Order is important here. While the takeover is in
    %% progress, the session cannot enqueue messages, since it already
    %% passed on the queue to the new connection in the session state.
    Accepted = ignore_local(ClientInfo, maybe_nack(Delivers), ClientId, Session),
    State#{pendings => lists:append(Pendings, Accepted)};
handle_deliver(Delivers,
               #{takeover := false,
                 session := Session,
                 clientinfo := #{clientid := ClientId} = ClientInfo} = State) ->
    %% No takeover in progress: queue the messages in the session.
    Accepted = ignore_local(ClientInfo, maybe_nack(Delivers), ClientId, Session),
    State#{session => emqx_session:enqueue(ClientInfo, Accepted, Session)}.
%% Cancel the expiry timer when the channel holds a timer reference;
%% a channel without one is left alone. Always returns `ok'.
cancel_expiry_timer(Channel) ->
    case Channel of
        #{expiry_timer := TimerRef} when is_reference(TimerRef) ->
            _ = erlang:cancel_timer(TimerRef),
            ok;
        _NoTimer ->
            ok
    end.
%% Arm the session expiry timer from the `expiry_interval' in conninfo.
%% An interval equal to ?UINT_MAX arms no timer (presumably "never
%% expires" -- confirm against the macro's definition); a positive
%% interval, taken as seconds, schedules `expire_session'; anything else
%% means the session should already be gone.
set_expiry_timer(#{conninfo := ConnInfo} = Channel) ->
    set_expiry_timer(maps:get(expiry_interval, ConnInfo), Channel).

set_expiry_timer(?UINT_MAX, Channel) ->
    {ok, Channel};
set_expiry_timer(Interval, Channel) when Interval > 0 ->
    TimerRef = erlang:send_after(timer:seconds(Interval), self(), expire_session),
    {ok, Channel#{expiry_timer => TimerRef}};
set_expiry_timer(_Interval, _Channel) ->
    {error, should_be_expired}.
%% Open the client's existing session through emqx_cm (clean start is
%% `false'). A missing session is an error. Otherwise the pending
%% messages plus anything already in the mailbox are deduplicated,
%% filtered and queued in the session, and the channel info is stored.
open_session(ConnInfo, #{clientid := ClientId} = ClientInfo) ->
    Fresh = channel(ConnInfo, ClientInfo),
    Opened = emqx_cm:open_session(false, ClientInfo, ConnInfo),
    case Opened of
        {ok, #{present := false}} ->
            ?LOG(info, "No session for clientid=~p", [ClientId]),
            {error, no_session};
        {ok, #{session := Session, present := true, pendings := Pendings}} ->
            ?LOG(info, "Session opened for client=~p on node=~p", [ClientId, node()]),
            Drained = emqx_misc:drain_deliver(),
            Unique = lists:usort(lists:append(Pendings, Drained)),
            Accepted = ignore_local(ClientInfo, maybe_nack(Unique), ClientId, Session),
            Queued = emqx_session:enqueue(ClientInfo, Accepted, Session),
            WithSession = Fresh#{session => Queued},
            ok = emqx_cm:insert_channel_info(ClientId, info(WithSession), []),
            ?LOG(info, "Channel info updated for client=~p on node=~p", [ClientId, node()]),
            {ok, WithSession};
        {error, Reason} ->
            ?LOG(error, "Failed to open session due to ~p", [Reason]),
            Opened
    end.
%% Derive this channel's conninfo from the evicted connection's one:
%% keep a fixed set of fields, mark the connection as disconnected and
%% owned by this module, and keep the original disconnect time (falling
%% back to the current time when there is none).
conninfo(OldConnInfo) ->
    Now = erlang:system_time(millisecond),
    KeptKeys = [socktype, sockname, peername, peercert, clientid,
                clean_start, receive_maximum, expiry_interval],
    Overrides = #{conn_mod => ?MODULE,
                  connected => false,
                  disconnected_at => maps:get(disconnected_at, OldConnInfo, Now)},
    maps:merge(maps:with(KeptKeys, OldConnInfo), Overrides).
%% Keep only a fixed set of fields from the evicted client's clientinfo.
clientinfo(OldClientInfo) ->
    KeptKeys = [zone, protocol, peerhost, sockport, clientid,
                username, is_bridge, is_superuser, mountpoint],
    maps:with(KeptKeys, OldClientInfo).
%% Initial channel state: no expiry timer, no takeover or resume in
%% progress, and no pending deliveries.
channel(ConnInfo, ClientInfo) ->
    Defaults = #{expiry_timer => undefined,
                 takeover => false,
                 resuming => false,
                 pendings => []},
    Defaults#{conninfo => ConnInfo, clientinfo => ClientInfo}.
%% Channel info stored for this channel: conninfo, clientinfo and session
%% (`undefined' where missing), always in the `disconnected' state.
info(Channel) ->
    Field = fun(Key) -> maps:get(Key, Channel, undefined) end,
    #{conninfo => Field(conninfo),
      clientinfo => Field(clientinfo),
      session => Field(session),
      conn_state => disconnected}.
%% Drop deliveries that the subscriber published itself on topics it
%% subscribed to with the No Local option (`nl := 1').
%%
%% Every dropped message runs the 'delivery.dropped' hook and bumps the
%% 'delivery.dropped' and 'delivery.dropped.no_local' metrics. All other
%% deliveries are returned in their original order.
%%
%% The whole list is filtered. lists:dropwhile/2 would only strip a
%% leading run of no-local messages and let later ones through.
ignore_local(ClientInfo, Delivers, Subscriber, Session) ->
    Subs = emqx_session:info(subscriptions, Session),
    lists:filter(fun({deliver, Topic, #message{from = Publisher} = Msg}) ->
                         case maps:find(Topic, Subs) of
                             {ok, #{nl := 1}} when Subscriber =:= Publisher ->
                                 ok = emqx_hooks:run('delivery.dropped', [ClientInfo, Msg, no_local]),
                                 ok = emqx_metrics:inc('delivery.dropped'),
                                 ok = emqx_metrics:inc('delivery.dropped.no_local'),
                                 %% own message on a no-local subscription: drop
                                 false;
                             _ ->
                                 true
                         end
                 end, Delivers).
%% Keep only the deliveries that were not negatively acknowledged.
maybe_nack(Delivers) ->
    [Deliver || Deliver <- Delivers, not_nacked(Deliver)].

%% A delivery is dropped only when its message requires a shared
%% subscription ack and the "no connection" nack for it returned `ok'.
not_nacked({deliver, _Topic, Msg}) ->
    case emqx_shared_sub:is_ack_required(Msg) of
        false ->
            true;
        true ->
            ok =/= emqx_shared_sub:nack_no_connection(Msg)
    end.

View File

@ -0,0 +1,42 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_cli).
%% APIs
-export([ load/0
, unload/0
, cli/1
]).
%% Register the `eviction' command, handled by cli/1 in this module.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(eviction, Handler, []).
%% Unregister the `eviction' command.
unload() ->
    Command = eviction,
    emqx_ctl:unregister_command(Command).
%% `eviction status' prints whether eviction is enabled on this node;
%% any other arguments print the usage text.
cli(["status"]) ->
    Line =
        case emqx_eviction_agent:status() of
            {enabled, _Stats} ->
                "Eviction status: enabled~n";
            disabled ->
                "Eviction status: disabled~n"
        end,
    emqx_ctl:print(Line);
cli(_) ->
    Usage = [{"eviction status",
              "Get current node eviction status"}],
    emqx_ctl:usage(Usage).

View File

@ -0,0 +1,33 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_conn_sup).
-behaviour(supervisor).
-export([start_link/1]).
-export([init/1]).
%% Start the channel supervisor, registered locally under the module name.
start_link(Env) ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, [Env]).
%% Starts with no children; eviction channels are added at runtime.
%% At most 10 restarts are tolerated within 3600 seconds.
init([_Env]) ->
    SupFlags = {one_for_one, 10, 3600},
    {ok, {SupFlags, []}}.

View File

@ -0,0 +1,43 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_sup).
-behaviour(supervisor).
-export([start_link/1]).
-export([init/1]).
%% Start the application's top supervisor, registered locally under the
%% module name.
start_link(Env) ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, [Env]).
%% Children: the eviction agent server and the supervisor that holds the
%% eviction channels. At most 10 restarts are tolerated within 3600 seconds.
init([_Env]) ->
    Agent = child_spec(worker, emqx_eviction_agent, []),
    ConnSup = child_spec(supervisor, emqx_eviction_agent_conn_sup, [#{}]),
    SupFlags = {one_for_one, 10, 3600},
    {ok, {SupFlags, [Agent, ConnSup]}}.
%% Build a permanent child spec for `Mod', started through
%% `Mod:start_link/N' with `Args'.
%%
%% Workers get 5000 ms to shut down. Supervisors get `infinity', as OTP
%% requires for children of type supervisor: a finite timeout may kill
%% the supervisor before it has terminated its own subtree.
child_spec(Type, Mod, Args) ->
    #{id => Mod,
      start => {Mod, start_link, Args},
      restart => permanent,
      shutdown => shutdown_timeout(Type),
      type => Type,
      modules => [Mod]
     }.

%% Shutdown timeout appropriate for the child type.
shutdown_timeout(supervisor) -> infinity;
shutdown_timeout(worker) -> 5000.

View File

@ -0,0 +1,232 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect/0, emqtt_connect/2]).
%% Test cases of this suite, as collected by emqx_ct.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the application under test once for the whole suite.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the application under test after the suite.
end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps).
%% Every test case starts with eviction disabled. The takeover test also
%% gets a slave node to evacuate sessions to, stored under `evacuate_node'.
init_per_testcase(t_explicit_session_takeover, Config) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    SlaveOpts = #{start_apps => [emqx, emqx_eviction_agent]},
    Slave = emqx_node_helpers:start_slave(evacuate1, SlaveOpts),
    [{evacuate_node, Slave} | Config];
init_per_testcase(_TestCase, Config) ->
    _ = emqx_eviction_agent:disable(test_eviction),
    Config.
%% Leave eviction disabled after every test case; the takeover test also
%% stops its slave node first.
end_per_testcase(t_explicit_session_takeover, Config) ->
    Slave = ?config(evacuate_node, Config),
    _ = emqx_node_helpers:stop_slave(Slave),
    _ = emqx_eviction_agent:disable(test_eviction);
end_per_testcase(_TestCase, _Config) ->
    _ = emqx_eviction_agent:disable(test_eviction).
%% Walks through the enable/disable state machine of the agent and checks
%% that new connections are accepted only while eviction is disabled.
t_enable_disable(_Config) ->
erlang:process_flag(trap_exit, true),
%% Initially disabled: clients can connect.
?assertMatch(
disabled,
emqx_eviction_agent:status()),
{ok, C0} = emqtt_connect(),
ok = emqtt:disconnect(C0),
ok = emqx_eviction_agent:enable(test_eviction, undefined),
%% A different kind cannot enable while another kind holds the agent.
?assertMatch(
{error, eviction_agent_busy},
emqx_eviction_agent:enable(bar, undefined)),
%% The same kind may enable again, e.g. to change the server reference.
?assertMatch(
ok,
emqx_eviction_agent:enable(test_eviction, <<"srv">>)),
?assertMatch(
{enabled, #{}},
emqx_eviction_agent:status()),
%% While enabled, connection attempts are rejected.
?assertMatch(
{error, {use_another_server, #{}}},
emqtt_connect()),
%% Only the kind that enabled eviction can disable it.
?assertMatch(
{error, eviction_agent_busy},
emqx_eviction_agent:disable(bar)),
?assertMatch(
ok,
emqx_eviction_agent:disable(test_eviction)),
%% Disabling twice reports that the agent is already disabled.
?assertMatch(
{error, disabled},
emqx_eviction_agent:disable(test_eviction)),
?assertMatch(
disabled,
emqx_eviction_agent:status()),
%% Disabled again: clients can connect.
{ok, C1} = emqtt_connect(),
ok = emqtt:disconnect(C1).
%% Checks that evict_connections/1 is refused while disabled and that the
%% connection count reported by status/0 drops after an eviction.
t_evict_connections_status(_Config) ->
erlang:process_flag(trap_exit, true),
{ok, _C} = emqtt_connect(),
{error, disabled} = emqx_eviction_agent:evict_connections(1),
ok = emqx_eviction_agent:enable(test_eviction, undefined),
?assertMatch(
{enabled, #{connections := 1, sessions := _}},
emqx_eviction_agent:status()),
ok = emqx_eviction_agent:evict_connections(1),
%% Eviction is asynchronous (a message to the channel process), so give
%% it a moment before counting again.
ct:sleep(100),
?assertMatch(
{enabled, #{connections := 0, sessions := _}},
emqx_eviction_agent:status()),
ok = emqx_eviction_agent:disable(test_eviction).
%% End-to-end session eviction: a client with a session and a
%% subscription is disconnected, its session is evicted first to the
%% local node and then to the slave node, and messages published in
%% between must still reach the client once it reconnects.
t_explicit_session_takeover(Config) ->
erlang:process_flag(trap_exit, true),
{ok, C0} = emqtt_connect(<<"client_with_session">>, false),
{ok, _, _} = emqtt:subscribe(C0, <<"t1">>),
ok = emqx_eviction_agent:enable(test_eviction, undefined),
?assertEqual(
1,
emqx_eviction_agent:connection_count()),
ok = emqx_eviction_agent:evict_connections(1),
%% The client process is linked and exits are trapped, so the eviction
%% shows up as an 'EXIT' message carrying the reason code.
receive
{'EXIT', C0, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} -> ok
after 1000 ->
?assert(false, "Connection not evicted")
end,
%% The connection is gone but the session is still there.
?assertEqual(
0,
emqx_eviction_agent:connection_count()),
?assertEqual(
1,
emqx_eviction_agent:session_count()),
%% First, evacuate to the same node
?check_trace(
?wait_async_action(
emqx_eviction_agent:evict_sessions(1, node()),
#{?snk_kind := emqx_channel_takeover_end},
1000),
fun(_Result, Trace) ->
?assertMatch(
[#{clientid := <<"client_with_session">>} | _ ],
?of_kind(emqx_channel_takeover_end, Trace))
end),
%% Eviction must be disabled for the publisher to be able to connect.
ok = emqx_eviction_agent:disable(test_eviction),
ok = connect_and_publish(<<"t1">>, <<"MessageToEvictedSession1">>),
ok = emqx_eviction_agent:enable(test_eviction, undefined),
%% Evacuate to another node
TargetNodeForEvacuation = ?config(evacuate_node, Config),
?check_trace(
?wait_async_action(
emqx_eviction_agent:evict_sessions(1, TargetNodeForEvacuation),
#{?snk_kind := emqx_channel_takeover_end},
1000),
fun(_Result, Trace) ->
?assertMatch(
[#{clientid := <<"client_with_session">>} | _ ],
?of_kind(emqx_channel_takeover_end, Trace))
end),
%% The session has moved from this node to the target node.
?assertEqual(
0,
emqx_eviction_agent:session_count()),
?assertEqual(
1,
rpc:call(TargetNodeForEvacuation, emqx_eviction_agent, session_count, [])),
ok = emqx_eviction_agent:disable(test_eviction),
ct:pal("evicted chann info: ~p", [emqx_cm:get_chan_info(<<"client_with_session">>)]),
ok = connect_and_publish(<<"t1">>, <<"MessageToEvictedSession2">>),
ct:sleep(100),
%% On reconnect the client must receive both messages, in publish order.
{ok, C2} = emqtt_connect(<<"client_with_session">>, false),
ok = assert_receive_publish(
[#{payload => <<"MessageToEvictedSession1">>, topic => <<"t1">>},
#{payload => <<"MessageToEvictedSession2">>, topic => <<"t1">>}]),
ok = emqtt:disconnect(C2).
%% Restarting the agent process must reset eviction to disabled
%% (the agent's init/1 erases the stored status).
t_disable_on_restart(_Config) ->
ok = emqx_eviction_agent:enable(test_eviction, undefined),
ok = supervisor:terminate_child(emqx_eviction_agent_sup, emqx_eviction_agent),
{ok, _} = supervisor:restart_child(emqx_eviction_agent_sup, emqx_eviction_agent),
?assertEqual(
disabled,
emqx_eviction_agent:status()).
%% Expect the given publishes to arrive in the mailbox in this order,
%% allowing one second for each; a missing message fails the test.
assert_receive_publish(Expected) ->
    lists:foreach(
      fun(#{payload := Payload, topic := Topic}) ->
              receive
                  {publish, #{payload := Payload,
                              topic := Topic}} ->
                      ok
              after 1000 ->
                      ?assert(false, "Message `" ++ binary_to_list(Payload) ++ "` is lost")
              end
      end,
      Expected).
%% Publish one message through a short-lived client connection.
connect_and_publish(Topic, Message) ->
    {ok, Publisher} = emqtt_connect(),
    _ = emqtt:publish(Publisher, Topic, Message),
    ok = emqtt:disconnect(Publisher).

View File

@ -0,0 +1,64 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_api_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-import(emqx_mgmt_api_test_helpers,
[request_api/3,
auth_header_/0,
api_path/1]).
%% Test cases of this suite, as collected by emqx_ct.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the eviction agent together with the management application
%% that serves the REST API.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_management],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the applications in reverse start order.
end_per_suite(Config) ->
    Apps = [emqx_management, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% The status endpoint must mirror the agent's state: disabled, then
%% enabled (with stats) after enable/2, then disabled again.
t_status(_Config) ->
?assertMatch(
{ok, #{<<"status">> := <<"disabled">>}},
api_get(["node_eviction", "status"])),
ok = emqx_eviction_agent:enable(apitest, undefined),
?assertMatch(
{ok, #{<<"status">> := <<"enabled">>,
<<"stats">> := #{}}},
api_get(["node_eviction", "status"])),
ok = emqx_eviction_agent:disable(apitest),
?assertMatch(
{ok, #{<<"status">> := <<"disabled">>}},
api_get(["node_eviction", "status"])).
%% GET the given API path with the test auth header and decode the JSON
%% body into maps; request errors are returned unchanged.
api_get(Path) ->
    Response = request_api(get, api_path(Path), auth_header_()),
    case Response of
        {error, _} ->
            Response;
        {ok, Body} ->
            {ok, jiffy:decode(list_to_binary(Body), [return_maps])}
    end.

View File

@ -0,0 +1,155 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_channel_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-define(CLIENT_ID, <<"client_with_session">>).
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect/2]).
%% Common Test callbacks: test case discovery and suite-wide start/stop of
%% the eviction agent application.
all() ->
    emqx_ct:all(?MODULE).

init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:start_apps(Apps),
    Config.

end_per_suite(_Config) ->
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps).
%% Starting an eviction channel for a client without an existing session
%% must be rejected with `no_session'.
t_start_no_session(_Config) ->
    ClientInfo = #{clientid => ?CLIENT_ID,
                   zone => internal},
    ConnInfo = #{clientid => ?CLIENT_ID,
                 receive_maximum => 32},
    Result = emqx_eviction_agent_channel:start_supervised(
               #{clientinfo => ClientInfo, conninfo => ConnInfo}),
    ?assertMatch({error, {no_session, _}}, Result).
%% With expiry_interval = 0 the start must be rejected with
%% `should_be_expired'.
t_start_no_expire(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    ClientInfo = #{clientid => ?CLIENT_ID,
                   zone => internal},
    ConnInfo = #{clientid => ?CLIENT_ID,
                 receive_maximum => 32,
                 expiry_interval => 0},
    Result = emqx_eviction_agent_channel:start_supervised(
               #{clientinfo => ClientInfo, conninfo => ConnInfo}),
    ?assertMatch({error, {should_be_expired, _}}, Result).
%% With expiry_interval = ?UINT_MAX the eviction channel must start
%% successfully.
t_start_infinite_expire(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    ClientInfo = #{clientid => ?CLIENT_ID,
                   zone => internal},
    ConnInfo = #{clientid => ?CLIENT_ID,
                 receive_maximum => 32,
                 expiry_interval => ?UINT_MAX},
    Result = emqx_eviction_agent_channel:start_supervised(
               #{clientinfo => ClientInfo, conninfo => ConnInfo}),
    ?assertMatch({ok, _}, Result).
%% A started eviction channel must answer `ok' to a `kick' call.
t_kick(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, ChanPid} = emqx_eviction_agent_channel:start_supervised(
                      evict_session_opts(?CLIENT_ID)),
    Reply = emqx_eviction_agent_channel:call(ChanPid, kick),
    ?assertEqual(ok, Reply).
%% A started eviction channel must answer `ok' to a `discard' call.
t_discard(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, ChanPid} = emqx_eviction_agent_channel:start_supervised(
                      evict_session_opts(?CLIENT_ID)),
    Reply = emqx_eviction_agent_channel:call(ChanPid, discard),
    ?assertEqual(ok, Reply).
%% Stopping a started eviction channel must return `ok'.
t_stop(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, ChanPid} = emqx_eviction_agent_channel:start_supervised(
                      evict_session_opts(?CLIENT_ID)),
    Reply = emqx_eviction_agent_channel:stop(ChanPid),
    ?assertEqual(ok, Reply).
%% Unknown casts and raw messages must not break the channel; the calls
%% below must yield the fixed replies asserted here.
t_ignored_calls(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    {ok, ChanPid} = emqx_eviction_agent_channel:start_supervised(
                      evict_session_opts(?CLIENT_ID)),
    Call = fun(Request) -> emqx_eviction_agent_channel:call(ChanPid, Request) end,

    ok = emqx_eviction_agent_channel:cast(ChanPid, unknown),
    ChanPid ! unknown,

    ?assertEqual([], Call(list_acl_cache)),
    ?assertEqual(ok, Call({quota, quota})),
    ?assertEqual(ignored, Call(unknown)).
%% With expiry_interval = 1 the channel process must be gone after 1.5s.
t_expire(_Config) ->
    erlang:process_flag(trap_exit, true),
    _ = emqtt_connect(?CLIENT_ID, false),
    Opts0 = evict_session_opts(?CLIENT_ID),
    #{conninfo := ConnInfo} = Opts0,
    ShortLivedConnInfo = ConnInfo#{expiry_interval => 1},
    Opts1 = Opts0#{conninfo => ShortLivedConnInfo},
    {ok, ChanPid} = emqx_eviction_agent_channel:start_supervised(Opts1),
    ct:sleep(1500),
    ?assertNot(is_process_alive(ChanPid)).
%% Build eviction channel start options from the channel info registered
%% for ClientId, keeping only the conninfo and clientinfo entries.
evict_session_opts(ClientId) ->
    ChanInfo = emqx_cm:get_chan_info(ClientId),
    maps:with([conninfo, clientinfo], ChanInfo).

View File

@ -0,0 +1,47 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_cli_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
%% Common Test callbacks. end_per_suite/1 also disables the eviction agent
%% kind `foo' enabled by t_status/1 (best effort, result ignored).
all() ->
    emqx_ct:all(?MODULE).

init_per_suite(Config) ->
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:start_apps(Apps),
    Config.

end_per_suite(Config) ->
    _ = emqx_eviction_agent:disable(foo),
    Apps = [emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% The CLI must return `ok' for an unknown command (usage output) and for
%% `status' both before and after the agent is enabled.
t_status(_Config) ->
    RunCli = fun(Args) -> ok = emqx_eviction_agent_cli:cli(Args) end,
    %% usage
    RunCli(["foobar"]),
    %% status while disabled
    RunCli(["status"]),
    ok = emqx_eviction_agent:enable(foo, undefined),
    %% status while enabled
    RunCli(["status"]).

View File

@ -0,0 +1,55 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_eviction_agent_test_helpers).
-export([emqtt_connect/0,
emqtt_connect/2,
emqtt_connect_many/1,
emqtt_try_connect/0]).
%% Connect an MQTT v5 client with a 600s session expiry interval.
%% emqtt_connect/0 uses client id <<"client1">> with clean_start = true.
%% Returns {ok, ClientPid}, or the {error, _} from emqtt:connect/1.
emqtt_connect() ->
    emqtt_connect(<<"client1">>, true).

emqtt_connect(ClientId, CleanStart) ->
    ConnOpts = [{clientid, ClientId},
                {clean_start, CleanStart},
                {proto_ver, v5},
                {properties, #{'Session-Expiry-Interval' => 600}}
               ],
    {ok, Client} = emqtt:start_link(ConnOpts),
    case emqtt:connect(Client) of
        {error, _} = Error -> Error;
        {ok, _} -> {ok, Client}
    end.
%% Connect Count clients named <<"client-1">> .. <<"client-Count">> with
%% clean_start = false and return their pids in that order.
emqtt_connect_many(Count) ->
    [begin
         ClientId = <<"client-", (integer_to_binary(N))/binary>>,
         {ok, Client} = emqtt_connect(ClientId, false),
         Client
     end || N <- lists:seq(1, Count)].
%% Probe whether a client can connect: returns `ok' after connecting and
%% disconnecting, or the connection error.
emqtt_try_connect() ->
    case emqtt_connect() of
        {error, _} = Error ->
            Error;
        {ok, Client} ->
            emqtt:disconnect(Client),
            ok
    end.

View File

@ -25,13 +25,12 @@
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx_management/include/emqx_mgmt.hrl").
-define(CONTENT_TYPE, "application/x-www-form-urlencoded").
-define(HOST, "http://127.0.0.1:8081/").
-define(API_VERSION, "v4").
-define(BASE_PATH, "api").
-import(emqx_mgmt_api_test_helpers,
[request_api/3,
request_api/4,
request_api/5,
auth_header_/0,
api_path/1]).
%% Common Test callback: delegates test case discovery to emqx_ct:all/1.
all() ->
emqx_ct:all(?MODULE).
@ -657,49 +656,6 @@ t_data_import_content(_) ->
application:stop(emqx_rule_engine),
application:stop(emqx_dashboard).
%% Perform an HTTP API request. QueryParams is an already encoded query
%% string ("" or [] for none). A non-empty Body is JSON-encoded and sent
%% with content type application/json.
request_api(Method, Url, Auth) ->
    request_api(Method, Url, [], Auth, []).

request_api(Method, Url, QueryParams, Auth) ->
    request_api(Method, Url, QueryParams, Auth, []).

request_api(Method, Url, QueryParams, Auth, Body) ->
    FullUrl = case QueryParams of
                  "" -> Url;
                  _ -> Url ++ "?" ++ QueryParams
              end,
    Request = case Body of
                  [] ->
                      {FullUrl, [Auth]};
                  _ ->
                      {FullUrl, [Auth], "application/json", emqx_json:encode(Body)}
              end,
    do_request_api(Method, Request).
%% Execute the request with httpc and normalise the outcome:
%%   {ok, Body}            for HTTP/1.1 responses with status 200 or 201
%%   {error, StatusLine}   for any other HTTP response
%%   {error, Reason}       for transport-level failures
%% Every httpc error is passed through (not only socket_closed_remotely),
%% so callers matching on {error, _} get a value instead of a case_clause
%% crash, e.g. on econnrefused.
do_request_api(Method, Request) ->
    ct:pal("Method: ~p, Request: ~p", [Method, Request]),
    case httpc:request(Method, Request, [], []) of
        {ok, {{"HTTP/1.1", Code, _}, _, Return}}
          when Code =:= 200 orelse Code =:= 201 ->
            {ok, Return};
        {ok, {Reason, _, _}} ->
            {error, Reason};
        {error, _Reason} = Error ->
            Error
    end.
%% Build an HTTP Basic `Authorization' header tuple. auth_header_/0 uses
%% the credentials admin / public.
auth_header_() ->
    auth_header_("admin", "public").

auth_header_(User, Pass) ->
    Credentials = User ++ ":" ++ Pass,
    {"Authorization", "Basic " ++ base64:encode_to_string(Credentials)}.
%% Join the path segments in Parts under ?BASE_PATH/?API_VERSION and
%% prefix the result with ?HOST.
api_path(Parts) ->
    RelativePath = filename:join([?BASE_PATH, ?API_VERSION | Parts]),
    ?HOST ++ RelativePath.
filter(List, Key, Value) ->
lists:filter(fun(Item) ->
maps:get(Key, Item) == Value

View File

@ -0,0 +1,69 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_mgmt_api_test_helpers).
-compile(export_all).
-compile(nowarn_export_all).
-define(HOST, "http://127.0.0.1:8081/").
-define(API_VERSION, "v4").
-define(BASE_PATH, "api").
%% Perform an HTTP API request. QueryParams is an already encoded query
%% string ("" or [] for none). A non-empty Body is JSON-encoded and sent
%% with content type application/json.
request_api(Method, Url, Auth) ->
    request_api(Method, Url, [], Auth, []).

request_api(Method, Url, QueryParams, Auth) ->
    request_api(Method, Url, QueryParams, Auth, []).

request_api(Method, Url, QueryParams, Auth, Body) ->
    FullUrl = case QueryParams of
                  "" -> Url;
                  _ -> Url ++ "?" ++ QueryParams
              end,
    Request = case Body of
                  [] ->
                      {FullUrl, [Auth]};
                  _ ->
                      {FullUrl, [Auth], "application/json", emqx_json:encode(Body)}
              end,
    do_request_api(Method, Request).
%% Execute the request with httpc and normalise the outcome:
%%   {ok, Body}            for HTTP/1.1 responses with status 200 or 201
%%   {error, StatusLine}   for any other HTTP response
%%   {error, Reason}       for transport-level failures
%% Every httpc error is passed through (not only socket_closed_remotely),
%% so callers matching on {error, _} get a value instead of a case_clause
%% crash, e.g. on econnrefused.
do_request_api(Method, Request) ->
    ct:pal("Method: ~p, Request: ~p", [Method, Request]),
    case httpc:request(Method, Request, [], []) of
        {ok, {{"HTTP/1.1", Code, _}, _, Return}}
          when Code =:= 200 orelse Code =:= 201 ->
            {ok, Return};
        {ok, {Reason, _, _}} ->
            {error, Reason};
        {error, _Reason} = Error ->
            Error
    end.
%% Build an HTTP Basic `Authorization' header tuple. auth_header_/0 uses
%% the credentials admin / public.
auth_header_() ->
    auth_header_("admin", "public").

auth_header_(User, Pass) ->
    Credentials = User ++ ":" ++ Pass,
    {"Authorization", "Basic " ++ base64:encode_to_string(Credentials)}.
%% Join the path segments in Parts under ?BASE_PATH/?API_VERSION and
%% prefix the result with ?HOST.
api_path(Parts) ->
    RelativePath = filename:join([?BASE_PATH, ?API_VERSION | Parts]),
    ?HOST ++ RelativePath.

19
apps/emqx_node_rebalance/.gitignore vendored Normal file
View File

@ -0,0 +1,19 @@
.rebar3
_*
.eunit
*.o
*.beam
*.plt
*.swp
*.swo
.erlang.cookie
ebin
log
erl_crash.dump
.rebar
logs
_build
.idea
*.iml
rebar3.crashdump
*~

View File

@ -0,0 +1,9 @@
emqx_node_rebalance
=====
An OTP library
Build
-----
$ rebar3 compile

View File

@ -0,0 +1,3 @@
##--------------------------------------------------------------------
## EMQX Node Rebalance Plugin
##--------------------------------------------------------------------

View File

@ -0,0 +1,31 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Defaults for the rebalance start options (see
%% emqx_node_rebalance:default_opts/0).

%% Default conn_evict_rate / sess_evict_rate: the value handed to the
%% eviction agent on every eviction tick.
-define(DEFAULT_CONN_EVICT_RATE, 500).
-define(DEFAULT_SESS_EVICT_RATE, 500).
%% Delays before connection eviction and before session eviction begin.
-define(DEFAULT_WAIT_HEALTH_CHECK, 60). %% sec
-define(DEFAULT_WAIT_TAKEOVER, 60). %% sec
%% Balance tolerances: donors count as balanced when their average is within
%% recipient average + ABS threshold, or within recipient average * REL
%% threshold.
-define(DEFAULT_ABS_CONN_THRESHOLD, 1000).
-define(DEFAULT_ABS_SESS_THRESHOLD, 1000).
-define(DEFAULT_REL_CONN_THRESHOLD, 1.1).
-define(DEFAULT_REL_SESS_THRESHOLD, 1.1).
%% Time between eviction ticks; used directly as a gen_statem state_timeout,
%% i.e. milliseconds.
-define(EVICT_INTERVAL, 1000).
%% NOTE(review): presumably the file name used to persist evacuation state;
%% not referenced by emqx_node_rebalance itself - confirm against
%% emqx_node_rebalance_evacuation.
-define(EVACUATION_FILENAME, <<".evacuation">>).

View File

@ -0,0 +1,2 @@
{erl_opts, [debug_info]}.
{deps, []}.

View File

@ -0,0 +1,19 @@
{application, emqx_node_rebalance,
[{description, "EMQX Node Rebalance"},
{vsn, "4.3.0"},
{registered, [emqx_node_rebalance_sup,
emqx_node_rebalance,
emqx_node_rebalance_agent,
emqx_node_rebalance_evacuation]},
{applications,
[kernel,
stdlib
]},
{mod, {emqx_node_rebalance_app,[]}},
{env,[]},
{modules, []},
{maintainers, ["EMQX Team <contact@emqx.io>"]},
{links, [{"Homepage", "https://emqx.io/"},
{"Github", "https://github.com/emqx"}
]}
]}.

View File

@ -0,0 +1,414 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance).
-include("emqx_node_rebalance.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-export([start/1,
status/0,
status/1,
stop/0
]).
-export([start_link/0]).
-behavior(gen_statem).
-export([init/1,
callback_mode/0,
handle_event/4,
code_change/4
]).
-export([is_node_available/0,
available_nodes/1,
connection_count/0,
session_count/0,
disconnected_session_count/0]).
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
-type start_opts() :: #{conn_evict_rate => pos_integer(),
sess_evict_rate => pos_integer(),
wait_health_check => pos_integer(),
wait_takeover => pos_integer(),
abs_conn_threshold => pos_integer(),
rel_conn_threshold => number(),
abs_sess_threshold => pos_integer(),
rel_sess_threshold => number(),
nodes => [node()]
}.
-type start_error() :: already_started | [{node(), term()}].
%% Start a rebalance coordinated by the local node. Options missing from
%% StartOpts are taken from default_opts/0.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    Defaults = default_opts(),
    gen_statem:call(?MODULE, {start, maps:merge(Defaults, StartOpts)}).
%% Stop the rebalance run by the locally registered coordinator.
%% Replies {error, not_started} when the coordinator is in the `disabled'
%% state.
-spec stop() -> ok_or_error(not_started).
stop() ->
gen_statem:call(?MODULE, stop).
%% Query the coordinator status. status/0 asks the locally registered
%% coordinator, status/1 asks the coordinator process Pid.
%% The `enabled' map carries the coordinator data plus `state' and
%% `coordinator_node'.
-spec status() -> disabled | {enabled, map()}.
status() ->
gen_statem:call(?MODULE, status).
-spec status(pid()) -> disabled | {enabled, map()}.
status(Pid) ->
gen_statem:call(Pid, status).
%% Start the coordinator state machine, registered locally under the
%% module name.
-spec start_link() -> startlink_ret().
start_link() ->
gen_statem:start_link({local, ?MODULE}, ?MODULE, [], []).
%% Return the subset of Nodes on which is_node_available/0 succeeds.
%% That function returns the node name (an atom); failed calls come back
%% as {badrpc, _} tuples and are dropped.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _BadNodes} = rpc:multicall(Nodes, ?MODULE, is_node_available, []),
    [Reply || Reply <- Replies, is_atom(Reply)].
%%--------------------------------------------------------------------
%% gen_statem callbacks
%%--------------------------------------------------------------------
%% gen_statem callback: all events are dispatched through handle_event/4.
callback_mode() -> handle_event_function.
%% states: disabled, wait_health_check, evicting_conns, wait_takeover, evicting_sessions
%% gen_statem callback: the coordinator starts in `disabled' with empty data
%% and emits a snabbkaffe trace point on startup.
init([]) ->
?tp(debug, emqx_node_rebalance_started, #{}),
{ok, disabled, #{}}.
%% gen_statem event handler. State flow of a successful run:
%%   disabled -> wait_health_check -> evicting_conns -> wait_takeover
%%            -> evicting_sessions -> disabled
%% Phase transitions are driven by state timeouts; start/stop/status
%% arrive as calls.

%% start
%% Only accepted in `disabled'. On success the donor agents are enabled and
%% connection eviction is scheduled after the health check delay.
handle_event({call, From},
{start, #{wait_health_check := WaitHealthCheck} = Opts},
disabled,
#{} = Data) ->
case enable_rebalance(Data#{opts => Opts}) of
{ok, NewData} ->
?LOG(warning, "Node rebalance enabled: ~p", [Opts]),
{next_state,
wait_health_check,
NewData,
[{state_timeout, seconds(WaitHealthCheck), evict_conns},
{reply, From, ok}]};
{error, Reason} ->
?LOG(warning, "Node rebalance enabling failed: ~p", [Reason]),
{keep_state_and_data,
[{reply, From, {error, Reason}}]}
end;
%% A start request in any other state is rejected.
handle_event({call, From}, {start, _Opts}, _State, #{}) ->
{keep_state_and_data,
[{reply, From, {error, already_started}}]};
%% stop
handle_event({call, From}, stop, disabled, #{}) ->
{keep_state_and_data,
[{reply, From, {error, not_started}}]};
%% Stopping in any active state disables the donor agents and drops the
%% per-run data.
handle_event({call, From}, stop, _State, Data) ->
ok = disable_rebalance(Data),
?LOG(warning, "Node rebalance stopped"),
{next_state,
disabled,
deinit(Data),
[{reply, From, ok}]};
%% status
handle_event({call, From}, status, disabled, #{}) ->
{keep_state_and_data,
[{reply, From, disabled}]};
handle_event({call, From}, status, State, Data) ->
Stats = get_stats(State, Data),
{keep_state_and_data,
[{reply, From, {enabled, Stats#{state => State,
coordinator_node => node()}}}]};
%% conn eviction
%% Health check wait is over: enter evicting_conns and run the first
%% eviction round immediately (timeout 0).
handle_event(state_timeout,
evict_conns,
wait_health_check,
Data) ->
?LOG(warning, "Node rebalance wait_health_check over"),
{next_state,
evicting_conns,
Data,
[{state_timeout, 0, evict_conns}]};
%% One connection eviction round. `ok' means the thresholds are met and the
%% takeover wait starts; `continue' re-arms the timer for the next round.
handle_event(state_timeout,
evict_conns,
evicting_conns,
#{opts := #{wait_takeover := WaitTakeover,
evict_interval := EvictInterval}} = Data) ->
case evict_conns(Data) of
ok ->
?LOG(warning, "Node rebalance evict_conns over"),
{next_state,
wait_takeover,
Data,
[{state_timeout, seconds(WaitTakeover), evict_sessions}]};
{continue, NewData} ->
{keep_state,
NewData,
[{state_timeout, EvictInterval, evict_conns}]}
end;
%% Takeover wait is over: enter evicting_sessions and run the first round
%% immediately (timeout 0).
handle_event(state_timeout,
evict_sessions,
wait_takeover,
Data) ->
?LOG(warning, "Node rebalance wait_takeover over"),
{next_state,
evicting_sessions,
Data,
[{state_timeout, 0, evict_sessions}]};
%% One session eviction round. `ok' finishes the rebalance: the donor agents
%% are disabled and the coordinator returns to `disabled'.
handle_event(state_timeout,
evict_sessions,
evicting_sessions,
#{opts := #{evict_interval := EvictInterval}} = Data) ->
case evict_sessions(Data) of
ok ->
?tp(debug, emqx_node_rebalance_evict_sess_over, #{}),
?LOG(warning, "Node rebalance evict_sess over"),
ok = disable_rebalance(Data),
?LOG(warning, "Rebalance finished successfully"),
{next_state,
disabled,
deinit(Data)};
{continue, NewData} ->
{keep_state,
NewData,
[{state_timeout, EvictInterval, evict_sessions}]}
end;
%% Fallbacks: unknown calls are answered with `ignored'; unknown info and
%% cast messages are logged and dropped.
handle_event({call, From}, Msg, State, Data) ->
?LOG(warning, "Unknown call: ~p, State: ~p, Data: ~p", [Msg, State, Data]),
{keep_state_and_data,
[{reply, From, ignored}]};
handle_event(info, Msg, State, Data) ->
?LOG(warning, "Unknown Msg: ~p, State: ~p, Data: ~p", [Msg, State, Data]),
keep_state_and_data;
handle_event(cast, Msg, State, Data) ->
?LOG(warning, "Unknown cast Msg: ~p, State: ~p, Data: ~p", [Msg, State, Data]),
keep_state_and_data.
%% gen_statem callback: state and data are kept unchanged on code upgrade.
code_change(_Vsn, State, Data, _Extra) ->
{ok, State, Data}.
%%--------------------------------------------------------------------
%% internal funs
%%--------------------------------------------------------------------
%% Decide whether a rebalance is needed and, if so, enable the rebalance
%% agent on every donor node.
%%
%% Nodes whose connection count is at or above the cluster average become
%% donors, the others recipients. Returns {ok, Data} extended with the
%% donor/recipient lists and the initial counts, or
%% {error, nothing_to_balance}.
%%
%% An empty node list is reported as nothing_to_balance; without this clause
%% it would reach avg([]) and crash the coordinator with function_clause.
%% Failing remote calls still raise {bad_nodes, _} from multicall/2.
enable_rebalance(#{opts := #{nodes := []}}) ->
    {error, nothing_to_balance};
enable_rebalance(#{opts := Opts} = Data) ->
    Nodes = maps:get(nodes, Opts),
    ConnCounts = multicall(Nodes, {?MODULE, connection_count, []}),
    SessCounts = multicall(Nodes, {?MODULE, session_count, []}),
    {_, Counts} = lists:unzip(ConnCounts),
    Avg = avg(Counts),
    {DonorCounts, RecipientCounts} =
        lists:partition(fun({_Node, Count}) -> Count >= Avg end, ConnCounts),
    ?LOG(warning, "Enabling rebalance: ConnCounts=~p, DonorCounts=~p, RecipientCounts=~p",
         [ConnCounts, DonorCounts, RecipientCounts]),
    {DonorNodes, _} = lists:unzip(DonorCounts),
    {RecipientNodes, _} = lists:unzip(RecipientCounts),
    case need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) of
        false ->
            {error, nothing_to_balance};
        true ->
            %% The coordinator pid is handed to the agents so they can link
            %% to it.
            _ = multicall(DonorNodes, {emqx_node_rebalance_agent, enable, [self()]}),
            {ok, Data#{donors => DonorNodes,
                       recipients => RecipientNodes,
                       initial_conn_counts => maps:from_list(ConnCounts),
                       initial_sess_counts => maps:from_list(SessCounts)}}
    end.
%% Disable the rebalance agent on every donor node on behalf of this
%% coordinator.
disable_rebalance(#{donors := DonorNodes}) ->
    Request = {emqx_node_rebalance_agent, disable, [self()]},
    _ = multicall(DonorNodes, Request),
    ok.
%% Run one connection eviction round.
%% Returns `ok' once the donor average is within the connection thresholds
%% of the recipient average. Otherwise asks every donor above the recipient
%% average to evict conn_evict_rate connections and returns
%% {continue, Data} updated with the freshly sampled statistics.
evict_conns(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, {?MODULE, connection_count, []}),
    RecipientNodeCounts = multicall(RecipientNodes, {?MODULE, connection_count, []}),
    {_, DonorCounts} = lists:unzip(DonorNodeCounts),
    {_, RecipientCounts} = lists:unzip(RecipientNodeCounts),
    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    Thresholds = thresholds(conn, Opts),
    Sampled = #{donor_conn_avg => DonorAvg,
                recipient_conn_avg => RecipientAvg,
                donor_conn_counts => maps:from_list(DonorNodeCounts),
                recipient_conn_counts => maps:from_list(RecipientNodeCounts)},
    NewData = maps:merge(Data, Sampled),
    case within_thresholds(DonorAvg, RecipientAvg, Thresholds) of
        false ->
            ConnEvictRate = maps:get(conn_evict_rate, Opts),
            NodesToEvict = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?LOG(warning, "Node rebalance, evict_conns, nodes=~p, counts=~p",
                 [NodesToEvict, ConnEvictRate]),
            EvictCall = {emqx_eviction_agent, evict_connections, [ConnEvictRate]},
            _ = multicall(NodesToEvict, EvictCall),
            {continue, NewData};
        true ->
            ok
    end.
%% Run one session eviction round, based on disconnected session counts.
%% Returns `ok' once the donor average is within the session thresholds of
%% the recipient average. Otherwise asks every donor above the recipient
%% average to evict sess_evict_rate disconnected sessions towards the
%% recipient nodes and returns {continue, Data} updated with the freshly
%% sampled statistics.
evict_sessions(#{donors := DonorNodes, recipients := RecipientNodes, opts := Opts} = Data) ->
    DonorNodeCounts = multicall(DonorNodes, {?MODULE, disconnected_session_count, []}),
    RecipientNodeCounts = multicall(RecipientNodes, {?MODULE, disconnected_session_count, []}),
    {_, DonorCounts} = lists:unzip(DonorNodeCounts),
    {_, RecipientCounts} = lists:unzip(RecipientNodeCounts),
    DonorAvg = avg(DonorCounts),
    RecipientAvg = avg(RecipientCounts),
    Thresholds = thresholds(sess, Opts),
    Sampled = #{donor_sess_avg => DonorAvg,
                recipient_sess_avg => RecipientAvg,
                donor_sess_counts => maps:from_list(DonorNodeCounts),
                recipient_sess_counts => maps:from_list(RecipientNodeCounts)},
    NewData = maps:merge(Data, Sampled),
    case within_thresholds(DonorAvg, RecipientAvg, Thresholds) of
        false ->
            SessEvictRate = maps:get(sess_evict_rate, Opts),
            NodesToEvict = nodes_to_evict(RecipientAvg, DonorNodeCounts),
            ?LOG(warning, "Node rebalance, evict_sessions, nodes=~p, counts=~p",
                 [NodesToEvict, SessEvictRate]),
            EvictCall = {emqx_eviction_agent,
                         evict_sessions,
                         [SessEvictRate, RecipientNodes, disconnected]},
            _ = multicall(NodesToEvict, EvictCall),
            {continue, NewData};
        true ->
            ok
    end.
%% A rebalance is needed when both donor and recipient nodes exist and the
%% donor average exceeds the recipient average beyond the thresholds, for
%% connections or for sessions. The decision is also emitted as a
%% snabbkaffe trace point.
need_rebalance([] = _DonorNodes, _RecipientNodes, _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(_DonorNodes, [] = _RecipientNodes, _ConnCounts, _SessCounts, _Opts) ->
    false;
need_rebalance(DonorNodes, RecipientNodes, ConnCounts, SessCounts, Opts) ->
    DonorConnAvg = avg_for_nodes(DonorNodes, ConnCounts),
    RecipientConnAvg = avg_for_nodes(RecipientNodes, ConnCounts),
    DonorSessAvg = avg_for_nodes(DonorNodes, SessCounts),
    RecipientSessAvg = avg_for_nodes(RecipientNodes, SessCounts),
    ConnsBalanced = within_thresholds(DonorConnAvg, RecipientConnAvg, thresholds(conn, Opts)),
    Result =
        case ConnsBalanced of
            false ->
                true;
            true ->
                not within_thresholds(DonorSessAvg, RecipientSessAvg, thresholds(sess, Opts))
        end,
    ?tp(debug, emqx_node_rebalance_need_rebalance,
        #{donors => DonorNodes,
          recipients => RecipientNodes,
          conn_counts => ConnCounts,
          sess_counts => SessCounts,
          opts => Opts,
          result => Result
         }),
    Result.
%% Average of the counts belonging to Nodes, taken from the
%% [{Node, Count}] list Counts.
avg_for_nodes(Nodes, Counts) ->
    CountsByNode = maps:from_list(Counts),
    Selected = maps:with(Nodes, CountsByNode),
    avg(maps:values(Selected)).
%% True when Value does not exceed GoalValue by more than the absolute
%% threshold, or does not exceed GoalValue scaled by the relative threshold.
within_thresholds(Value, GoalValue, {AbsThres, RelThres}) ->
    case Value =< GoalValue + AbsThres of
        true -> true;
        false -> Value =< GoalValue * RelThres
    end.
%% Pick the {Absolute, Relative} threshold pair for connections (`conn') or
%% sessions (`sess') from the rebalance options.
thresholds(conn, #{abs_conn_threshold := AbsConn, rel_conn_threshold := RelConn}) ->
    {AbsConn, RelConn};
thresholds(sess, #{abs_sess_threshold := AbsSess, rel_sess_threshold := RelSess}) ->
    {AbsSess, RelSess}.
%% Nodes from the [{Node, Count}] list whose count is strictly above Goal,
%% in their original order.
nodes_to_evict(Goal, NodeCounts) ->
    lists:filtermap(
      fun({Node, Count}) ->
              case Count > Goal of
                  true -> {true, Node};
                  false -> false
              end
      end,
      NodeCounts).
%% Statistics reported by `status': nothing while disabled, otherwise the
%% whole coordinator data map.
get_stats(State, Data) ->
    case State of
        disabled -> #{};
        _ -> Data
    end.
%% Arithmetic mean of a non-empty list of numbers; always a float.
%% An empty list is rejected with function_clause, as before. The head
%% matches on the list shape instead of calling length/1 in a guard.
avg([_ | _] = List) ->
    lists:sum(List) / length(List).
%% Call M:F(A) on all Nodes and return [{Node, Result}], with `ok' and
%% {ok, Result} replies unwrapped by ok_result/1.
%% Raises error({bad_nodes, _}) if any node is unreachable or any reply is
%% not an ok-reply.
multicall(Nodes, {Mod, Fun, Args}) ->
    {Replies, BadNodes} = rpc:multicall(Nodes, Mod, Fun, Args),
    case BadNodes of
        [] ->
            %% With no bad nodes the replies line up with Nodes one to one.
            NodeReplies = lists:zip(Nodes, Replies),
            case lists:partition(fun is_ok/1, NodeReplies) of
                {OkReplies, []} ->
                    [{Node, ok_result(Reply)} || {Node, Reply} <- OkReplies];
                {_, BadReplies} ->
                    error({bad_nodes, BadReplies})
            end;
        [_ | _] ->
            error({bad_nodes, BadNodes})
    end.
%% True for a {Node, Reply} pair whose reply is `ok' or {ok, _}.
is_ok({_Node, Reply}) ->
    case Reply of
        {ok, _} -> true;
        ok -> true;
        _ -> false
    end;
is_ok(_) ->
    false.
%% Unwrap an ok-reply: {ok, Value} gives Value, a bare `ok' stays `ok'.
ok_result(ok) ->
    ok;
ok_result({ok, Value}) ->
    Value.
%% Counters exported for remote invocation through multicall/2; each wraps
%% the eviction agent's value in {ok, _} so multicall/2 accepts the reply.
connection_count() ->
{ok, emqx_eviction_agent:connection_count()}.
session_count() ->
{ok, emqx_eviction_agent:session_count()}.
%% Counts only sessions in the `disconnected' category.
disconnected_session_count() ->
{ok, emqx_eviction_agent:session_count(disconnected)}.
%% Default start options. `nodes' defaults to every node returned by
%% all_nodes/0; the remaining values come from emqx_node_rebalance.hrl.
default_opts() ->
    #{nodes => all_nodes(),
      evict_interval => ?EVICT_INTERVAL,
      wait_health_check => ?DEFAULT_WAIT_HEALTH_CHECK,
      wait_takeover => ?DEFAULT_WAIT_TAKEOVER,
      conn_evict_rate => ?DEFAULT_CONN_EVICT_RATE,
      sess_evict_rate => ?DEFAULT_SESS_EVICT_RATE,
      abs_conn_threshold => ?DEFAULT_ABS_CONN_THRESHOLD,
      rel_conn_threshold => ?DEFAULT_REL_CONN_THRESHOLD,
      abs_sess_threshold => ?DEFAULT_ABS_SESS_THRESHOLD,
      rel_sess_threshold => ?DEFAULT_REL_SESS_THRESHOLD
     }.
%% Drop every per-run key from the coordinator data when returning to the
%% `disabled' state. `donors' and `recipients' are removed as well, so no
%% stale cluster membership from a finished run is kept in the process
%% state.
deinit(Data) ->
    Keys = [donors, recipients,
            recipient_conn_avg, recipient_sess_avg, donor_conn_avg, donor_sess_avg,
            recipient_conn_counts, recipient_sess_counts, donor_conn_counts, donor_sess_counts,
            initial_conn_counts, initial_sess_counts,
            opts],
    maps:without(Keys, Data).
%% Availability probe meant to be called via rpc: returns node() when the
%% rebalance agent process is registered and the eviction agent is
%% disabled. Otherwise it crashes with badmatch, which the caller sees as a
%% failed rpc reply.
is_node_available() ->
    AgentPid = whereis(emqx_node_rebalance_agent),
    true = is_pid(AgentPid),
    disabled = emqx_eviction_agent:status(),
    node().
%% All cluster nodes as reported by ekka_mnesia:cluster_nodes(all).
all_nodes() ->
ekka_mnesia:cluster_nodes(all).
%% Convert seconds (integer or float) into a whole number of milliseconds.
seconds(Sec) ->
    Millis = timer:seconds(Sec),
    round(Millis).

View File

@ -0,0 +1,127 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_agent).
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("stdlib/include/qlc.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-export([start_link/0,
enable/1,
disable/1,
status/0
]).
-export([init/1,
handle_call/3,
handle_info/2,
handle_cast/2,
code_change/3
]).
-define(ENABLE_KIND, emqx_node_rebalance).
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
-type status() :: {enabled, pid()} | disabled.
%% Start the rebalance agent, registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
%% Enable the local agent on behalf of the coordinator CoordinatorPid.
%% Fails with already_enabled if a coordinator is already registered, or
%% eviction_agent_busy if the eviction agent cannot be enabled.
-spec enable(pid()) -> ok_or_error(already_enabled | eviction_agent_busy).
enable(CoordinatorPid) ->
gen_server:call(?MODULE, {enable, CoordinatorPid}).
%% Disable the local agent. Only the coordinator that enabled it may do so:
%% another pid gets invalid_coordinator, and already_disabled is returned
%% when no coordinator is registered.
-spec disable(pid()) -> ok_or_error(already_disabled | invalid_coordinator).
disable(CoordinatorPid) ->
gen_server:call(?MODULE, {disable, CoordinatorPid}).
%% Report whether the agent is enabled and, if so, by which coordinator.
-spec status() -> status().
status() ->
gen_server:call(?MODULE, status).
%%--------------------------------------------------------------------
%% gen_server callbacks
%%--------------------------------------------------------------------
%% gen_server callback: the agent starts disabled, i.e. with an empty state
%% map (no coordinator_pid key).
init([]) ->
{ok, #{}}.
%% gen_server call handler. The agent is "enabled" exactly while its state
%% map contains coordinator_pid.

%% enable: link to the coordinator and to the local eviction agent process,
%% then enable the eviction agent. Both links are removed again if the
%% eviction agent is busy.
handle_call({enable, CoordinatorPid}, _From, St) ->
case St of
#{coordinator_pid := _Pid} ->
{reply, {error, already_enabled}, St};
_ ->
true = link(CoordinatorPid),
EvictionAgentPid = whereis(emqx_eviction_agent),
true = link(EvictionAgentPid),
case emqx_eviction_agent:enable(?ENABLE_KIND, undefined) of
ok ->
{reply, ok, #{coordinator_pid => CoordinatorPid,
eviction_agent_pid => EvictionAgentPid}};
{error, eviction_agent_busy} ->
true = unlink(EvictionAgentPid),
true = unlink(CoordinatorPid),
{reply, {error, eviction_agent_busy}, St}
end
end;
%% disable: accepted only from the coordinator that enabled the agent
%% (CoordinatorPid is already bound, so the first pattern requires the same
%% pid). Disables the eviction agent and removes both links.
handle_call({disable, CoordinatorPid}, _From, St) ->
case St of
#{coordinator_pid := CoordinatorPid,
eviction_agent_pid := EvictionAgentPid} ->
_ = emqx_eviction_agent:disable(?ENABLE_KIND),
true = unlink(EvictionAgentPid),
true = unlink(CoordinatorPid),
NewSt = maps:without(
[coordinator_pid, eviction_agent_pid],
St),
{reply, ok, NewSt};
#{coordinator_pid := _CoordinatorPid} ->
{reply, {error, invalid_coordinator}, St};
#{} ->
{reply, {error, already_disabled}, St}
end;
%% status: {enabled, CoordinatorPid} or disabled.
handle_call(status, _From, St) ->
case St of
#{coordinator_pid := Pid} ->
{reply, {enabled, Pid}, St};
_ ->
{reply, disabled, St}
end;
%% Any other call is logged and answered with `ignored'.
handle_call(Msg, _From, St) ->
?LOG(warning, "Unknown call: ~p, State: ~p", [Msg, St]),
{reply, ignored, St}.
%% gen_server callback: unexpected messages are logged and dropped.
handle_info(Msg, St) ->
?LOG(warning, "Unknown Msg: ~p, State: ~p", [Msg, St]),
{noreply, St}.
%% gen_server callback: the agent defines no casts; any cast is logged and
%% dropped.
handle_cast(Msg, St) ->
?LOG(warning, "Unknown cast Msg: ~p, State: ~p", [Msg, St]),
{noreply, St}.
%% gen_server callback: the state is kept unchanged on code upgrade.
code_change(_Vsn, State, _Extra) ->
{ok, State}.

View File

@ -0,0 +1,243 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_api).
-import(minirest, [return/1]).
%% REST route declarations. Each `-rest_api' attribute names an HTTP method,
%% a path and the handler function (`func') of this module that serves it.
%% Paths containing `:bin:node' carry the target node name; the handlers
%% below receive it as the `node' binding.
-rest_api(#{name => load_rebalance_status,
method => 'GET',
path => "/load_rebalance/status",
func => status,
descr => "Get load rebalance status"}).
-rest_api(#{name => load_rebalance_global_status,
method => 'GET',
path => "/load_rebalance/global_status",
func => global_status,
descr => "Get status of all rebalance/evacuation processes across the cluster"}).
-rest_api(#{name => load_rebalance_availability_check,
method => 'GET',
path => "/load_rebalance/availability_check",
func => availability_check,
descr => "Node rebalance availability check"}).
-rest_api(#{name => load_rebalance_start,
method => 'POST',
path => "/load_rebalance/:bin:node/start",
func => rebalance_start,
descr => "Start rebalancing with the node as coordinator"}).
-rest_api(#{name => load_rebalance_stop,
method => 'POST',
path => "/load_rebalance/:bin:node/stop",
func => rebalance_stop,
descr => "Stop rebalancing coordinated by the node"}).
-rest_api(#{name => load_rebalance_evacuation_start,
method => 'POST',
path => "/load_rebalance/:bin:node/evacuation/start",
func => rebalance_evacuation_start,
descr => "Start evacuation on a node "}).
-rest_api(#{name => load_rebalance_evacuation_stop,
method => 'POST',
path => "/load_rebalance/:bin:node/evacuation/stop",
func => rebalance_evacuation_stop,
descr => "Stop evacuation on the node"}).
-export([status/2,
availability_check/2,
global_status/2,
rebalance_evacuation_start/2,
rebalance_evacuation_stop/2,
rebalance_start/2,
rebalance_stop/2
]).
%% @doc Handler for GET /load_rebalance/status.
%% Returns `#{status => disabled}' when nothing is running on the local node,
%% otherwise the process stats tagged with the process kind.
status(_Bindings, _Params) ->
    Body =
        case emqx_node_rebalance_status:local_status() of
            disabled ->
                #{status => disabled};
            {Process, Stats} when Process =:= rebalance; Process =:= evacuation ->
                format_status(Process, Stats)
        end,
    {ok, Body}.
%% @doc Handler for GET /load_rebalance/global_status.
%% Converts the per-node evacuation and rebalance lists into maps.
global_status(_Bindings, _Params) ->
    Global = emqx_node_rebalance_status:global_status(),
    #{evacuations := EvacuationList,
      rebalances := RebalanceList} = Global,
    Body = #{evacuations => maps:from_list(EvacuationList),
             rebalances => maps:from_list(RebalanceList)},
    {ok, Body}.
%% @doc Handler for GET /load_rebalance/availability_check.
%% Answers 200 while the eviction agent is disabled and 503 while it is enabled.
availability_check(_Bindings, _Params) ->
    Code =
        case emqx_eviction_agent:status() of
            disabled -> 200;
            {enabled, _Stats} -> 503
        end,
    {Code, #{}}.
%% @doc Handler for POST /load_rebalance/:node/evacuation/start.
%% Validates the request parameters, then starts evacuation on the target node.
rebalance_evacuation_start(#{node := NodeBin}, Params) ->
    Start =
        fun() ->
            {Node, Opts} = validate_evacuation(NodeBin, params(Params)),
            rpc(Node, emqx_node_rebalance_evacuation, start, [Opts])
        end,
    validated(Start).
%% @doc Handler for POST /load_rebalance/:node/evacuation/stop.
%% Stops evacuation on the node named in the path.
rebalance_evacuation_stop(#{node := NodeBin}, _Params) ->
    Stop =
        fun() ->
            rpc(parse_node(NodeBin), emqx_node_rebalance_evacuation, stop, [])
        end,
    validated(Stop).
%% @doc Handler for POST /load_rebalance/:node/start.
%% Validates the request parameters, then starts rebalancing with the
%% node from the path as coordinator.
rebalance_start(#{node := NodeBin}, Params) ->
    Start =
        fun() ->
            {Node, Opts} = validate_rebalance(NodeBin, params(Params)),
            rpc(Node, emqx_node_rebalance, start, [Opts])
        end,
    validated(Start).
%% @doc Handler for POST /load_rebalance/:node/stop.
%% Stops the rebalance coordinated by the node named in the path.
rebalance_stop(#{node := NodeBin}, _Params) ->
    Stop =
        fun() ->
            rpc(parse_node(NodeBin), emqx_node_rebalance, stop, [])
        end,
    validated(Stop).
%% @doc Calls `M:F(A)' on `Node' and turns the result into a REST reply.
%% `ok' becomes a success reply; `{error, _}', `{badrpc, _}' and any other
%% result become a 400 reply with a descriptive message.
%%
%% The message is always a binary, like the one produced by `validated/1',
%% rather than the nested character list returned by `io_lib:format/2'.
rpc(Node, M, F, A) ->
    case rpc:call(Node, M, F, A) of
        ok ->
            return({ok, []});
        {error, Error} ->
            rpc_error("~p", [Error]);
        {badrpc, _} ->
            rpc_error("Error communicating with node ~p", [Node]);
        Unknown ->
            rpc_error("Unrecognized rpc result from node ~p: ~p", [Node, Unknown])
    end.

%% Builds a 400 error reply whose message is a formatted binary.
rpc_error(Format, Args) ->
    Message = unicode:characters_to_binary(io_lib:format(Format, Args)),
    return({error, 400, Message}).
%% @doc Adds the process kind and `status => enabled' to a stats map.
format_status(Process, Stats) ->
    maps:merge(Stats, #{process => Process, status => enabled}).
%% @doc Validates the node name and evacuation parameters of a request.
%% Returns `{Node, OptsMap}'; invalid input raises a validation error.
validate_evacuation(Node, Params) ->
    NodeToEvacuate = parse_node(Node),
    Opts = maps:from_list(lists:map(fun validate_evacuation_param/1, Params)),
    {NodeToEvacuate, Opts}.
%% @doc Validates the coordinator node name and rebalance parameters of a request.
%% Returns `{Node, OptsMap}'; invalid input raises a validation error.
validate_rebalance(Node, Params) ->
    CoordinatorNode = parse_node(Node),
    Opts = maps:from_list(lists:map(fun validate_rebalance_param/1, Params)),
    {CoordinatorNode, Opts}.
%% @doc Validates one evacuation request parameter and maps it to the
%% `{OptionKey, Value}' pair understood by the evacuation process.
%% Unknown parameters raise a validation error.
validate_evacuation_param(Param) ->
    case Param of
        {<<"conn_evict_rate">>, Value} ->
            validate_pos_int(conn_evict_rate, Value);
        {<<"sess_evict_rate">>, Value} ->
            validate_pos_int(sess_evict_rate, Value);
        {<<"redirect_to">>, Value} ->
            validate_binary(server_reference, Value);
        {<<"wait_takeover">>, Value} ->
            validate_pos_int(wait_takeover, Value);
        {<<"migrate_to">>, Value} ->
            validate_nodes(migrate_to, Value);
        Unknown ->
            validation_error(io_lib:format("Unknown evacuation param: ~p", [Unknown]))
    end.
%% @doc Validates one rebalance request parameter and maps it to the
%% `{OptionKey, Value}' pair understood by the rebalance coordinator.
%% Unknown parameters raise a validation error.
validate_rebalance_param(Param) ->
    case Param of
        {<<"wait_health_check">>, Value} ->
            validate_pos_int(wait_health_check, Value);
        {<<"conn_evict_rate">>, Value} ->
            validate_pos_int(conn_evict_rate, Value);
        {<<"sess_evict_rate">>, Value} ->
            validate_pos_int(sess_evict_rate, Value);
        {<<"abs_conn_threshold">>, Value} ->
            validate_pos_int(abs_conn_threshold, Value);
        {<<"rel_conn_threshold">>, Value} ->
            validate_fraction(rel_conn_threshold, Value);
        {<<"abs_sess_threshold">>, Value} ->
            validate_pos_int(abs_sess_threshold, Value);
        {<<"rel_sess_threshold">>, Value} ->
            validate_fraction(rel_sess_threshold, Value);
        {<<"wait_takeover">>, Value} ->
            validate_pos_int(wait_takeover, Value);
        {<<"nodes">>, Value} ->
            validate_nodes(nodes, Value);
        Unknown ->
            validation_error(io_lib:format("Unknown rebalance param: ~p", [Unknown]))
    end.
%% @doc Accepts `Value' only if it is a binary; otherwise raises a validation error.
validate_binary(Name, Value) ->
    case is_binary(Value) of
        true ->
            {Name, Value};
        false ->
            validation_error("invalid string in " ++ atom_to_list(Name))
    end.
%% @doc Accepts `Value' only if it is an integer greater than zero;
%% otherwise raises a validation error.
validate_pos_int(Name, Value) when is_integer(Value), Value > 0 ->
    {Name, Value};
validate_pos_int(Name, _Value) ->
    validation_error("invalid " ++ atom_to_list(Name) ++ " value").
%% @doc Accepts `Value' only if it is a number strictly greater than 1.0;
%% otherwise raises a validation error.
validate_fraction(Name, Value) when is_number(Value), Value > 1.0 ->
    {Name, Value};
validate_fraction(Name, _Value) ->
    validation_error("invalid " ++ atom_to_list(Name) ++ " value").
%% @doc Validates a list of node names.
%% Every name must parse to an existing atom and every node must be reported
%% as available; otherwise a validation error naming the problem is raised.
validate_nodes(Name, NodeList) when is_list(NodeList) ->
    Requested = lists:map(fun parse_node/1, NodeList),
    Available = emqx_node_rebalance_evacuation:available_nodes(Requested),
    if
        Available =:= [] ->
            validation_error(
              io_lib:format("no available nodes list in ~p: ~p", [Name, Requested]));
        Available =:= Requested ->
            {Name, Requested};
        true ->
            validation_error(
              io_lib:format("unavailable nodes in ~p: ~p",
                            [Name, Requested -- Available]))
    end;
validate_nodes(Name, Nodes) ->
    validation_error(io_lib:format("invalid node list in ~p: ~p", [Name, Nodes])).
%% @doc Runs `Fun' and converts a thrown `{validation_error, Reason}' into
%% a 400 REST reply carrying the reason as a binary.
validated(Fun) ->
    try Fun() of
        Result -> Result
    catch
        throw:{validation_error, Reason} ->
            return({error, 400, iolist_to_binary(Reason)})
    end.
%% @doc Aborts validation by throwing `{validation_error, Error}';
%% the throw is caught in `validated/1'.
validation_error(Error) ->
    erlang:throw({validation_error, Error}).
%% @doc Converts a node name received over the REST API into an atom.
%% Only already existing atoms are accepted, so request data cannot create
%% new atoms. Unknown names and non-binary values (for example a number
%% inside a JSON node list) raise a validation error instead of crashing
%% the handler with `function_clause'.
parse_node(Bin) when is_binary(Bin) ->
    try
        binary_to_existing_atom(Bin)
    catch
        error:badarg ->
            validation_error("invalid node: " ++ [Bin])
    end;
parse_node(Other) ->
    validation_error(io_lib:format("invalid node: ~p", [Other])).
%% @doc Normalizes request parameters: the `[{}]' form (an empty object)
%% becomes an empty list, anything else is returned unchanged.
params(Params) ->
    case Params of
        [{}] -> [];
        _ -> Params
    end.

View File

@ -0,0 +1,34 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_app).
-behaviour(application).
-emqx_plugin(?MODULE).
-export([ start/2
, stop/1
]).
%% @doc Application start callback: starts the top supervisor with the
%% application environment and registers the CLI command.
start(_Type, _Args) ->
    AppEnv = application:get_all_env(emqx_node_rebalance),
    {ok, _SupPid} = Started = emqx_node_rebalance_sup:start_link(AppEnv),
    ok = emqx_node_rebalance_cli:load(),
    Started.
%% @doc Application stop callback: unregisters the CLI command.
stop(_AppState) ->
    emqx_node_rebalance_cli:unload().

View File

@ -0,0 +1,265 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_cli).
%% APIs
-export([ load/0
, unload/0
, cli/1
]).
%% @doc Registers the `rebalance' command with emqx_ctl, handled by `cli/1'.
load() ->
    Handler = {?MODULE, cli},
    emqx_ctl:register_command(rebalance, Handler, []).
%% @doc Removes the `rebalance' command from emqx_ctl.
unload() ->
    emqx_ctl:unregister_command(rebalance).
%% @doc Entry point of the `rebalance' emqx_ctl command.
%%
%% Supported sub-commands: `start' (evacuation or rebalance, depending on
%% the `--evacuation' flag), `node-status [Node]', `status' and `stop'.
%% Anything else prints the usage text.
cli(["start" | StartArgs]) ->
    case start_args(StartArgs) of
        {evacuation, Opts} ->
            start_evacuation(Opts);
        {rebalance, Opts} ->
            start_rebalance(Opts);
        {error, Error} ->
            emqx_ctl:print("Rebalance start error: ~s~n", [Error]),
            false
    end;
cli(["node-status", NodeStr]) ->
    Node = list_to_atom(NodeStr),
    node_status(emqx_node_rebalance_status:local_status(Node));
cli(["node-status"]) ->
    node_status(emqx_node_rebalance_status:local_status());
cli(["status"]) ->
    #{evacuations := Evacuations,
      rebalances := Rebalances} = emqx_node_rebalance_status:global_status(),
    lists:foreach(
      fun({Node, Status}) ->
              emqx_ctl:print("--------------------------------------------------------------------~n"),
              emqx_ctl:print("Node ~p: evacuation~n~s",
                             [Node, emqx_node_rebalance_status:format_local_status(Status)])
      end,
      Evacuations),
    lists:foreach(
      fun({Node, Status}) ->
              emqx_ctl:print("--------------------------------------------------------------------~n"),
              emqx_ctl:print("Node ~p: rebalance coordinator~n~s",
                             [Node, emqx_node_rebalance_status:format_coordinator_status(Status)])
      end,
      Rebalances);
cli(["stop"]) ->
    case emqx_node_rebalance_evacuation:status() of
        {enabled, _} ->
            ok = emqx_node_rebalance_evacuation:stop(),
            emqx_ctl:print("Rebalance(evacuation) stopped~n"),
            true;
        disabled ->
            case emqx_node_rebalance:status() of
                {enabled, _} ->
                    ok = emqx_node_rebalance:stop(),
                    emqx_ctl:print("Rebalance stopped~n"),
                    true;
                disabled ->
                    emqx_ctl:print("Rebalance is already disabled~n"),
                    false
            end
    end;
cli(_) ->
    emqx_ctl:usage(
      [{"rebalance start --evacuation \\\n"
        "    [--redirect-to \"Host1:Port1 Host2:Port2 ...\"] \\\n"
        "    [--conn-evict-rate CountPerSec] \\\n"
        "    [--migrate-to \"node1@host1 node2@host2 ...\"] \\\n"
        "    [--wait-takeover Secs] \\\n"
        "    [--sess-evict-rate CountPerSec]",
        "Start current node evacuation with optional server redirect to the specified servers"},
       {"rebalance start \\\n"
        "    [--nodes \"node1@host1 node2@host2\"] \\\n"
        "    [--wait-health-check Secs] \\\n"
        "    [--conn-evict-rate ConnPerSec] \\\n"
        "    [--abs-conn-threshold Count] \\\n"
        "    [--rel-conn-threshold Fraction] \\\n"
        "    [--wait-takeover Secs] \\\n"
        "    [--sess-evict-rate CountPerSec] \\\n"
        "    [--abs-sess-threshold Count] \\\n"
        "    [--rel-sess-threshold Fraction]",
        "Start rebalancing coordinated by the current node"},
       {"rebalance node-status",
        "Get current node rebalance status"},
       {"rebalance node-status \"node1@host1\"",
        "Get remote node rebalance status"},
       {"rebalance status",
        "Get statuses of all current rebalance/evacuation processes across the cluster"},
       {"rebalance stop",
        "Stop node rebalance"}]).

%% Starts evacuation of the local node and reports the outcome.
%% A failed start (for example `eviction_agent_busy') is printed instead of
%% crashing the command with a badmatch.
start_evacuation(Opts) ->
    case emqx_node_rebalance_evacuation:status() of
        disabled ->
            case emqx_node_rebalance_evacuation:start(Opts) of
                ok ->
                    emqx_ctl:print("Rebalance(evacuation) started~n"),
                    true;
                {error, Reason} ->
                    emqx_ctl:print("Rebalance(evacuation) start error: ~p~n", [Reason]),
                    false
            end;
        {enabled, _} ->
            emqx_ctl:print("Rebalance is already enabled~n"),
            false
    end.

%% Starts a rebalance coordinated by the local node and reports the outcome.
start_rebalance(Opts) ->
    case emqx_node_rebalance:start(Opts) of
        ok ->
            emqx_ctl:print("Rebalance started~n"),
            true;
        {error, Reason} ->
            emqx_ctl:print("Rebalance start error: ~p~n", [Reason]),
            false
    end.
%% @doc Prints the status of a single node as returned by
%% `emqx_node_rebalance_status:local_status/0,1'. Unexpected values
%% (for example an rpc failure) are printed as an error.
node_status({Process, Status}) when Process =:= evacuation orelse Process =:= rebalance ->
    Formatted = emqx_node_rebalance_status:format_local_status(Status),
    emqx_ctl:print("Rebalance type: ~p~n~s~n", [Process, Formatted]);
node_status(disabled) ->
    emqx_ctl:print("Rebalance disabled~n");
node_status(Other) ->
    emqx_ctl:print("Error detecting rebalance status: ~p~n", [Other]).
%% @doc Parses and validates the arguments of `rebalance start'.
%% Returns `{evacuation, Opts}' when `--evacuation' is present,
%% `{rebalance, Opts}' otherwise, or `{error, Reason}'.
start_args(Args) ->
    case collect_args(Args, #{}) of
        {ok, #{} = Collected} ->
            Pairs = maps:to_list(Collected),
            {Kind, Result} =
                case Collected of
                    #{"--evacuation" := true} ->
                        {evacuation, validate_evacuation(Pairs, #{})};
                    _ ->
                        {rebalance, validate_rebalance(Pairs, #{})}
                end,
            case Result of
                {ok, Validated} -> {Kind, Validated};
                {error, _} = Error -> Error
            end;
        {error, _} = Error ->
            Error
    end.
%% @doc Collects raw command line arguments into a map keyed by option name.
%% `--evacuation' is a flag stored as `true'; every other known option takes
%% exactly one value, stored as the given string. The first unknown or
%% incomplete option stops parsing with `{error, Message}'.
collect_args([], Collected) ->
    {ok, Collected};
collect_args(["--evacuation" | Rest], Collected) ->
    collect_args(Rest, Collected#{"--evacuation" => true});
collect_args([Option, Value | Rest] = Args, Collected) ->
    case lists:member(Option, collect_args_valued_options()) of
        true ->
            collect_args(Rest, Collected#{Option => Value});
        false ->
            {error, io_lib:format("unknown arguments: ~p", [Args])}
    end;
collect_args(Args, _Collected) ->
    {error, io_lib:format("unknown arguments: ~p", [Args])}.

%% Options that must be followed by a value.
collect_args_valued_options() ->
    [%% evacuation
     "--redirect-to",
     "--migrate-to",
     %% rebalance
     "--nodes",
     "--wait-health-check",
     "--abs-conn-threshold",
     "--rel-conn-threshold",
     "--abs-sess-threshold",
     "--rel-sess-threshold",
     %% common
     "--conn-evict-rate",
     "--wait-takeover",
     "--sess-evict-rate"].
%% @doc Validates collected `{Option, RawValue}' pairs of an evacuation
%% start request and accumulates them into the option map passed to
%% `emqx_node_rebalance_evacuation:start/1'.
%% Returns `{ok, Opts}' or `{error, Message}' at the first invalid option.
validate_evacuation([], Acc) ->
    {ok, Acc};
validate_evacuation([{"--evacuation", _} | Pairs], Acc) ->
    validate_evacuation(Pairs, Acc);
validate_evacuation([{"--redirect-to", ServerReference} | Pairs], Acc) ->
    RefBin = list_to_binary(ServerReference),
    validate_evacuation(Pairs, Acc#{server_reference => RefBin});
validate_evacuation([{"--conn-evict-rate", _} | _] = Pairs, Acc) ->
    validate_pos_int(conn_evict_rate, Pairs, Acc, fun validate_evacuation/2);
validate_evacuation([{"--sess-evict-rate", _} | _] = Pairs, Acc) ->
    validate_pos_int(sess_evict_rate, Pairs, Acc, fun validate_evacuation/2);
validate_evacuation([{"--wait-takeover", _} | _] = Pairs, Acc) ->
    validate_pos_int(wait_takeover, Pairs, Acc, fun validate_evacuation/2);
validate_evacuation([{"--migrate-to", MigrateTo} | Pairs], Acc) ->
    %% node names may be separated by commas and/or spaces
    Requested = [list_to_atom(Name) || Name <- string:tokens(MigrateTo, ", ")],
    Available = emqx_node_rebalance_evacuation:available_nodes(Requested),
    if
        Available =:= [] ->
            {error, "invalid --migrate-to, no nodes"};
        Available =:= Requested ->
            validate_evacuation(Pairs, Acc#{migrate_to => Requested});
        true ->
            {error,
             io_lib:format("invalid --migrate-to, unavailable nodes: ~p",
                           [Requested -- Available])}
    end;
validate_evacuation(Pairs, _Acc) ->
    {error, io_lib:format("unknown evacuation arguments: ~p", [Pairs])}.
%% @doc Validates collected `{Option, RawValue}' pairs of a rebalance start
%% request and accumulates them into the option map passed to
%% `emqx_node_rebalance:start/1'.
%% Returns `{ok, Opts}' or `{error, Message}' at the first invalid option.
validate_rebalance([], Acc) ->
    {ok, Acc};
validate_rebalance([{"--wait-health-check", _} | _] = Pairs, Acc) ->
    validate_pos_int(wait_health_check, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--conn-evict-rate", _} | _] = Pairs, Acc) ->
    validate_pos_int(conn_evict_rate, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--sess-evict-rate", _} | _] = Pairs, Acc) ->
    validate_pos_int(sess_evict_rate, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--abs-conn-threshold", _} | _] = Pairs, Acc) ->
    validate_pos_int(abs_conn_threshold, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-conn-threshold", _} | _] = Pairs, Acc) ->
    validate_fraction(rel_conn_threshold, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--abs-sess-threshold", _} | _] = Pairs, Acc) ->
    validate_pos_int(abs_sess_threshold, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--rel-sess-threshold", _} | _] = Pairs, Acc) ->
    validate_fraction(rel_sess_threshold, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--wait-takeover", _} | _] = Pairs, Acc) ->
    validate_pos_int(wait_takeover, Pairs, Acc, fun validate_rebalance/2);
validate_rebalance([{"--nodes", NodeStr} | Pairs], Acc) ->
    %% node names may be separated by commas and/or spaces
    Requested = [list_to_atom(Name) || Name <- string:tokens(NodeStr, ", ")],
    Available = emqx_node_rebalance:available_nodes(Requested),
    if
        Available =:= [] ->
            {error, "invalid --nodes, no nodes"};
        Available =:= Requested ->
            validate_rebalance(Pairs, Acc#{nodes => Requested});
        true ->
            {error,
             io_lib:format("invalid --nodes, unavailable nodes: ~p",
                           [Requested -- Available])}
    end;
validate_rebalance(Pairs, _Acc) ->
    {error, io_lib:format("unknown rebalance arguments: ~p", [Pairs])}.
%% @doc Validates a relative threshold option given on the command line.
%% The value must be a number strictly greater than 1.0. Both float ("1.5")
%% and integer ("2") notations are accepted, matching the REST API
%% validator, which accepts any number above 1.0; the stored value is
%% always a float. On success validation continues with `Next(Rest, Map1)'.
validate_fraction(Name, [{OptionName, Value} | Rest], Map, Next) ->
    case parse_fraction_value(Value) of
        {ok, Num} when Num > 1.0 ->
            Next(Rest, Map#{Name => Num});
        _ ->
            {error, "invalid " ++ OptionName ++ " value"}
    end.

%% Parses a whole string as a float, falling back to integer notation,
%% because `string:to_float/1' rejects strings without a decimal point.
parse_fraction_value(Value) ->
    case string:to_float(Value) of
        {Float, ""} ->
            {ok, Float};
        _ ->
            case string:to_integer(Value) of
                {Int, ""} -> {ok, float(Int)};
                _ -> error
            end
    end.
%% @doc Validates an option whose value must be a positive integer written
%% in decimal notation with no trailing characters.
%% On success validation continues with `Continue(Remaining, Acc1)'.
validate_pos_int(Name, [{OptionName, Raw} | Remaining], Acc, Continue) ->
    case string:to_integer(Raw) of
        {Parsed, []} when Parsed > 0 ->
            Continue(Remaining, Acc#{Name => Parsed});
        _Invalid ->
            {error, "invalid " ++ OptionName ++ " value"}
    end.

View File

@ -0,0 +1,298 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_evacuation).
-include("emqx_node_rebalance.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-export([start/1,
status/0,
stop/0
]).
-export([start_link/0]).
-behavior(gen_statem).
-export([init/1,
callback_mode/0,
handle_event/4,
code_change/4
]).
-export([is_node_available/0,
available_nodes/1]).
-ifdef(TEST).
-export([migrate_to/1]).
-endif.
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
-define(EVICT_INTERVAL_NO_NODES, 30000).
%% Target nodes for session migration; `undefined' means "all other cluster nodes".
-type migrate_to() :: [node()] | undefined.

%% Options accepted by start/1; missing keys are filled from default_opts/0.
-type start_opts() :: #{server_reference => emqx_eviction_agent:server_reference(),
                        conn_evict_rate => pos_integer(),
                        sess_evict_rate => pos_integer(),
                        wait_takeover => pos_integer(),
                        migrate_to => migrate_to()
                       }.
-type start_error() :: already_started | eviction_agent_busy.

%% Stats reported by status/0 while evacuation is running.
%% `state' is the current gen_statem state name.
-type stats() :: #{
                   state := atom(),
                   initial_conns := non_neg_integer(),
                   initial_sessions := non_neg_integer(),
                   current_conns := non_neg_integer(),
                   current_sessions := non_neg_integer(),
                   conn_evict_rate := pos_integer(),
                   sess_evict_rate := pos_integer(),
                   server_reference := emqx_eviction_agent:server_reference(),
                   migrate_to := migrate_to()
                  }.

%% Reply of status/0: the state machine answers `disabled' in the
%% `disabled' state and `{enabled, Stats}' in every other state.
-type status() :: {enabled, stats()} | disabled.
%% @doc Starts evacuation of the local node. Options missing from
%% `StartOpts' are taken from `default_opts/0'.
-spec start(start_opts()) -> ok_or_error(start_error()).
start(StartOpts) ->
    gen_statem:call(?MODULE, {start, maps:merge(default_opts(), StartOpts)}).
%% @doc Stops evacuation of the local node.
-spec stop() -> ok_or_error(not_started).
stop() ->
    Request = stop,
    gen_statem:call(?MODULE, Request).
%% @doc Queries the evacuation state machine of the local node for its status.
-spec status() -> status().
status() ->
    Request = status,
    gen_statem:call(?MODULE, Request).
%% @doc Starts the evacuation state machine, registered locally under the module name.
-spec start_link() -> startlink_ret().
start_link() ->
    ServerName = {local, ?MODULE},
    gen_statem:start_link(ServerName, ?MODULE, [], []).
%% @doc Returns those of `Nodes' that answered `is_node_available/0' with
%% their node name. Non-atom replies (failed calls) are dropped.
-spec available_nodes(list(node())) -> list(node()).
available_nodes(Nodes) when is_list(Nodes) ->
    {Replies, _BadNodes} = rpc:multicall(Nodes, ?MODULE, is_node_available, []),
    [Reply || Reply <- Replies, is_atom(Reply)].
%%--------------------------------------------------------------------
%% gen_statem callbacks
%%--------------------------------------------------------------------
%% @doc gen_statem callback: all events are dispatched through handle_event/4.
callback_mode() ->
    handle_event_function.
%% states: disabled, evicting_conns, waiting_takeover, evicting_sessions, prohibiting

%% @doc gen_statem init callback.
%% If persisted evacuation options are found, evacuation is resumed in the
%% `evicting_conns' state, provided the eviction agent can be enabled.
%% Otherwise the machine starts in the `disabled' state.
init([]) ->
    case emqx_node_rebalance_evacuation_persist:read(default_opts()) of
        none ->
            {ok, disabled, #{}};
        {ok, #{server_reference := ServerReference} = Opts} ->
            ?LOG(warning, "Restoring evacuation state: ~p", [Opts]),
            case emqx_eviction_agent:enable(?MODULE, ServerReference) of
                {error, eviction_agent_busy} ->
                    %% the agent is owned by someone else: drop the stale persisted state
                    emqx_node_rebalance_evacuation_persist:clear(),
                    {ok, disabled, #{}};
                ok ->
                    Restored = init_data(#{}, Opts),
                    ok = warn_enabled(),
                    {ok, evicting_conns, Restored, [{state_timeout, 0, evict_conns}]}
            end
    end.
%% @doc gen_statem event handler.
%%
%% Calls: `{start, Opts}', `stop' and `status'.
%% State timeouts drive the evacuation:
%% evicting_conns -> waiting_takeover -> evicting_sessions -> prohibiting.

%% start
handle_event({call, Caller},
             {start, #{server_reference := ServerReference} = Opts},
             disabled,
             #{} = StateData) ->
    case emqx_eviction_agent:enable(?MODULE, ServerReference) of
        {error, eviction_agent_busy} ->
            {keep_state_and_data,
             [{reply, Caller, {error, eviction_agent_busy}}]};
        ok ->
            EvacData = init_data(StateData, Opts),
            ok = emqx_node_rebalance_evacuation_persist:save(Opts),
            ?LOG(warning, "Node evacuation started"),
            Actions = [{state_timeout, 0, evict_conns},
                       {reply, Caller, ok}],
            {next_state, evicting_conns, EvacData, Actions}
    end;
handle_event({call, Caller}, {start, _Opts}, _State, #{}) ->
    {keep_state_and_data,
     [{reply, Caller, {error, already_started}}]};

%% stop
handle_event({call, Caller}, stop, disabled, #{}) ->
    {keep_state_and_data,
     [{reply, Caller, {error, not_started}}]};
handle_event({call, Caller}, stop, _State, StateData) ->
    ok = emqx_node_rebalance_evacuation_persist:clear(),
    _ = emqx_eviction_agent:disable(?MODULE),
    ?LOG(warning, "Node evacuation stopped"),
    {next_state, disabled, deinit(StateData), [{reply, Caller, ok}]};

%% status
handle_event({call, Caller}, status, disabled, #{}) ->
    {keep_state_and_data,
     [{reply, Caller, disabled}]};
handle_event({call, Caller}, status, State, #{migrate_to := MigrateTo} = StateData) ->
    StatKeys = [initial_conns, current_conns,
                initial_sessions, current_sessions,
                server_reference, conn_evict_rate, sess_evict_rate],
    Stats = maps:with(StatKeys, StateData),
    Reply = {enabled, Stats#{state => State, migrate_to => migrate_to(MigrateTo)}},
    {keep_state_and_data,
     [{reply, Caller, Reply}]};

%% conn eviction
handle_event(state_timeout,
             evict_conns,
             evicting_conns,
             #{conn_evict_rate := ConnEvictRate,
               wait_takeover := WaitTakeover} = StateData) ->
    case emqx_eviction_agent:status() of
        {enabled, #{connections := Conns}} when Conns > 0 ->
            ok = emqx_eviction_agent:evict_connections(ConnEvictRate),
            ?tp(debug, node_evacuation_evict_conn, #{conn_evict_rate => ConnEvictRate}),
            ?LOG(warning, "Node evacuation evict_conns, count=~p, conn_evict_rate=~p",
                 [Conns, ConnEvictRate]),
            %% connections remain: record the count and run again after the interval
            {keep_state,
             StateData#{current_conns => Conns},
             [{state_timeout, ?EVICT_INTERVAL, evict_conns}]};
        {enabled, #{connections := 0}} ->
            ?LOG(warning, "Node evacuation evict_conns over"),
            %% no connections left: wait `wait_takeover' seconds before evicting sessions
            {next_state,
             waiting_takeover,
             StateData#{current_conns => 0},
             [{state_timeout, timer:seconds(WaitTakeover), evict_sessions}]}
    end;

handle_event(state_timeout,
             evict_sessions,
             waiting_takeover,
             StateData) ->
    ?LOG(warning, "Node evacuation wait_takeover over"),
    {next_state,
     evicting_sessions,
     StateData,
     [{state_timeout, 0, evict_sessions}]};

%% session eviction
handle_event(state_timeout,
             evict_sessions,
             evicting_sessions,
             #{sess_evict_rate := SessEvictRate,
               migrate_to := MigrateTo,
               current_sessions := CurrSessCount} = StateData) ->
    case emqx_eviction_agent:status() of
        {enabled, #{sessions := SessCount}} when SessCount > 0 ->
            case migrate_to(MigrateTo) of
                [] ->
                    ?LOG(warning,
                         "No nodes are available to evacuate sessions, session_count=~p",
                         [CurrSessCount]),
                    %% nowhere to migrate to: retry later with the longer interval
                    {keep_state_and_data,
                     [{state_timeout, ?EVICT_INTERVAL_NO_NODES, evict_sessions}]};
                TargetNodes ->
                    ok = emqx_eviction_agent:evict_sessions(SessEvictRate, TargetNodes),
                    ?LOG(warning, "Node evacuation evict_sessions, count=~p, sess_evict_rate=~p,"
                         "target_nodes=~p", [SessCount, SessEvictRate, TargetNodes]),
                    {keep_state,
                     StateData#{current_sessions => SessCount},
                     [{state_timeout, ?EVICT_INTERVAL, evict_sessions}]}
            end;
        {enabled, #{sessions := 0}} ->
            ?tp(debug, node_evacuation_evict_sess_over, #{}),
            ?LOG(warning, "Node evacuation evict_sessions over"),
            {next_state,
             prohibiting,
             StateData#{current_sessions => 0}}
    end;

%% fallbacks for unexpected events
handle_event({call, Caller}, Msg, State, StateData) ->
    ?LOG(warning, "Unknown call: ~p, State: ~p, Data: ~p", [Msg, State, StateData]),
    {keep_state_and_data,
     [{reply, Caller, ignored}]};
handle_event(info, Msg, State, StateData) ->
    ?LOG(warning, "Unknown Msg: ~p, State: ~p, Data: ~p", [Msg, State, StateData]),
    keep_state_and_data;
handle_event(cast, Msg, State, StateData) ->
    ?LOG(warning, "Unknown cast Msg: ~p, State: ~p, Data: ~p", [Msg, State, StateData]),
    keep_state_and_data.
%% @doc gen_statem code upgrade callback: state and data are kept unchanged.
code_change(_OldVsn, StateName, StateData, _Extra) ->
    {ok, StateName, StateData}.
%%--------------------------------------------------------------------
%% internal funs
%%--------------------------------------------------------------------
%% @doc Default evacuation options: no server reference, no explicit
%% migration targets, and the rates and takeover wait taken from the
%% macros in emqx_node_rebalance.hrl.
default_opts() ->
    maps:from_list(
      [{server_reference, undefined},
       {conn_evict_rate, ?DEFAULT_CONN_EVICT_RATE},
       {sess_evict_rate, ?DEFAULT_SESS_EVICT_RATE},
       {wait_takeover, ?DEFAULT_WAIT_TAKEOVER},
       {migrate_to, undefined}]).
%% @doc Builds the state data for a starting evacuation: the options merged
%% into the previous data, plus the initial and current connection and
%% session counts read from the (already enabled) eviction agent.
init_data(Data0, Opts) ->
    WithOpts = maps:merge(Data0, Opts),
    {enabled, #{connections := ConnCount, sessions := SessCount}} = emqx_eviction_agent:status(),
    Counters = #{initial_conns => ConnCount,
                 current_conns => ConnCount,
                 initial_sessions => SessCount,
                 current_sessions => SessCount},
    maps:merge(WithOpts, Counters).
%% @doc Removes the counters and all option keys from the state data.
deinit(Data) ->
    CounterKeys = [initial_conns, current_conns, initial_sessions, current_sessions],
    OptionKeys = maps:keys(default_opts()),
    maps:without(CounterKeys ++ OptionKeys, Data).
%% @doc Announces, both in the log and on standard error, that evacuation is enabled.
warn_enabled() ->
    Warning = "Node evacuation is enabled. The node will not receive connections.",
    ?LOG(warning, Warning),
    io:format(standard_error, "~s~n", [Warning]).
%% @doc Resolves the migration targets: the available nodes among the given
%% list, or among all other cluster nodes when the option is `undefined'.
migrate_to(undefined) ->
    available_nodes(all_nodes());
migrate_to(Nodes) when is_list(Nodes) ->
    available_nodes(Nodes).
%% @doc Called over rpc by `available_nodes/1'. Returns the local node name
%% when the eviction agent is disabled and crashes (badmatch) otherwise,
%% so that the caller filters this node out.
is_node_available() ->
    disabled = emqx_eviction_agent:status(),
    erlang:node().
%% @doc All cluster nodes known to ekka except the local one.
all_nodes() ->
    ClusterNodes = ekka_mnesia:cluster_nodes(all),
    ClusterNodes -- [node()].

View File

@ -0,0 +1,109 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_evacuation_persist).
-export([save/1,
clear/0,
read/1]).
-ifdef(TEST).
-export([evacuation_filepath/0]).
-endif.
-include("emqx_node_rebalance.hrl").
-include_lib("emqx/include/types.hrl").
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
%% do not persist `migrate_to`:
%% * after restart there is nothing to migrate
%% * this value may be invalid after node was offline
-type start_opts() :: #{server_reference => emqx_eviction_agent:server_reference(),
conn_evict_rate => pos_integer(),
sess_evict_rate => pos_integer(),
wait_takeover => pos_integer()
}.
%% @doc Persists the evacuation options as pretty-printed JSON in the
%% evacuation file, creating the parent directory when needed.
%% Only the keys listed in `persist_keys/0' are written.
-spec save(start_opts()) -> ok_or_error(term()).
save(#{server_reference := ServerReference,
       conn_evict_rate := ConnEvictRate,
       sess_evict_rate := SessEvictRate,
       wait_takeover := WaitTakeover} = Data)
  when (is_binary(ServerReference) orelse ServerReference =:= undefined) andalso
       is_integer(ConnEvictRate) andalso ConnEvictRate > 0 andalso
       is_integer(SessEvictRate) andalso SessEvictRate > 0 andalso
       is_integer(WaitTakeover) andalso WaitTakeover >= 0 ->
    Filepath = evacuation_filepath(),
    case filelib:ensure_dir(Filepath) of
        {error, _} = Error ->
            Error;
        ok ->
            Persisted = maps:with(persist_keys(), Data),
            JsonData = emqx_json:encode(prepare_for_encode(Persisted), [pretty]),
            file:write_file(Filepath, JsonData)
    end.
%% @doc Removes the persisted evacuation options.
%% A missing file counts as success, so clearing is idempotent and callers
%% asserting `ok = clear()' do not crash when the file is already gone.
%% Any other file error is returned as `{error, Reason}'.
-spec clear() -> ok_or_error(term()).
clear() ->
    case file:delete(evacuation_filepath()) of
        ok -> ok;
        {error, enoent} -> ok;
        {error, _} = Error -> Error
    end.
%% @doc Reads the persisted evacuation options.
%% Returns `none' when the file cannot be read. When the file exists but
%% does not decode to a JSON object, the defaults are returned, so the mere
%% presence of the file still yields `{ok, Opts}'.
-spec read(start_opts()) -> {ok, start_opts()} | none.
read(DefaultOpts) ->
    case file:read_file(evacuation_filepath()) of
        {error, _} ->
            none;
        {ok, Content} ->
            Opts =
                case emqx_json:safe_decode(Content, [return_maps]) of
                    {ok, Decoded} when is_map(Decoded) ->
                        map_to_opts(DefaultOpts, Decoded);
                    _NotAMap ->
                        DefaultOpts
                end,
            {ok, Opts}
    end.
%%--------------------------------------------------------------------
%% Internal funcs
%%--------------------------------------------------------------------
%% @doc Option keys written to the evacuation file by `save/1'.
persist_keys() ->
    [server_reference, conn_evict_rate, sess_evict_rate, wait_takeover].
%% @doc Replaces an `undefined' server reference with `null' before JSON encoding.
prepare_for_encode(Data) ->
    case Data of
        #{server_reference := undefined} ->
            Data#{server_reference => null};
        _ ->
            Data
    end.
%% @doc Inverse of `prepare_for_encode/1': turns a decoded `null' server
%% reference back into `undefined'.
format_after_decode(Data) ->
    case Data of
        #{server_reference := null} ->
            Data#{server_reference => undefined};
        _ ->
            Data
    end.
%% @doc Converts a decoded JSON object (binary keys) into an options map
%% with the atom keys of `DefaultOpts'; keys missing from the JSON keep
%% their default value.
map_to_opts(DefaultOpts, Map) ->
    Defaults = maps:to_list(DefaultOpts),
    Opts = map_to_opts(Defaults, Map, #{}),
    format_after_decode(Opts).
%% @doc For every `{Key, Default}' pair, looks up the binary form of `Key'
%% in `Map' and stores the found value (or the default) under the atom key.
map_to_opts(Defaults, Map, Opts) ->
    lists:foldl(
      fun({Key, DefaultVal}, Acc) ->
              Acc#{Key => maps:get(atom_to_binary(Key), Map, DefaultVal)}
      end,
      Opts,
      Defaults).
%% @doc Path of the evacuation file inside the configured EMQX data directory.
evacuation_filepath() ->
    DataDir = emqx:get_env(data_dir),
    filename:join([DataDir, ?EVACUATION_FILENAME]).

View File

@ -0,0 +1,225 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_status).
-export([local_status/0,
local_status/1,
global_status/0,
format_local_status/1,
format_coordinator_status/1]).
%% For RPC
-export([evacuation_status/0,
rebalance_status/0]).
%%--------------------------------------------------------------------
%% APIs
%%--------------------------------------------------------------------
%% Status of this node: an active evacuation takes precedence; otherwise the
%% node reports a rebalance only if the rebalance agent is enabled and the
%% coordinator it points to reports an enabled rebalance.
-spec local_status() -> disabled | {evacuation, map()} | {rebalance, map()}.
local_status() ->
    case emqx_node_rebalance_evacuation:status() of
        disabled ->
            local_rebalance_status();
        {enabled, EvacuationStatus} ->
            {evacuation, evacuation(EvacuationStatus)}
    end.

%% Rebalance part of local_status/0: ask the agent for its coordinator and the
%% coordinator for the rebalance status.
local_rebalance_status() ->
    case emqx_node_rebalance_agent:status() of
        disabled ->
            disabled;
        {enabled, CoordinatorPid} ->
            case emqx_node_rebalance:status(CoordinatorPid) of
                disabled ->
                    disabled;
                {enabled, RebalanceStatus} ->
                    local_rebalance(RebalanceStatus, node())
            end
    end.
%% Fetch the local status of Node by calling local_status/0 there over RPC.
%% rpc:call/4 returns `{badrpc, Reason}' when the call fails (e.g. the node
%% is unreachable), so that shape is part of the contract and of the spec.
-spec local_status(node()) ->
          disabled | {evacuation, map()} | {rebalance, map()} | {badrpc, term()}.
local_status(Node) ->
    rpc:call(Node, ?MODULE, ?FUNCTION_NAME, []).
%% Render a local status map as printable text, fields in the local order.
-spec format_local_status(map()) -> iodata().
format_local_status(Status) ->
    FieldOrder = local_status_field_format_order(),
    format_status(Status, FieldOrder).
%% Collect the rebalances and evacuations that are enabled anywhere in the
%% cluster. Replies that are not `{Node, {enabled, Status}}' are dropped, and
%% the second element of each multicall result is ignored.
-spec global_status() -> #{rebalances := [{node(), map()}], evacuations := [{node(), map()}]}.
global_status() ->
    ClusterNodes = ekka_mnesia:cluster_nodes(all),
    {RebalanceReplies, _} = rpc:multicall(ClusterNodes, ?MODULE, rebalance_status, []),
    Rebalances = lists:filtermap(
                   fun({Node, {enabled, Status}}) ->
                           {true, {Node, coordinator_rebalance(Status)}};
                      (_Other) ->
                           false
                   end,
                   RebalanceReplies),
    {EvacuationReplies, _} = rpc:multicall(ClusterNodes, ?MODULE, evacuation_status, []),
    Evacuations = lists:filtermap(
                    fun({Node, {enabled, Status}}) ->
                            {true, {Node, evacuation(Status)}};
                       (_Other) ->
                            false
                    end,
                    EvacuationReplies),
    #{rebalances => Rebalances, evacuations => Evacuations}.
%% Render a coordinator status map as printable text, fields in the
%% coordinator order.
-spec format_coordinator_status(map()) -> iodata().
format_coordinator_status(Status) ->
    FieldOrder = coordinator_status_field_format_order(),
    format_status(Status, FieldOrder).
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------
%% Translate the internal evacuation status map into the externally reported
%% shape. Both goals are fixed at 0; every key read from Status is required.
evacuation(Status) ->
    State = maps:get(state, Status),
    ConnRate = maps:get(conn_evict_rate, Status),
    SessRate = maps:get(sess_evict_rate, Status),
    Recipients = maps:get(migrate_to, Status),
    Stats = #{initial_connected => maps:get(initial_conns, Status),
              current_connected => maps:get(current_conns, Status),
              initial_sessions => maps:get(initial_sessions, Status),
              current_sessions => maps:get(current_sessions, Status)},
    #{state => State,
      connection_eviction_rate => ConnRate,
      session_eviction_rate => SessRate,
      connection_goal => 0,
      session_goal => 0,
      session_recipients => Recipients,
      stats => Stats}.
%% A node reports a rebalance only when it is one of the donors; otherwise it
%% is reported as `disabled'.
local_rebalance(#{donors := Donors} = Stats, Node) ->
    IsDonor = lists:member(Node, Donors),
    if
        IsDonor -> {rebalance, donor_rebalance(Stats, Node)};
        true -> disabled
    end.
%% Build the status reported by a donor node: coordinator-provided settings
%% plus channel counters read from the local eviction agent. The goal fields
%% are included only when the coordinator status carries the averages.
donor_rebalance(Status, Node) ->
    Opts = maps:get(opts, Status),
    ConnCountsAtStart = maps:get(initial_conn_counts, Status),
    SessCountsAtStart = maps:get(initial_sess_counts, Status),
    Stats = #{initial_connected => maps:get(Node, ConnCountsAtStart),
              initial_sessions => maps:get(Node, SessCountsAtStart),
              current_connected => emqx_eviction_agent:connection_count(),
              current_sessions => emqx_eviction_agent:session_count(),
              current_disconnected_sessions =>
                  emqx_eviction_agent:session_count(disconnected)},
    Mandatory = #{state => maps:get(state, Status),
                  coordinator_node => maps:get(coordinator_node, Status),
                  connection_eviction_rate => maps:get(conn_evict_rate, Opts),
                  session_eviction_rate => maps:get(sess_evict_rate, Opts),
                  recipients => maps:get(recipients, Status),
                  stats => Stats},
    %% {ReportedField, KeyInStatus} pairs for the optional fields.
    Goals = [{connection_goal, recipient_conn_avg},
             {disconnected_session_goal, recipient_sess_avg}],
    lists:foldl(
      fun({Field, SourceKey}, Acc) ->
              case maps:find(SourceKey, Status) of
                  {ok, Value} -> Acc#{Field => Value};
                  error -> Acc
              end
      end,
      Mandatory,
      Goals).
%% Build the status reported for a rebalance coordinator. The goal and donor
%% average fields are included only when present in Status.
coordinator_rebalance(Status) ->
    Opts = maps:get(opts, Status),
    Mandatory = #{state => maps:get(state, Status),
                  coordinator_node => maps:get(coordinator_node, Status),
                  connection_eviction_rate => maps:get(conn_evict_rate, Opts),
                  session_eviction_rate => maps:get(sess_evict_rate, Opts),
                  recipients => maps:get(recipients, Status),
                  donors => maps:get(donors, Status)},
    %% {ReportedField, KeyInStatus} pairs for the optional fields.
    Optional = [{connection_goal, recipient_conn_avg},
                {disconnected_session_goal, recipient_sess_avg},
                {donor_conn_avg, donor_conn_avg},
                {donor_sess_avg, donor_sess_avg}],
    lists:foldl(
      fun({Field, SourceKey}, Acc) ->
              case maps:find(SourceKey, Status) of
                  {ok, Value} -> Acc#{Field => Value};
                  error -> Acc
              end
      end,
      Mandatory,
      Optional).
%% Order in which the fields of a local status are printed.
local_status_field_format_order() ->
    [state, coordinator_node,
     connection_eviction_rate, session_eviction_rate,
     connection_goal, session_goal, disconnected_session_goal,
     session_recipients, recipients,
     stats].
%% Order in which the fields of a coordinator status are printed.
coordinator_status_field_format_order() ->
    [state, coordinator_node,
     donors, recipients,
     connection_eviction_rate, session_eviction_rate,
     connection_goal, disconnected_session_goal,
     donor_conn_avg, donor_sess_avg].
%% Format the fields of Status that appear in FieldOrder, in that order.
%% Fields listed in FieldOrder but absent from Status are skipped.
format_status(Status, FieldOrder) ->
    [format_local_status_field({Field, Value})
     || Field <- FieldOrder,
        {ok, Value} <- [maps:find(Field, Status)]].
%% Format a single `{Field, Value}' status entry. `stats' is rendered as a
%% nested block; every other field is a one-line template with the value
%% printed via ~p.
format_local_status_field({stats, Stats}) ->
    format_local_stats(Stats);
format_local_status_field({Field, Value}) ->
    io_lib:format(local_status_field_template(Field), [Value]).

%% io_lib format string used for each known status field.
local_status_field_template(state) ->
    "Rebalance state: ~p~n";
local_status_field_template(coordinator_node) ->
    "Coordinator node: ~p~n";
local_status_field_template(connection_eviction_rate) ->
    "Connection eviction rate: ~p connections/second~n";
local_status_field_template(session_eviction_rate) ->
    "Session eviction rate: ~p sessions/second~n";
local_status_field_template(connection_goal) ->
    "Connection goal: ~p~n";
local_status_field_template(session_goal) ->
    "Session goal: ~p~n";
local_status_field_template(disconnected_session_goal) ->
    "Disconnected session goal: ~p~n";
local_status_field_template(session_recipients) ->
    "Session recipient nodes: ~p~n";
local_status_field_template(recipients) ->
    "Recipient nodes: ~p~n";
local_status_field_template(donors) ->
    "Donor nodes: ~p~n";
local_status_field_template(donor_conn_avg) ->
    "Current average donor node connection count: ~p~n";
local_status_field_template(donor_sess_avg) ->
    "Current average donor node disconnected session count: ~p~n".
%% Render the channel statistics map: a header line followed by one
%% "name: value" line per entry.
format_local_stats(Stats) ->
    Lines = [io_lib:format(" ~p: ~p~n", [Name, Value])
             || {Name, Value} <- maps:to_list(Stats)],
    ["Channel statistics:\n" | Lines].
%% RPC target used by global_status/0: the evacuation status of this node,
%% tagged with the node name.
evacuation_status() ->
    Status = emqx_node_rebalance_evacuation:status(),
    {node(), Status}.
%% RPC target used by global_status/0: the rebalance status of this node,
%% tagged with the node name.
rebalance_status() ->
    Status = emqx_node_rebalance:status(),
    {node(), Status}.

View File

@ -0,0 +1,44 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_sup).
-behaviour(supervisor).
-export([start_link/1]).
-export([init/1]).
%% Start the supervisor, locally registered under the module name.
start_link(Env) ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, [Env]).
%% Supervisor callback: one permanent worker per rebalance component under a
%% one_for_one strategy allowing 10 restarts within 3600 seconds.
init([_Env]) ->
    Workers = [emqx_node_rebalance_evacuation,
               emqx_node_rebalance_agent,
               emqx_node_rebalance],
    ChildSpecs = [child_spec(Mod, []) || Mod <- Workers],
    RestartStrategy = {one_for_one, 10, 3600},
    {ok, {RestartStrategy, ChildSpecs}}.
%% Child spec of a permanent worker started via Mod:start_link with Args.
child_spec(Mod, Args) ->
    Common = #{restart => permanent,
               shutdown => 5000,
               type => worker},
    Common#{id => Mod,
            start => {Mod, start_link, Args},
            modules => [Mod]}.

View File

@ -0,0 +1,183 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("emqx/include/emqx.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect_many/1]).
%% Common Test callback: the test case list is delegated to emqx_ct:all/1.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the applications under test on the local node.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the applications in reverse start order.
end_per_suite(Config) ->
    Apps = [emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% Stop any running rebalance, then start a slave node `recipient1' and
%% expose it to the test case as `recipient_node'.
init_per_testcase(_Case, Config) ->
    _ = emqx_node_rebalance:stop(),
    SlaveOpts = #{start_apps => [emqx, emqx_eviction_agent, emqx_node_rebalance]},
    RecipientNode = emqx_node_helpers:start_slave(recipient1, SlaveOpts),
    [{recipient_node, RecipientNode} | Config].
%% Stop the recipient slave node first, then any rebalance still running.
end_per_testcase(_Case, Config) ->
    RecipientNode = ?config(recipient_node, Config),
    _ = emqx_node_helpers:stop_slave(RecipientNode),
    _ = emqx_node_rebalance:stop().
%% Connect 500 clients locally, start a rebalance towards the recipient node
%% and wait (up to 10s) for the `emqx_node_rebalance_evict_sess_over' trace
%% event. Afterwards the donor may exceed the recipient by at most 50
%% connections and 50 disconnected sessions.
t_rebalance(Config) ->
    process_flag(trap_exit, true),
    Recipient = ?config(recipient_node, Config),
    _Conns = emqtt_connect_many(500),
    RebalanceOpts = #{conn_evict_rate => 10,
                      sess_evict_rate => 10,
                      evict_interval => 10,
                      abs_conn_threshold => 50,
                      abs_sess_threshold => 50,
                      rel_conn_threshold => 1.0,
                      rel_sess_threshold => 1.0,
                      wait_health_check => 0.01,
                      wait_takeover => 0.01,
                      nodes => [node(), Recipient]},

    ?check_trace(
       ?wait_async_action(
          emqx_node_rebalance:start(RebalanceOpts),
          #{?snk_kind := emqx_node_rebalance_evict_sess_over},
          10000),
       fun({ok, _}, Trace) ->
               ?assertMatch([_ | _],
                            ?of_kind(emqx_node_rebalance_evict_sess_over, Trace))
       end),

    LocalConns = emqx_eviction_agent:connection_count(),
    LocalSessions = emqx_eviction_agent:session_count(),
    LocalDisconnected = emqx_eviction_agent:session_count(disconnected),

    RemoteCount = fun(Fun, Args) ->
                          rpc:call(Recipient, emqx_eviction_agent, Fun, Args)
                  end,
    RemoteConns = RemoteCount(connection_count, []),
    RemoteSessions = RemoteCount(session_count, []),
    RemoteDisconnected = RemoteCount(session_count, [disconnected]),

    ct:pal("Donor: conn=~p, sess=~p, dsess=~p",
           [LocalConns, LocalSessions, LocalDisconnected]),
    ct:pal("Recipient: conn=~p, sess=~p, dsess=~p",
           [RemoteConns, RemoteSessions, RemoteDisconnected]),

    ?assert(LocalConns - 50 =< RemoteConns),
    ?assert(LocalDisconnected - 50 =< RemoteDisconnected).
%% Start a rebalance, stop the recipient node while it is running, and
%% expect the rebalance to end up disabled.
t_rebalance_node_crash(Config) ->
    process_flag(trap_exit, true),
    Recipient = ?config(recipient_node, Config),
    _Conns = emqtt_connect_many(50),
    RebalanceOpts = #{conn_evict_rate => 10,
                      sess_evict_rate => 10,
                      evict_interval => 10,
                      abs_conn_threshold => 50,
                      abs_sess_threshold => 50,
                      rel_conn_threshold => 1.0,
                      rel_sess_threshold => 1.0,
                      wait_health_check => 0.01,
                      wait_takeover => 0.01,
                      nodes => [node(), Recipient]},
    ok = emqx_node_rebalance:start(RebalanceOpts),

    %% The trace check itself accepts any outcome; only the final status
    %% below is asserted.
    ?check_trace(
       ?wait_async_action(
          emqx_node_helpers:stop_slave(?config(recipient_node, Config)),
          #{?snk_kind := emqx_node_rebalance_started},
          1000),
       fun(_Result, _Trace) -> ok end),

    ?assertEqual(disabled, emqx_node_rebalance:status()).
%% Starting with default options is refused with `nothing_to_balance', both
%% before and after 50 clients connect.
t_no_need_to_rebalance(_Config) ->
    process_flag(trap_exit, true),
    NothingToBalance = {error, nothing_to_balance},
    ?assertEqual(NothingToBalance, emqx_node_rebalance:start(#{})),
    _Conns = emqtt_connect_many(50),
    ?assertEqual(NothingToBalance, emqx_node_rebalance:start(#{})).
%% The rebalance server must tolerate unknown info messages, casts and calls
%% (answering calls with `ignored'), both before and after a rebalance starts.
t_unknown_mesages(Config) ->
    process_flag(trap_exit, true),
    Recipient = ?config(recipient_node, Config),
    _Conns = emqtt_connect_many(500),
    RebalanceOpts = #{wait_health_check => 100,
                      abs_conn_threshold => 50,
                      nodes => [node(), Recipient]},
    Server = whereis(emqx_node_rebalance),
    SendUnknown = fun() ->
                          Server ! unknown,
                          ok = gen_server:cast(Server, unknown),
                          ?assertEqual(ignored, gen_server:call(Server, unknown))
                  end,

    SendUnknown(),
    ok = emqx_node_rebalance:start(RebalanceOpts),
    SendUnknown().
%% With the eviction agent enabled on the recipient node, only the local
%% node is reported as available.
t_available_nodes(Config) ->
    Recipient = ?config(recipient_node, Config),
    rpc:call(Recipient, emqx_eviction_agent, enable, [test_rebalance, undefined]),
    Available = emqx_node_rebalance:available_nodes(
                  [node(), ?config(recipient_node, Config)]),
    ?assertEqual([node()], Available).

View File

@ -0,0 +1,163 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_agent_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("emqx/include/emqx.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
%% Common Test callback: the test case list is delegated to emqx_ct:all/1.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the applications under test on the local node.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the applications in reverse start order.
end_per_suite(Config) ->
    Apps = [emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% Stop any running evacuation, then start a slave node `evacuate1' and
%% expose it to the test case as `evacuate_node'.
init_per_testcase(_Case, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    SlaveOpts = #{start_apps => [emqx, emqx_eviction_agent, emqx_node_rebalance]},
    SlaveNode = emqx_node_helpers:start_slave(evacuate1, SlaveOpts),
    [{evacuate_node, SlaveNode} | Config].
%% Stop the slave node first, then any evacuation still running.
end_per_testcase(_Case, Config) ->
    SlaveNode = ?config(evacuate_node, Config),
    _ = emqx_node_helpers:stop_slave(SlaveNode),
    _ = emqx_node_rebalance_evacuation:stop().
%% Walk the agent through enable/disable: double enable and double disable
%% are rejected, and only the coordinator that enabled the agent may disable
%% it.
t_enable_disable(_Config) ->
    Self = self(),
    ?assertEqual(disabled, emqx_node_rebalance_agent:status()),

    ?assertEqual(ok, emqx_node_rebalance_agent:enable(Self)),
    ?assertEqual({error, already_enabled}, emqx_node_rebalance_agent:enable(Self)),
    ?assertEqual({enabled, Self}, emqx_node_rebalance_agent:status()),

    %% A different process is not accepted as the coordinator.
    OtherPid = spawn_link(fun() -> ok end),
    ?assertEqual({error, invalid_coordinator},
                 emqx_node_rebalance_agent:disable(OtherPid)),

    ?assertEqual(ok, emqx_node_rebalance_agent:disable(Self)),
    ?assertEqual({error, already_disabled}, emqx_node_rebalance_agent:disable(Self)),
    ?assertEqual(disabled, emqx_node_rebalance_agent:status()).
%% While the eviction agent is enabled under another kind, enabling the
%% rebalance agent is refused with `eviction_agent_busy'.
t_enable_egent_busy(_Config) ->
    ok = emqx_eviction_agent:enable(rebalance_test, undefined),
    EnableResult = emqx_node_rebalance_agent:enable(self()),
    ?assertEqual({error, eviction_agent_busy}, EnableResult),
    ok = emqx_eviction_agent:disable(rebalance_test).
% The following tests verify that emqx_node_rebalance_agent correctly links
% the coordinator process with the emqx_eviction_agent process.
%% Enable the rebalance agent on the slave node with a local coordinator
%% process, kill the coordinator and expect the remote emqx_eviction_agent
%% process to exit within one second.
t_rebalance_agent_coordinator_fail(Config) ->
    process_flag(trap_exit, true),
    Slave = ?config(evacuate_node, Config),
    Coordinator = spawn_link(fun() -> receive done -> ok end end),

    ?assertEqual(disabled, rpc:call(Slave, emqx_eviction_agent, status, [])),
    ?assertEqual(ok,
                 rpc:call(Slave, emqx_node_rebalance_agent, enable, [Coordinator])),
    ?assertMatch({enabled, _}, rpc:call(Slave, emqx_eviction_agent, status, [])),

    %% Link to the remote agent so that its exit is delivered here as a message.
    AgentPid = rpc:call(Slave, erlang, whereis, [emqx_eviction_agent]),
    true = link(AgentPid),

    true = exit(Coordinator, kill),

    receive
        {'EXIT', AgentPid, _} -> true
    after 1000 ->
            ?assert(false, "emqx_eviction_agent did not exit")
    end.
%% Enable the rebalance agent on the slave node with a local coordinator
%% process, kill the remote emqx_eviction_agent process and expect the
%% coordinator to exit within one second.
t_rebalance_agent_fail(Config) ->
    process_flag(trap_exit, true),

    Node = ?config(evacuate_node, Config),

    %% The coordinator is linked to this process, so its exit arrives as an
    %% 'EXIT' message because exits are trapped.
    CoordinatorPid = spawn_link(
                       fun() ->
                               receive
                                   done -> ok
                               end
                       end),

    ?assertEqual(
       ok,
       rpc:call(Node, emqx_node_rebalance_agent, enable, [CoordinatorPid])),

    EvictionAgentPid = rpc:call(Node, erlang, whereis, [emqx_eviction_agent]),
    true = exit(EvictionAgentPid, kill),

    receive
        {'EXIT', CoordinatorPid, _} -> true
    after
        %% This test waits for the coordinator, not the eviction agent, so the
        %% failure message names the coordinator.
        1000 -> ?assert(false, "coordinator did not exit")
    end.
%% The rebalance agent must tolerate unknown casts and info messages and
%% answer unknown calls with `ignored'.
t_unknown_messages(_Config) ->
    Agent = whereis(emqx_node_rebalance_agent),
    ok = gen_server:cast(Agent, unknown),
    Agent ! unknown,
    ignored = gen_server:call(Agent, unknown).

View File

@ -0,0 +1,321 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_api_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-import(emqx_mgmt_api_test_helpers,
[request_api/3,
request_api/5,
auth_header_/0,
api_path/1]).
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect_many/1]).
%% Common Test callback: the test case list is delegated to emqx_ct:all/1.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the applications under test, including the management API.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance, emqx_management],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the applications in reverse start order.
end_per_suite(Config) ->
    Apps = [emqx_management, emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% Every test case starts with rebalance and evacuation stopped. The cases
%% listed in the guard additionally get a slave node `recipient1', exposed
%% as `recipient_node'.
init_per_testcase(Case, Config)
  when Case =:= t_start_evacuation_validation;
       Case =:= t_start_rebalance_validation;
       Case =:= t_start_stop_rebalance ->
    _ = emqx_node_rebalance:stop(),
    _ = emqx_node_rebalance_evacuation:stop(),
    SlaveOpts = #{start_apps => [emqx, emqx_eviction_agent, emqx_node_rebalance]},
    RecipientNode = emqx_node_helpers:start_slave(recipient1, SlaveOpts),
    [{recipient_node, RecipientNode} | Config];
init_per_testcase(_Case, Config) ->
    _ = emqx_node_rebalance:stop(),
    _ = emqx_node_rebalance_evacuation:stop(),
    Config.
%% Mirror of init_per_testcase/2: the cases that started a recipient slave
%% node stop it before rebalance and evacuation are stopped.
end_per_testcase(Case, Config)
  when Case =:= t_start_evacuation_validation;
       Case =:= t_start_rebalance_validation;
       Case =:= t_start_stop_rebalance ->
    RecipientNode = ?config(recipient_node, Config),
    _ = emqx_node_helpers:stop_slave(RecipientNode),
    _ = emqx_node_rebalance:stop(),
    _ = emqx_node_rebalance_evacuation:stop();
end_per_testcase(_Case, _Config) ->
    _ = emqx_node_rebalance:stop(),
    _ = emqx_node_rebalance_evacuation:stop().
%% Posting invalid evacuation options, or targeting an unknown node, must
%% leave the node status `disabled'; a valid request must enable it.
t_start_evacuation_validation(Config) ->
    StatusPath = ["load_rebalance", "status"],
    InvalidOpts = [#{conn_evict_rate => <<"conn">>},
                   #{sess_evict_rate => <<"sess">>},
                   #{redirect_to => 123},
                   #{wait_takeover => <<"wait">>},
                   #{migrate_to => []},
                   #{migrate_to => <<"migrate_to">>},
                   #{migrate_to => [<<"bad_node">>]},
                   #{migrate_to => [<<"bad_node">>, atom_to_binary(node())]},
                   #{unknown => <<"Value">>}],
    lists:foreach(
      fun(Opts) ->
              ?assertMatch(
                 {ok, #{}},
                 api_post(["load_rebalance", atom_to_list(node()), "evacuation", "start"],
                          Opts)),
              ?assertMatch({ok, #{<<"status">> := <<"disabled">>}},
                           api_get(StatusPath))
      end,
      InvalidOpts),

    %% Unknown target node.
    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", "bad@node", "evacuation", "start"], #{})),
    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),

    %% Valid request.
    ValidOpts = #{conn_evict_rate => 10,
                  sess_evict_rate => 10,
                  wait_takeover => 10,
                  redirect_to => <<"srv">>,
                  migrate_to => [atom_to_binary(?config(recipient_node, Config))]},
    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "evacuation", "start"],
                ValidOpts)),
    ?assertMatch({ok, #{<<"status">> := <<"enabled">>}}, api_get(StatusPath)).
%% Posting invalid rebalance options, or targeting an unknown node, must
%% leave the node status `disabled'; a valid request issued after 50 clients
%% connect must enable it.
t_start_rebalance_validation(Config) ->
    StatusPath = ["load_rebalance", "status"],
    InvalidOpts = [#{conn_evict_rate => <<"conn">>},
                   #{sess_evict_rate => <<"sess">>},
                   #{abs_conn_threshold => <<"act">>},
                   #{rel_conn_threshold => <<"rct">>},
                   #{abs_sess_threshold => <<"act">>},
                   #{rel_sess_threshold => <<"rct">>},
                   #{wait_takeover => <<"wait">>},
                   #{wait_health_check => <<"wait">>},
                   #{nodes => <<"nodes">>},
                   #{nodes => []},
                   #{nodes => [<<"bad_node">>]},
                   #{nodes => [<<"bad_node">>, atom_to_binary(node())]},
                   #{unknown => <<"Value">>}],
    lists:foreach(
      fun(Opts) ->
              ?assertMatch(
                 {ok, #{}},
                 api_post(["load_rebalance", atom_to_list(node()), "start"], Opts)),
              ?assertMatch({ok, #{<<"status">> := <<"disabled">>}},
                           api_get(StatusPath))
      end,
      InvalidOpts),

    %% Unknown target node.
    ?assertMatch({ok, #{}},
                 api_post(["load_rebalance", "bad@node", "start"], #{})),
    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),

    %% Valid request.
    _Conns = emqtt_connect_many(50),
    ValidOpts = #{conn_evict_rate => 10,
                  sess_evict_rate => 10,
                  wait_takeover => 10,
                  wait_health_check => 10,
                  abs_conn_threshold => 10,
                  rel_conn_threshold => 1.001,
                  abs_sess_threshold => 10,
                  rel_sess_threshold => 1.001,
                  nodes => [atom_to_binary(?config(recipient_node, Config)),
                            atom_to_binary(node())]},
    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "start"], ValidOpts)),
    ?assertMatch({ok, #{<<"status">> := <<"enabled">>}}, api_get(StatusPath)).
%% Start an evacuation over the HTTP API, check its local and global status
%% reports, stop it and check that the status is `disabled' again.
t_start_stop_evacuation(_Config) ->
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),

    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "evacuation", "start"],
                #{conn_evict_rate => 10,
                  sess_evict_rate => 20})),

    %% Local status.
    ?assertMatch(
       {ok, #{<<"state">> := _,
              <<"process">> := <<"evacuation">>,
              <<"connection_eviction_rate">> := 10,
              <<"session_eviction_rate">> := 20,
              <<"connection_goal">> := 0,
              <<"session_goal">> := 0,
              <<"stats">> := #{<<"initial_connected">> := _,
                               <<"current_connected">> := _,
                               <<"initial_sessions">> := _,
                               <<"current_sessions">> := _}}},
       api_get(StatusPath)),

    %% Global status.
    ?assertMatch(
       {ok, #{<<"rebalances">> := #{},
              <<"evacuations">> :=
                  #{<<"test@127.0.0.1">> :=
                        #{<<"state">> := _,
                          <<"connection_eviction_rate">> := 10,
                          <<"session_eviction_rate">> := 20,
                          <<"connection_goal">> := 0,
                          <<"session_goal">> := 0,
                          <<"stats">> := #{<<"initial_connected">> := _,
                                           <<"current_connected">> := _,
                                           <<"initial_sessions">> := _,
                                           <<"current_sessions">> := _}}}}},
       api_get(GlobalStatusPath)),

    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "evacuation", "stop"],
                #{})),

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),
    ?assertMatch({ok, #{<<"evacuations">> := #{}, <<"rebalances">> := #{}}},
                 api_get(GlobalStatusPath)).
%% Start a rebalance over the HTTP API after 100 clients connect, check its
%% local and global status reports, stop it and check that the status is
%% `disabled' again.
t_start_stop_rebalance(Config) ->
    StatusPath = ["load_rebalance", "status"],
    GlobalStatusPath = ["load_rebalance", "global_status"],

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),

    _Conns = emqtt_connect_many(100),

    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "start"],
                #{conn_evict_rate => 10,
                  sess_evict_rate => 20,
                  abs_conn_threshold => 10})),

    %% Local status.
    ?assertMatch(
       {ok, #{<<"state">> := _,
              <<"process">> := <<"rebalance">>,
              <<"coordinator_node">> := _,
              <<"connection_eviction_rate">> := 10,
              <<"session_eviction_rate">> := 20,
              <<"stats">> := #{<<"initial_connected">> := _,
                               <<"current_connected">> := _,
                               <<"initial_sessions">> := _,
                               <<"current_sessions">> := _}}},
       api_get(StatusPath)),

    %% Global status; the bound node names are matched inside the pattern.
    Donor = atom_to_binary(node()),
    Recipient = atom_to_binary(?config(recipient_node, Config)),
    ?assertMatch(
       {ok, #{<<"evacuations">> := #{},
              <<"rebalances">> :=
                  #{<<"test@127.0.0.1">> :=
                        #{<<"state">> := _,
                          <<"coordinator_node">> := _,
                          <<"connection_eviction_rate">> := 10,
                          <<"session_eviction_rate">> := 20,
                          <<"donors">> := [Donor],
                          <<"recipients">> := [Recipient]}}}},
       api_get(GlobalStatusPath)),

    ?assertMatch(
       {ok, #{}},
       api_post(["load_rebalance", atom_to_list(node()), "stop"], #{})),

    ?assertMatch({ok, #{<<"status">> := <<"disabled">>}}, api_get(StatusPath)),
    ?assertMatch({ok, #{<<"evacuations">> := #{}, <<"rebalances">> := #{}}},
                 api_get(GlobalStatusPath)).
%% The availability check succeeds while no evacuation is running and
%% answers with HTTP 503 while one is.
t_availability_check(_Config) ->
    CheckPath = ["load_rebalance", "availability_check"],
    ?assertMatch({ok, #{}}, api_get(CheckPath)),

    ok = emqx_node_rebalance_evacuation:start(#{}),
    ?assertMatch({error, {_, 503, _}}, api_get(CheckPath)),

    ok = emqx_node_rebalance_evacuation:stop(),
    ?assertMatch({ok, #{}}, api_get(CheckPath)).
%% Issue an authenticated GET against the API path and decode a successful
%% response body into a map; errors are passed through unchanged.
api_get(Path) ->
    Url = api_path(Path),
    case request_api(get, Url, auth_header_()) of
        {error, _} = Error ->
            Error;
        {ok, Body} ->
            Json = list_to_binary(Body),
            {ok, jiffy:decode(Json, [return_maps])}
    end.
%% Issue an authenticated POST of Data against the API path and decode a
%% successful response body into a map; errors are passed through unchanged.
api_post(Path, Data) ->
    Url = api_path(Path),
    case request_api(post, Url, [], auth_header_(), Data) of
        {error, _} = Error ->
            Error;
        {ok, Body} ->
            Json = list_to_binary(Body),
            {ok, jiffy:decode(Json, [return_maps])}
    end.

View File

@ -0,0 +1,199 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_cli_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect_many/1]).
%% Common Test callback: the test case list is delegated to emqx_ct:all/1.
all() ->
    Suite = ?MODULE,
    emqx_ct:all(Suite).
%% Start the applications under test on the local node.
init_per_suite(Config) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    emqx_ct_helpers:start_apps(Apps),
    Config.
%% Stop the applications in reverse start order.
end_per_suite(Config) ->
    Apps = [emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Config.
%% Every test case starts with both evacuation and rebalance stopped, so no
%% state leaks in from a previous case. `t_rebalance' additionally gets a
%% slave node `evacuate1', exposed as `evacuate_node'.
init_per_testcase(t_rebalance, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    %% Also stop a leftover rebalance, as the generic clause and
    %% end_per_testcase/2 do.
    _ = emqx_node_rebalance:stop(),
    Node = emqx_node_helpers:start_slave(
             evacuate1,
             #{start_apps => [emqx, emqx_eviction_agent, emqx_node_rebalance]}),
    [{evacuate_node, Node} | Config];
init_per_testcase(_Case, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    Config.
%% Stop evacuation and rebalance after every case; `t_rebalance' also stops
%% the slave node it was given.
end_per_testcase(t_rebalance, Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop(),
    SlaveNode = ?config(evacuate_node, Config),
    _ = emqx_node_helpers:stop_slave(SlaveNode);
end_per_testcase(_Case, _Config) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    _ = emqx_node_rebalance:stop().
%% Drive an evacuation through the CLI: usage and status output, rejected
%% invalid arguments, a successful start, a rejected second start, and stop.
t_evacuation(_Config) ->
    ThisNode = atom_to_list(node()),
    PrintStatus = fun() ->
                          ok = emqx_node_rebalance_cli:cli(["status"]),
                          ok = emqx_node_rebalance_cli:cli(["node-status"]),
                          ok = emqx_node_rebalance_cli:cli(["node-status", ThisNode])
                  end,

    %% usage
    ok = emqx_node_rebalance_cli:cli(["foobar"]),

    %% status
    PrintStatus(),

    %% start with invalid args
    InvalidArgs = [["--foo-bar"],
                   ["--conn-evict-rate", "foobar"],
                   ["--sess-evict-rate", "foobar"],
                   ["--wait-takeover", "foobar"],
                   ["--migrate-to", "nonexistent@node"],
                   ["--migrate-to", ""],
                   ["--unknown-arg"]],
    lists:foreach(
      fun(Args) ->
              ?assertNot(
                 emqx_node_rebalance_cli:cli(["start", "--evacuation" | Args]))
      end,
      InvalidArgs),

    %% start with valid args
    ?assert(
       emqx_node_rebalance_cli:cli(["start", "--evacuation",
                                    "--conn-evict-rate", "10",
                                    "--sess-evict-rate", "10",
                                    "--wait-takeover", "10",
                                    "--migrate-to", ThisNode,
                                    "--redirect-to", "srv"])),

    %% status
    PrintStatus(),
    ?assertMatch({enabled, #{}}, emqx_node_rebalance_evacuation:status()),

    %% already enabled
    ?assertNot(
       emqx_node_rebalance_cli:cli(["start", "--evacuation",
                                    "--conn-evict-rate", "10",
                                    "--redirect-to", "srv"])),

    %% stop
    true = emqx_node_rebalance_cli:cli(["stop"]),
    false = emqx_node_rebalance_cli:cli(["stop"]),
    ?assertEqual(disabled, emqx_node_rebalance_evacuation:status()).
%% Drive a rebalance through the CLI: rejected invalid arguments, a
%% successful start after 20 clients connect, status output, a rejected
%% second start, and stop.
t_rebalance(Config) ->
    ThisNode = atom_to_list(node()),
    SlaveNode = atom_to_list(?config(evacuate_node, Config)),

    %% start with invalid args
    InvalidArgs = [["--foo-bar"],
                   ["--conn-evict-rate", "foobar"],
                   ["--abs-conn-threshold", "foobar"],
                   ["--rel-conn-threshold", "foobar"],
                   ["--sess-evict-rate", "foobar"],
                   ["--abs-sess-threshold", "foobar"],
                   ["--rel-sess-threshold", "foobar"],
                   ["--wait-takeover", "foobar"],
                   ["--wait-health-check", "foobar"],
                   ["--nodes", "nonexistent@node"],
                   ["--nodes", ""],
                   ["--nodes", SlaveNode],
                   ["--unknown-arg"]],
    lists:foreach(
      fun(Args) ->
              ?assertNot(emqx_node_rebalance_cli:cli(["start" | Args]))
      end,
      InvalidArgs),

    _ = emqtt_connect_many(20),

    %% start with valid args
    ?assert(
       emqx_node_rebalance_cli:cli(["start",
                                    "--conn-evict-rate", "10",
                                    "--abs-conn-threshold", "10",
                                    "--rel-conn-threshold", "1.1",
                                    "--sess-evict-rate", "10",
                                    "--abs-sess-threshold", "10",
                                    "--rel-sess-threshold", "1.1",
                                    "--wait-takeover", "10",
                                    "--nodes", ThisNode ++ "," ++ SlaveNode])),

    %% status
    ok = emqx_node_rebalance_cli:cli(["status"]),
    ok = emqx_node_rebalance_cli:cli(["node-status"]),
    ok = emqx_node_rebalance_cli:cli(["node-status", ThisNode]),
    ?assertMatch({enabled, #{}}, emqx_node_rebalance:status()),

    %% already enabled
    ?assertNot(emqx_node_rebalance_cli:cli(["start"])),

    %% stop
    true = emqx_node_rebalance_cli:cli(["stop"]),
    false = emqx_node_rebalance_cli:cli(["stop"]),
    ?assertEqual(disabled, emqx_node_rebalance:status()).

View File

@ -0,0 +1,194 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_evacuation_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-import(emqx_eviction_agent_test_helpers,
[emqtt_connect/0, emqtt_connect/2, emqtt_try_connect/0]).
%% Common Test `all/0' callback: returns whatever `emqx_ct:all/1' yields
%% for this module.
all() ->
emqx_ct:all(?MODULE).
%% Suite setup: starts the eviction agent and node rebalance applications
%% through `emqx_ct_helpers' and hands the CT config back unchanged.
init_per_suite(Cfg) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    emqx_ct_helpers:start_apps(Apps),
    Cfg.
%% Suite teardown: stops the applications started in `init_per_suite/1',
%% in reverse order, and returns the CT config unchanged.
end_per_suite(Cfg) ->
    Apps = [emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Cfg.
%% Per-testcase setup: makes sure no evacuation is running (the result of
%% `stop/0' is deliberately ignored), starts the `evacuate1' slave node with
%% `emqx' and `emqx_eviction_agent', and publishes the node name to the test
%% case under the `evacuate_node' config key.
init_per_testcase(_Case, Cfg) ->
    _ = emqx_node_rebalance_evacuation:stop(),
    SlaveOpts = #{start_apps => [emqx, emqx_eviction_agent]},
    EvacuateNode = emqx_node_helpers:start_slave(evacuate1, SlaveOpts),
    [{evacuate_node, EvacuateNode} | Cfg].
%% Per-testcase teardown: stops the slave node created in
%% `init_per_testcase/2' and then stops any evacuation still running.
%% Both results are ignored so teardown never fails on its own.
end_per_testcase(_Case, Cfg) ->
    EvacuateNode = ?config(evacuate_node, Cfg),
    _ = emqx_node_helpers:stop_slave(EvacuateNode),
    _ = emqx_node_rebalance_evacuation:stop().
%% Evacuation must refuse to start with `{error, eviction_agent_busy}' while
%% the local eviction agent is already enabled on behalf of another
%% rebalance kind (`other_rebalance').
t_agent_busy(Config) ->
    ok = emqx_eviction_agent:enable(other_rebalance, undefined),
    try
        ?assertEqual(
           {error, eviction_agent_busy},
           emqx_node_rebalance_evacuation:start(opts(Config)))
    after
        %% Always release the agent: the per-testcase hooks only stop the
        %% evacuation, so a failed assertion would otherwise leave the agent
        %% enabled for every test case that runs after this one.
        emqx_eviction_agent:disable(other_rebalance)
    end.
%% Starting an evacuation twice with the same options must yield
%% `{error, already_started}' on the second attempt.
t_already_started(Config) ->
    Opts = opts(Config),
    ok = emqx_node_rebalance_evacuation:start(Opts),
    SecondStart = emqx_node_rebalance_evacuation:start(Opts),
    ?assertEqual({error, already_started}, SecondStart),
    ok = emqx_node_rebalance_evacuation:stop().
%% Stopping an evacuation that was never started must report
%% `{error, not_started}'.
t_not_started(_Config) ->
    StopResult = emqx_node_rebalance_evacuation:stop(),
    ?assertEqual({error, not_started}, StopResult).
%% Once evacuation is running, a new connection attempt must be rejected
%% with `use_another_server'. Exits are trapped so a linked client that
%% terminates does not take the test process down with it.
t_start(Config) ->
    _ = process_flag(trap_exit, true),
    ok = emqx_node_rebalance_evacuation:start(opts(Config)),
    ConnectResult = emqtt_try_connect(),
    ?assertMatch({error, {use_another_server, #{}}}, ConnectResult),
    ok = emqx_node_rebalance_evacuation:stop().
%% The evacuation state must survive a restart of the evacuation process:
%% connections are refused before the restart, and after terminating and
%% restarting the child under `emqx_node_rebalance_sup' they are still
%% refused and `status/0' still reports the configured `conn_evict_rate'.
t_persistence(Config) ->
    _ = process_flag(trap_exit, true),
    AssertConnectRefused =
        fun() ->
                ?assertMatch(
                   {error, {use_another_server, #{}}},
                   emqtt_try_connect())
        end,
    ok = emqx_node_rebalance_evacuation:start(opts(Config)),
    AssertConnectRefused(),
    %% bounce the evacuation server through its supervisor
    ok = supervisor:terminate_child(emqx_node_rebalance_sup, emqx_node_rebalance_evacuation),
    {ok, _} = supervisor:restart_child(emqx_node_rebalance_sup, emqx_node_rebalance_evacuation),
    AssertConnectRefused(),
    Status = emqx_node_rebalance_evacuation:status(),
    ?assertMatch({enabled, #{conn_evict_rate := 10}}, Status),
    ok = emqx_node_rebalance_evacuation:stop().
%% A client connected before the evacuation starts must be evicted:
%% the test waits (up to 1000 ms) for a `node_evacuation_evict_conn' trace
%% event, gives the client a short grace period, and then checks that new
%% connections are refused and the original client process is gone.
t_conn_evicted(Config) ->
    _ = process_flag(trap_exit, true),
    {ok, Client} = emqtt_connect(),
    EvacuationOpts = opts(Config),
    ?check_trace(
       ?wait_async_action(
          emqx_node_rebalance_evacuation:start(EvacuationOpts),
          #{?snk_kind := node_evacuation_evict_conn},
          1000),
       fun(_Result, _Trace) -> ok end),
    %% leave the evicted client some time to actually terminate
    ct:sleep(100),
    ReconnectResult = emqtt_try_connect(),
    ?assertMatch({error, {use_another_server, #{}}}, ReconnectResult),
    ?assertNot(is_process_alive(Client)).
%% Checks how `emqx_node_rebalance_evacuation:migrate_to/1' picks targets:
%%  * `undefined' yields the slave node started for this test case;
%%  * an explicit list with an unknown node yields nothing;
%%  * once the slave's eviction agent is enabled, `undefined' yields
%%    nothing either.
t_migrate_to(Config) ->
    EvacuateNode = ?config(evacuate_node, Config),
    ?assertEqual(
       [EvacuateNode],
       emqx_node_rebalance_evacuation:migrate_to(undefined)),
    ?assertEqual(
       [],
       emqx_node_rebalance_evacuation:migrate_to(['unknown@node'])),
    %% Assert the remote call itself: a `{badrpc, _}' or a refused enable
    %% would otherwise only surface as a confusing mismatch in the final
    %% `migrate_to/1' assertion.
    ?assertEqual(
       ok,
       rpc:call(EvacuateNode,
                emqx_eviction_agent,
                enable,
                [test_rebalance, undefined])),
    ?assertEqual(
       [],
       emqx_node_rebalance_evacuation:migrate_to(undefined)).
%% A client with a session must be moved to the evacuation target node:
%% after the `node_evacuation_evict_sess_over' trace event is seen, the
%% linked client must exit with `?RC_USE_ANOTHER_SERVER' and the only
%% channel registered for its client id must live on the slave node.
%% NOTE(review): the `false' passed to `emqtt_connect/2' is presumably the
%% clean-start flag - confirm against emqx_eviction_agent_test_helpers.
t_session_evicted(Config) ->
    _ = process_flag(trap_exit, true),
    ClientId = <<"client_with_sess">>,
    {ok, Client} = emqtt_connect(ClientId, false),
    ?check_trace(
       ?wait_async_action(
          emqx_node_rebalance_evacuation:start(opts(Config)),
          #{?snk_kind := node_evacuation_evict_sess_over},
          5000),
       fun(_Result, Trace) ->
               SessOverEvents = ?of_kind(node_evacuation_evict_sess_over, Trace),
               ?assertMatch([_ | _], SessOverEvents)
       end),
    %% exits are trapped, so the client's termination arrives as a message
    receive
        {'EXIT', Client, {disconnected, ?RC_USE_ANOTHER_SERVER, _}} ->
            ok
    after 1000 ->
            ?assert(false, "Connection not evicted")
    end,
    [ChannelPid] = emqx_cm_registry:lookup_channels(ClientId),
    ExpectedNode = ?config(evacuate_node, Config),
    ?assertEqual(ExpectedNode, node(ChannelPid)).
%% The evacuation server must tolerate unexpected traffic: a raw message
%% and an unknown cast are sent to it, and an unknown call is expected to
%% be answered with `ignored'. The final `stop/0' returning `ok' shows the
%% server is still running.
t_unknown_messages(Config) ->
    Server = emqx_node_rebalance_evacuation,
    ok = emqx_node_rebalance_evacuation:start(opts(Config)),
    whereis(Server) ! unknown,
    gen_server:cast(Server, unknown),
    CallReply = gen_server:call(Server, unknown),
    ?assertEqual(ignored, CallReply),
    ok = emqx_node_rebalance_evacuation:stop().
%% Evacuation options shared by the test cases; the only migration target
%% is the slave node stored under `evacuate_node' in the CT config.
opts(Config) ->
    MigrateTo = [?config(evacuate_node, Config)],
    #{server_reference => <<"srv">>,
      conn_evict_rate => 10,
      sess_evict_rate => 10,
      wait_takeover => 1,
      migrate_to => MigrateTo}.

View File

@ -0,0 +1,107 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_rebalance_evacuation_persist_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
%% Common Test `all/0' callback: returns whatever `emqx_ct:all/1' yields
%% for this module.
all() ->
emqx_ct:all(?MODULE).
%% Suite setup: starts the eviction agent and node rebalance applications
%% through `emqx_ct_helpers' and hands the CT config back unchanged.
init_per_suite(Cfg) ->
    Apps = [emqx_eviction_agent, emqx_node_rebalance],
    emqx_ct_helpers:start_apps(Apps),
    Cfg.
%% Suite teardown: stops the applications started in `init_per_suite/1',
%% in reverse order, and returns the CT config unchanged.
end_per_suite(Cfg) ->
    Apps = [emqx_node_rebalance, emqx_eviction_agent],
    emqx_ct_helpers:stop_apps(Apps),
    Cfg.
%% Per-testcase setup: clears any persisted evacuation state (result
%% ignored) so each test case starts from a clean slate.
init_per_testcase(_TestCase, Cfg) ->
    _ = emqx_node_rebalance_evacuation_persist:clear(),
    Cfg.
%% Per-testcase teardown: clears whatever evacuation state the test case
%% left behind.
end_per_testcase(_TestCase, _Cfg) ->
    emqx_node_rebalance_evacuation_persist:clear().
%% Saved options must be read back verbatim - the defaults handed to
%% `read/1' must not override stored values. This is checked with a binary
%% `server_reference' and again with `undefined'.
t_save_read(_Config) ->
    Defaults = #{server_reference => <<"default_ref">>,
                 conn_evict_rate => 2001,
                 sess_evict_rate => 2002,
                 wait_takeover => 2003},
    Stored = #{server_reference => <<"ref">>,
               conn_evict_rate => 1001,
               sess_evict_rate => 1002,
               wait_takeover => 1003},
    AssertRoundTrip =
        fun(Opts) ->
                ok = emqx_node_rebalance_evacuation_persist:save(Opts),
                {ok, Read} = emqx_node_rebalance_evacuation_persist:read(Defaults),
                ?assertEqual(Opts, Read)
        end,
    AssertRoundTrip(Stored),
    AssertRoundTrip(Stored#{server_reference => undefined}).
%% When the evacuation file holds an empty JSON object, `read/1' must
%% return exactly the defaults it was given.
t_read_default(_Config) ->
    Defaults = #{server_reference => <<"ref">>,
                 conn_evict_rate => 1001,
                 sess_evict_rate => 1002,
                 wait_takeover => 1003},
    ok = write_evacuation_file(<<"{}">>),
    {ok, Read} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, Read).
%% When the evacuation file holds malformed JSON, `read/1' must still
%% succeed and return exactly the defaults it was given.
t_read_bad_data(_Config) ->
    Defaults = #{server_reference => <<"ref">>,
                 conn_evict_rate => 1001,
                 sess_evict_rate => 1002,
                 wait_takeover => 1003},
    ok = write_evacuation_file(<<"{bad json">>),
    {ok, Read} = emqx_node_rebalance_evacuation_persist:read(Defaults),
    ?assertEqual(Defaults, Read).
%% `clear/0' must remove the persisted state: `read/1' returns `{ok, _}'
%% while the file exists and `none' once it has been cleared.
t_clear(_Config) ->
    ok = write_evacuation_file(<<"{}">>),
    BeforeClear = emqx_node_rebalance_evacuation_persist:read(#{}),
    ?assertMatch({ok, _}, BeforeClear),
    ok = emqx_node_rebalance_evacuation_persist:clear(),
    AfterClear = emqx_node_rebalance_evacuation_persist:read(#{}),
    ?assertEqual(none, AfterClear).
%% Test helper: writes `Json' verbatim to the evacuation file, creating
%% the parent directory first if it does not exist yet.
write_evacuation_file(Json) ->
    Path = emqx_node_rebalance_evacuation_persist:evacuation_filepath(),
    ok = filelib:ensure_dir(Path),
    ok = file:write_file(Path, Json).

View File

@ -1,6 +1,6 @@
{application, emqx_rule_engine,
[{description, "EMQ X Rule Engine"},
{vsn, "4.3.13"}, % strict semver, bump manually!
{vsn, "4.3.14"}, % strict semver, bump manually!
{modules, []},
{registered, [emqx_rule_engine_sup, emqx_rule_registry]},
{applications, [kernel,stdlib,rulesql,getopt]},

View File

@ -1,7 +1,8 @@
%% -*- mode: erlang -*-
%% Unless you know what you are doing, DO NOT edit manually!!
{VSN,
[{"4.3.12",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
[{"4.3.13",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
{"4.3.12",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
{"4.3.11",
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
{load_module,emqx_rule_validator,brutal_purge,soft_purge,[]},
@ -165,7 +166,8 @@
{load_module,emqx_rule_runtime,brutal_purge,soft_purge,[]},
{load_module,emqx_rule_engine_api,brutal_purge,soft_purge,[]}]},
{<<".*">>,[]}],
[{"4.3.12",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
[{"4.3.13",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
{"4.3.12",[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]}]},
{"4.3.11",
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
{load_module,emqx_rule_validator,brutal_purge,soft_purge,[]},

View File

@ -6,3 +6,5 @@
{emqx_telemetry, {{enable_plugin_emqx_telemetry}}}.
{emqx_rule_engine, {{enable_plugin_emqx_rule_engine}}}.
{emqx_bridge_mqtt, {{enable_plugin_emqx_bridge_mqtt}}}.
{emqx_eviction_agent, true}.
{emqx_node_rebalance, true}.

View File

@ -1,6 +1,6 @@
{application, emqx_dashboard,
[{description, "EMQ X Web Dashboard"},
{vsn, "4.3.14"}, % strict semver, bump manually!
{vsn, "4.3.15"}, % strict semver, bump manually!
{modules, []},
{registered, [emqx_dashboard_sup]},
{applications, [kernel,stdlib,mnesia,minirest]},

View File

@ -51,6 +51,8 @@ overrides() ->
[ {add, [ {extra_src_dirs, [{"etc", [{recursive,true}]}]}
, {erl_opts, [{compile_info, [{emqx_vsn, get_vsn()}]}]}
]}
, {add, relx, [{erl_opts, [{d, 'RLX_LOG', rlx_log}]}]}
, {add, snabbkaffe,
[{erl_opts, common_compile_opts()}]}
] ++ community_plugin_overrides().
@ -295,6 +297,8 @@ relx_plugin_apps(ReleaseType) ->
, emqx_recon
, emqx_rule_engine
, emqx_sasl
, emqx_eviction_agent
, emqx_node_rebalance
]
++ [emqx_telemetry || not is_enterprise()]
++ relx_plugin_apps_per_rel(ReleaseType)

View File

@ -6,7 +6,7 @@
%% the emqx `release' version, which in turn is comprised of several
%% apps, one of which is this. See `emqx_release.hrl' for more
%% info.
{vsn, "4.3.19"}, % strict semver, bump manually!
{vsn, "4.3.20"}, % strict semver, bump manually!
{modules, []},
{registered, []},
{applications, [ kernel

View File

@ -1,12 +1,19 @@
%% -*- mode: erlang -*-
%% Unless you know what you are doing, DO NOT edit manually!!
{VSN,
[
[{"4.3.19",
[{load_module,emqx_plugins,brutal_purge,soft_purge,[]},
{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]}]},
{"4.3.18",
[{load_module,emqx_app,brutal_purge,soft_purge,[]},
[{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_app,brutal_purge,soft_purge,[]},
{load_module,emqx_plugins,brutal_purge,soft_purge,[]}]},
{"4.3.17",
[{load_module,emqx_exclusive_subscription,brutal_purge,soft_purge,[]},
[{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_exclusive_subscription,brutal_purge,soft_purge,[]},
{load_module,emqx_session,brutal_purge,soft_purge,[]},
{load_module,emqx_shared_sub,brutal_purge,soft_purge,[]},
{update,emqx_broker_sup,supervisor},
@ -14,7 +21,8 @@
{load_module,emqx_plugins,brutal_purge,soft_purge,[]},
{load_module,emqx_access_control,brutal_purge,soft_purge,[]}]},
{"4.3.16",
[{load_module,emqx_session,brutal_purge,soft_purge,[]},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_session,brutal_purge,soft_purge,[]},
{load_module,emqx_shared_sub,brutal_purge,soft_purge,[]},
{update,emqx_broker_sup,supervisor},
{load_module,emqx_access_control,brutal_purge,soft_purge,[]},
@ -30,7 +38,8 @@
{load_module,emqx_mqtt_caps,brutal_purge,soft_purge,[]},
{load_module,emqx_topic,brutal_purge,soft_purge,[]}]},
{"4.3.15",
[{add_module,emqx_calendar},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{add_module,emqx_calendar},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},
{add_module,emqx_exclusive_subscription},
{apply,{emqx_exclusive_subscription,on_add_module,[]}},
@ -55,7 +64,8 @@
{update,emqx_os_mon,{advanced,[]}},
{load_module,emqx_app,brutal_purge,soft_purge,[]}]},
{"4.3.14",
[{add_module,emqx_calendar},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{add_module,emqx_calendar},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},
{add_module,emqx_exclusive_subscription},
{apply,{emqx_exclusive_subscription,on_add_module,[]}},
@ -682,12 +692,19 @@
{load_module,emqx_message,brutal_purge,soft_purge,[]},
{load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
{<<".*">>,[]}],
[
[{"4.3.19",
[{load_module,emqx_plugins,brutal_purge,soft_purge,[]},
{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]}]},
{"4.3.18",
[{load_module,emqx_app,brutal_purge,soft_purge,[]},
{load_module,emqx_plugins,brutal_purge,soft_purge,[]}]},
[{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_app,brutal_purge,soft_purge,[]},
{load_module,emqx_plugins,brutal_purge,soft_purge,[]}]},
{"4.3.17",
[{load_module,emqx_exclusive_subscription,brutal_purge,soft_purge,[]},
[{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_exclusive_subscription,brutal_purge,soft_purge,[]},
{load_module,emqx_session,brutal_purge,soft_purge,[]},
{load_module,emqx_shared_sub,brutal_purge,soft_purge,[]},
{update,emqx_broker_sup,supervisor},
@ -695,7 +712,8 @@
{load_module,emqx_plugins,brutal_purge,soft_purge,[]},
{load_module,emqx_access_control,brutal_purge,soft_purge,[]}]},
{"4.3.16",
[{load_module,emqx_session,brutal_purge,soft_purge,[]},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{load_module,emqx_session,brutal_purge,soft_purge,[]},
{load_module,emqx_shared_sub,brutal_purge,soft_purge,[]},
{update,emqx_broker_sup,supervisor},
{load_module,emqx_access_control,brutal_purge,soft_purge,[]},
@ -711,7 +729,8 @@
{apply,{emqx_exclusive_subscription,on_delete_module,[]}},
{delete_module,emqx_exclusive_subscription}]},
{"4.3.15",
[{delete_module,emqx_calendar},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{delete_module,emqx_calendar},
{apply,{emqx_exclusive_subscription,on_delete_module,[]}},
{delete_module,emqx_exclusive_subscription},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},
@ -735,7 +754,8 @@
{load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
{load_module,emqx_app,brutal_purge,soft_purge,[]}]},
{"4.3.14",
[{delete_module,emqx_calendar},
[{load_module,emqx_cm,brutal_purge,soft_purge,[]},
{delete_module,emqx_calendar},
{apply,{emqx_exclusive_subscription,on_delete_module,[]}},
{delete_module,emqx_exclusive_subscription},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},

View File

@ -22,6 +22,8 @@
-include("logger.hrl").
-include("types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-logger_header("[Channel]").
-ifdef(TEST).
@ -850,11 +852,14 @@ handle_out(disconnect, ReasonCode, Channel) when is_integer(ReasonCode) ->
ReasonName = disconnect_reason(ReasonCode),
handle_out(disconnect, {ReasonCode, ReasonName}, Channel);
handle_out(disconnect, {ReasonCode, ReasonName}, Channel = ?IS_MQTT_V5) ->
Packet = ?DISCONNECT_PACKET(ReasonCode),
handle_out(disconnect, {ReasonCode, ReasonName}, Channel) ->
handle_out(disconnect, {ReasonCode, ReasonName, #{}}, Channel);
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel = ?IS_MQTT_V5) ->
Packet = ?DISCONNECT_PACKET(ReasonCode, Props),
{ok, [{outgoing, Packet}, {close, ReasonName}], Channel};
handle_out(disconnect, {_ReasonCode, ReasonName}, Channel) ->
handle_out(disconnect, {_ReasonCode, ReasonName, _Props}, Channel) ->
{ok, {close, ReasonName}, Channel};
handle_out(auth, {ReasonCode, Properties}, Channel) ->
@ -954,11 +959,15 @@ handle_call({takeover, 'begin'}, Channel = #channel{session = Session}) ->
reply(Session, Channel#channel{takeover = true});
handle_call({takeover, 'end'}, Channel = #channel{session = Session,
pendings = Pendings}) ->
pendings = Pendings,
conninfo = #{clientid := ClientId}}) ->
ok = emqx_session:takeover(Session),
%% TODO: Should not drain deliver here (side effect)
Delivers = emqx_misc:drain_deliver(),
AllPendings = lists:append(Delivers, Pendings),
?tp(debug,
emqx_channel_takeover_end,
#{clientid => ClientId}),
disconnect_and_shutdown(takeovered, AllPendings, Channel);
handle_call(list_acl_cache, Channel) ->
@ -1019,6 +1028,9 @@ handle_info(clean_acl_cache, Channel) ->
ok = emqx_acl_cache:empty_acl_cache(),
{ok, Channel};
handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) ->
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel);
handle_info(Info, Channel) ->
?LOG(error, "Unexpected info: ~p", [Info]),
{ok, Channel}.

View File

@ -22,6 +22,8 @@
-include("emqx.hrl").
-include("logger.hrl").
-include("types.hrl").
-include_lib("stdlib/include/qlc.hrl").
-include_lib("stdlib/include/ms_transform.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-logger_header("[CM]").
@ -60,7 +62,9 @@
, lookup_channels/2
]).
-export([all_channels/0]).
-export([all_channels/0,
channel_with_session_table/0,
live_connection_table/0]).
%% gen_server callbacks
-export([ init/1
@ -149,8 +153,11 @@ connection_closed(ClientId) ->
connection_closed(ClientId, self()).
-spec(connection_closed(emqx_types:clientid(), chan_pid()) -> true).
connection_closed(ClientId, ChanPid) ->
ets:delete_object(?CHAN_CONN_TAB, {ClientId, ChanPid}).
connection_closed(_ClientId, _ChanPid) ->
%% We can't clean CHAN_CONN_TAB because records for dead connections
%% are required for `get_chann_conn_mod/1` function, and `get_chann_conn_mod/1`
%% is used for takeover.
true.
%% @doc Get info of a channel.
-spec(get_chan_info(emqx_types:clientid()) -> maybe(emqx_types:infos())).
@ -425,6 +432,38 @@ all_channels() ->
Pat = [{{'_', '$1'}, [], ['$1']}],
ets:select(?CHAN_TAB, Pat).
%% @doc Build a QLC query handle over the channel info table.
%% The handle yields `{ClientId, ConnState, ConnInfo, ClientInfo}' for every
%% channel info entry whose `conninfo' has `clean_start' set to `false'.
%% Nothing is read from ETS until the handle is evaluated.
channel_with_session_table() ->
    %% project each `{{ClientId, ChanPid}, Info, Stats}' row to `{ClientId, Info}'
    MatchSpec = ets:fun2ms(
                  fun({{ClientId, _ChanPid}, ChanInfo, _Stats}) ->
                          {ClientId, ChanInfo}
                  end),
    InfoTable = ets:table(?CHAN_INFO_TAB, [{traverse, {select, MatchSpec}}]),
    qlc:q([{ClientId, ConnState, ConnInfo, ClientInfo}
           || {ClientId,
               #{conninfo := #{clean_start := false} = ConnInfo,
                 conn_state := ConnState,
                 clientinfo := ClientInfo}} <- InfoTable
          ]).
%% @doc Build a QLC query handle over the channel connection table.
%% The handle yields `{ClientId, ChanPid}' for every entry accepted by
%% `is_channel_connected/2', i.e. channels on the local node whose info
%% does not report the `disconnected' state.
live_connection_table() ->
    %% project each `{{ClientId, ChanPid}, _}' row to `{ClientId, ChanPid}'
    MatchSpec = ets:fun2ms(
                  fun({{ClientId, ChanPid}, _}) ->
                          {ClientId, ChanPid}
                  end),
    ConnTable = ets:table(?CHAN_CONN_TAB, [{traverse, {select, MatchSpec}}]),
    qlc:q([{ClientId, ChanPid}
           || {ClientId, ChanPid} <- ConnTable,
              is_channel_connected(ClientId, ChanPid)
          ]).
%% Whether a channel counts as a live local connection.
%% Remote channel pids are never considered connected. A local channel is
%% considered connected unless its channel info explicitly reports
%% `conn_state = disconnected'; missing info or a missing `conn_state'
%% key counts as connected.
is_channel_connected(ClientId, ChanPid) when node(ChanPid) =:= node() ->
    case get_chan_info(ClientId, ChanPid) of
        #{conn_state := ConnState} -> ConnState =/= disconnected;
        _NoInfoOrNoState -> true
    end;
is_channel_connected(_ClientId, _ChanPid) ->
    false.
%% @doc Lookup channels.
-spec(lookup_channels(emqx_types:clientid()) -> list(chan_pid())).
lookup_channels(ClientId) ->
@ -523,4 +562,3 @@ get_chann_conn_mod(ClientId, ChanPid) when node(ChanPid) == node() ->
end;
get_chann_conn_mod(ClientId, ChanPid) ->
rpc_call(node(ChanPid), get_chann_conn_mod, [ClientId, ChanPid], ?T_GET_INFO).

View File

@ -225,6 +225,8 @@ ensure_file(File) ->
, {emqx_telemetry, true}
, {emqx_rule_engine, true}
, {emqx_bridge_mqtt, false}
, {emqx_eviction_agent, true}
, {emqx_node_rebalance, true}
],
write_loaded(DefaultPlugins);
true ->

View File

@ -0,0 +1,83 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_node_helpers).
-include_lib("eunit/include/eunit.hrl").
-define(SLAVE_START_APPS, [emqx]).
-export([start_slave/1,
start_slave/2,
stop_slave/1]).
%% Starts a slave node called `Name' with default options
%% (delegates to `start_slave/2' with an empty options map).
start_slave(Name) ->
start_slave(Name, #{}).
%% Starts a `ct_slave' node named `Name@Host' (same host part as the
%% current node), verifies it is reachable, configures it via
%% `setup_node/2' and returns the node name.
%% `Opts' is passed to `setup_node/2' untouched.
%% Crashes with `badmatch' if the node cannot be started or pinged.
start_slave(Name, Opts) ->
    NodeName = list_to_atom(atom_to_list(Name) ++ "@" ++ host()),
    SlaveOpts = [{kill_if_fail, true},
                 {monitor_master, true},
                 {init_timeout, 10000},
                 {startup_timeout, 10000},
                 {erl_flags, ebin_path()}],
    {ok, Node} = ct_slave:start(NodeName, SlaveOpts),
    pong = net_adm:ping(Node),
    setup_node(Node, Opts),
    Node.
%% Makes `Node' leave the ekka cluster (result ignored) and then stops it,
%% returning whatever `ct_slave:stop/1' returns.
stop_slave(Node) ->
    _ = rpc:call(Node, ekka, leave, []),
    ct_slave:stop(Node).
%% Host part of the current node name, i.e. the text after the `@'.
%% Crashes with `badmatch' unless the node name splits into exactly two
%% parts.
host() ->
    NodeName = atom_to_list(node()),
    [_Name, Host] = string:lexemes(NodeName, "@"),
    Host.
%% Builds the `-pa ...' flag string for the slave node from every code
%% path entry accepted by `is_lib/1' (entries that are not located under
%% `code:lib_dir()').
ebin_path() ->
    ExtraPaths = [Path || Path <- code:get_path(), is_lib(Path)],
    string:join(["-pa" | ExtraPaths], " ").
%% Returns `true' when `Path' does NOT start with `code:lib_dir()', i.e.
%% for code paths outside the Erlang/OTP library directory. Note that the
%% name reads the other way round.
is_lib(Path) ->
    case string:prefix(Path, code:lib_dir()) of
        nomatch -> true;
        _Rest -> false
    end.
%% Prepares a freshly started slave node:
%%  * loads `gen_rpc' and `emqx' on it;
%%  * starts the requested applications there via `emqx_ct_helpers',
%%    using the env handler from `Opts' or a default one;
%%  * joins it to the current node's ekka cluster (result ignored);
%%  * checks that `gen_rpc' works in both directions.
%% Recognised `Opts' keys: `listeners' (default `[]'), `start_apps'
%% (default `?SLAVE_START_APPS') and `env_handler'.
setup_node(Node, #{} = Opts) ->
    Listeners = maps:get(listeners, Opts, []),
    StartApps = maps:get(start_apps, Opts, ?SLAVE_START_APPS),
    %% Default handler: for the `emqx' app, install the requested listeners
    %% and switch gen_rpc to stateless port discovery; leave other apps alone.
    DefaultEnvHandler =
        fun(App) ->
                case App of
                    emqx ->
                        application:set_env(emqx, listeners, Listeners),
                        application:set_env(gen_rpc, port_discovery, stateless),
                        ok;
                    _Other ->
                        ok
                end
        end,
    EnvHandler = maps:get(env_handler, Opts, DefaultEnvHandler),
    lists:foreach(
      fun(App) ->
              ok = rpc:call(Node, application, load, [App])
      end,
      [gen_rpc, emqx]),
    ok = rpc:call(Node, emqx_ct_helpers, start_apps, [StartApps, EnvHandler]),
    rpc:call(Node, ekka, join, [node()]),
    %% Sanity check. Assert that `gen_rpc' is set up correctly:
    %% first master -> slave, then slave -> master.
    ?assertEqual(Node, gen_rpc:call(Node, erlang, node, [])),
    ?assertEqual(node(),
                 gen_rpc:call(Node, gen_rpc, call, [node(), erlang, node, []])),
    ok.

View File

@ -99,8 +99,10 @@ t_ensure_default_loaded_plugins_file(Config) ->
?assertEqual(
[ {emqx_bridge_mqtt, false}
, {emqx_dashboard, true}
, {emqx_eviction_agent, true}
, {emqx_management, true}
, {emqx_modules, true}
, {emqx_node_rebalance, true}
, {emqx_recon, true}
, {emqx_retainer, true}
, {emqx_rule_engine, true}

View File

@ -380,7 +380,7 @@ t_local(_) ->
emqtt:stop(ConnPid1),
emqtt:stop(ConnPid2),
stop_slave(Node),
emqx_node_helpers:stop_slave(Node),
?assertEqual(local, emqx_shared_sub:strategy(<<"local_group">>)),
?assertEqual(local, RemoteLocalGroupStrategy),
@ -415,7 +415,7 @@ t_local_fallback(_) ->
{true, UsedSubPid2} = last_message(<<"hello2">>, [ConnPid1]),
emqtt:stop(ConnPid1),
stop_slave(Node),
emqx_node_helpers:stop_slave(Node),
?assertEqual(UsedSubPid1, UsedSubPid2),
ok.
@ -536,55 +536,8 @@ recv_msgs(Count, Msgs) ->
end.
start_slave(Name, Port) ->
{ok, Node} = ct_slave:start(list_to_atom(atom_to_list(Name) ++ "@" ++ host()),
[{kill_if_fail, true},
{monitor_master, true},
{init_timeout, 10000},
{startup_timeout, 10000},
{erl_flags, ebin_path()}]),
pong = net_adm:ping(Node),
ok = setup_node(Node, Port),
Node.
stop_slave(Node) ->
rpc:call(Node, ekka, leave, []),
ct_slave:stop(Node).
host() ->
[_, Host] = string:tokens(atom_to_list(node()), "@"), Host.
ebin_path() ->
string:join(["-pa" | lists:filter(fun is_lib/1, code:get_path())], " ").
is_lib(Path) ->
string:prefix(Path, code:lib_dir()) =:= nomatch.
setup_node(Node, Port) ->
EnvHandler =
fun(emqx) ->
application:set_env(
emqx,
listeners,
[#{listen_on => {{127,0,0,1},Port},
name => "internal",
opts => [{zone,internal}],
proto => tcp}]),
application:set_env(gen_rpc, port_discovery, stateless),
ok;
(_) ->
ok
end,
[ok = rpc:call(Node, application, load, [App]) || App <- [gen_rpc, emqx]],
ok = rpc:call(Node, emqx_ct_helpers, start_apps, [[emqx], EnvHandler]),
rpc:call(Node, ekka, join, [node()]),
%% Sanity check. Assert that `gen_rpc' is set up correctly:
?assertEqual( Node
, gen_rpc:call(Node, erlang, node, [])
),
?assertEqual( node()
, gen_rpc:call(Node, gen_rpc, call, [node(), erlang, node, []])
),
ok.
Listeners = [#{listen_on => {{127,0,0,1}, Port},
name => "internal",
opts => [{zone,internal}],
proto => tcp}],
emqx_node_helpers:start_slave(Name, #{listeners => Listeners}).