Ack replayq and allow retry in tests

spring2maz 2019-02-26 08:45:15 +01:00 committed by Gilbert Wong
parent 75163e21f3
commit 9dbc34c376
6 changed files with 77 additions and 59 deletions

View File

@@ -957,8 +957,7 @@ connected(cast, ?PACKET(?PINGRESP), State) ->
end;
connected(cast, ?DISCONNECT_PACKET(ReasonCode, Properties), State) ->
ok = eval_msg_handler(State, disconnected, {ReasonCode, Properties}),
{stop, disconnected, State};
{stop, {disconnected, ReasonCode, Properties}, State};
connected(info, {timeout, _TRef, keepalive}, State = #state{force_ping = true}) ->
case send(?PACKET(?PINGREQ), State) of
@@ -1042,10 +1041,18 @@ handle_event(EventType, EventContent, StateName, StateData) ->
{keep_state, StateData}.
%% Mandatory callback functions
terminate(_Reason, _State, #state{socket = undefined}) ->
ok;
terminate(_Reason, _State, #state{socket = Socket}) ->
emqx_client_sock:close(Socket).
terminate(Reason, _StateName, State = #state{socket = Socket}) ->
case Reason of
{disconnected, ReasonCode, Properties} ->
%% backward compatible
ok = eval_msg_handler(State, disconnected, {ReasonCode, Properties});
_ ->
ok = eval_msg_handler(State, disconnected, Reason)
end,
case Socket =:= undefined of
true -> ok;
_ -> emqx_client_sock:close(Socket)
end.
code_change(_Vsn, State, Data, _Extra) ->
{ok, State, Data}.
@@ -1276,6 +1283,10 @@ eval_msg_handler(#state{msg_handler = ?NO_REQ_HANDLER,
%% Special handling for disconnected message when there is no handler callback
Owner ! {disconnected, ReasonCode, Properties},
ok;
eval_msg_handler(#state{msg_handler = ?NO_REQ_HANDLER},
disconnected, _OtherReason) ->
%% do nothing to be backward compatible
ok;
eval_msg_handler(#state{msg_handler = ?NO_REQ_HANDLER,
owner = Owner}, Kind, Msg) ->
Owner ! {Kind, Msg},
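When no msg_handler is installed (the ?NO_REQ_HANDLER clauses above), the owner process receives these notifications as plain Erlang messages. A minimal sketch of how an owner might match them, assuming nothing beyond the message shapes shown above (the 5-second timeout is arbitrary):

    %% Sketch only: matching the notifications sent by the clauses above.
    wait_for_notification() ->
        receive
            {disconnected, ReasonCode, Properties} ->  %% special-cased disconnect message
                {disconnected, ReasonCode, Properties};
            {Kind, Msg} ->                             %% any other Kind forwarded verbatim
                {Kind, Msg}
        after 5000 ->
            timeout
        end.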

View File

@@ -64,8 +64,7 @@
-export([start_link/2,
import_batch/2,
handle_ack/2,
stop/1
]).
stop/1]).
%% gen_statem callbacks
-export([terminate/3, code_change/4, init/1, callback_mode/0]).
@@ -74,15 +73,13 @@
-export([standing_by/3, connecting/3, connected/3]).
%% management APIs
-export([start_bridge/1, stop_bridge/1, status/1]).
-export([ensure_started/2, ensure_stopped/1, ensure_stopped/2]).
-export([ensure_started/1, ensure_started/2, ensure_stopped/1, ensure_stopped/2, status/1]).
-export([get_forwards/1, ensure_forward_present/2, ensure_forward_absent/2]).
-export([get_subscriptions/1, ensure_subscription_present/3, ensure_subscription_absent/2]).
-export_type([config/0,
batch/0,
ack_ref/0
]).
ack_ref/0]).
-type id() :: atom() | string() | pid().
-type qos() :: emqx_mqtt_types:qos().
@@ -112,7 +109,7 @@
%% max_inflight_batches: Max number of batches allowed to send-ahead before
%% receiving confirmation from remote node/cluster
%% mountpoint: The topic mount point for messages sent to remote node/cluster
%% `undefined', `<<>>' or `""' to disalble
%% `undefined', `<<>>' or `""' to disable
%% forwards: Local topics to subscribe.
%% queue.batch_bytes_limit: Max number of bytes to collect in a batch for each
%% send call towards emqx_portal_connect
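To make the documented options concrete, a config of roughly this shape could be passed to start_link/2; this is a sketch, not taken from the diff, and the address value, topic names and limits are made up for illustration:

    %% Illustrative sketch only; exact keys and defaults may differ.
    ExampleConfig = #{address => "remote-broker.example:1883",  %% remote node/cluster address (value made up)
                      max_inflight_batches => 32,               %% send-ahead limit before remote confirmation
                      mountpoint => <<"bridge/">>,              %% undefined, <<>> or "" disables
                      forwards => [<<"sensor/#">>],             %% local topics to subscribe
                      queue => #{batch_bytes_limit => 1 bsl 20} %% max bytes collected per batch
                     }.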
@@ -129,6 +126,9 @@ start_link(Name, Config) ->
gen_statem:start_link({local, name(Name)}, ?MODULE, Config, []).
%% @doc Manually start portal worker. State idempotency ensured.
ensure_started(Name) ->
gen_statem:call(name(Name), ensure_started).
ensure_started(Name, Config) ->
case start_link(Name, Config) of
{ok, Pid} -> {ok, Pid};
@@ -162,12 +162,6 @@ ensure_stopped(Id, Timeout) ->
stop(Pid) -> gen_statem:stop(Pid).
start_bridge(Name) ->
gen_statem:call(name(Name), ensure_started).
stop_bridge(Name) ->
gen_statem:call(name(Name), ensure_stopped).
status(Pid) ->
gen_statem:call(Pid, status).
@@ -279,11 +273,7 @@ standing_by(enter, _, #{start_type := manual}) ->
keep_state_and_data;
standing_by({call, From}, ensure_started, State) ->
{next_state, connecting, State,
[{reply, From, <<"starting bridge ......">>}]};
standing_by({call, From}, ensure_stopped, _State) ->
{keep_state_and_data, [{reply, From, <<"bridge not started">>}]};
standing_by({call, From}, status, _State) ->
{keep_state_and_data, [{reply, From, <<"Stopped">>}]};
[{reply, From, ok}]};
standing_by(state_timeout, do_connect, State) ->
{next_state, connecting, State};
standing_by({call, From}, _Call, _State) ->
@@ -308,6 +298,7 @@ connecting(enter, _, #{reconnect_delay_ms := Timeout,
ok = subscribe_local_topics(Forwards),
case ConnectFun(Subs) of
{ok, ConnRef, Conn} ->
?INFO("Portal ~p connected", [name()]),
Action = {state_timeout, 0, connected},
{keep_state, State#{conn_ref => ConnRef, connection => Conn}, Action};
error ->
@@ -318,10 +309,6 @@ connecting(state_timeout, connected, State) ->
{next_state, connected, State};
connecting(state_timeout, reconnect, _State) ->
repeat_state_and_data;
connecting({call, From}, status, _State) ->
{keep_state_and_data, [{reply, From, <<"Stopped">>}]};
connecting({call, From}, _Call, _State) ->
{keep_state_and_data, [{reply, From, <<"starting bridge ......">>}]};
connecting(info, {batch_ack, Ref}, State) ->
case do_ack(State, Ref) of
{ok, NewState} ->
@@ -329,6 +316,10 @@ connecting(info, {batch_ack, Ref}, State) ->
_ ->
keep_state_and_data
end;
connecting(internal, maybe_send, _State) ->
keep_state_and_data;
connecting(info, {disconnected, _Ref, _Reason}, _State) ->
keep_state_and_data;
connecting(Type, Content, State) ->
common(connecting, Type, Content, State).
@@ -353,23 +344,23 @@ connected(internal, maybe_send, State) ->
{error, NewState} ->
{next_state, connecting, disconnect(NewState)}
end;
connected({call, From}, ensure_started, _State) ->
{keep_state_and_data, [{reply, From, <<"bridge already started">>}]};
connected({call, From}, status, _State) ->
{keep_state_and_data, [{reply, From, <<"Running">>}]};
connected(info, {disconnected, ConnRef, Reason},
#{conn_ref := ConnRef, connection := Conn} = State) ->
?INFO("Portal ~p diconnected~nreason=~p",
[name(), Conn, Reason]),
#{conn_ref := ConnRefCurrent, connection := Conn} = State) ->
case ConnRefCurrent =:= ConnRef of
true ->
?INFO("Portal ~p diconnected~nreason=~p", [name(), Conn, Reason]),
{next_state, connecting,
State#{conn_ref := undefined,
connection := undefined}};
State#{conn_ref := undefined, connection := undefined}};
false ->
keep_state_and_data
end;
connected(info, {batch_ack, Ref}, State) ->
case do_ack(State, Ref) of
stale ->
keep_state_and_data;
bad_order ->
%% try re-connect then re-send
?ERROR("Bad order ack received by portal ~p", [name()]),
{next_state, connecting, disconnect(State)};
{ok, NewState} ->
{keep_state, NewState, ?maybe_send}
@@ -378,6 +369,8 @@ connected(Type, Content, State) ->
common(connected, Type, Content, State).
%% Common handlers
common(StateName, {call, From}, status, _State) ->
{keep_state_and_data, [{reply, From, StateName}]};
common(_StateName, {call, From}, ensure_started, _State) ->
{keep_state_and_data, [{reply, From, ok}]};
common(_StateName, {call, From}, get_forwards, #{forwards := Forwards}) ->
@@ -392,13 +385,13 @@ common(_StateName, {call, From}, {ensure_absent, What, Topic}, State) ->
{keep_state, NewState, [{reply, From, Result}]};
common(_StateName, {call, From}, ensure_stopped, _State) ->
{stop_and_reply, {shutdown, manual},
[{reply, From, <<"stop bridge successfully">>}]};
[{reply, From, ok}]};
common(_StateName, info, {dispatch, _, Msg},
#{replayq := Q} = State) ->
NewQ = replayq:append(Q, collect([Msg])),
{keep_state, State#{replayq => NewQ}, ?maybe_send};
common(StateName, Type, Content, State) ->
?INFO("Portal ~p discarded ~p type event at state ~p:~p",
?INFO("Portal ~p discarded ~p type event at state ~p:\n~p",
[name(), Type, StateName, Content]),
{keep_state, State}.
@@ -497,15 +490,16 @@ do_send(State = #{inflight := Inflight}, QAckRef, [_ | _] = Batch) ->
%% this is a list of inflight BATCHes, not expecting it to be too long
NewInflight = Inflight ++ [#{q_ack_ref => QAckRef,
send_ack_ref => Ref,
batch => Batch
}],
batch => Batch}],
{ok, State#{inflight := NewInflight}};
{error, Reason} ->
?INFO("Batch produce failed\n~p", [Reason]),
{error, State}
end.
do_ack(State = #{inflight := [#{send_ack_ref := Ref} | Rest]}, Ref) ->
do_ack(State = #{inflight := [#{send_ack_ref := Refx, q_ack_ref := QAckRef} | Rest],
replayq := Q}, Ref) when Refx =:= Ref ->
ok = replayq:ack(Q, QAckRef),
{ok, State#{inflight := Rest}};
do_ack(#{inflight := Inflight}, Ref) ->
case lists:any(fun(#{send_ack_ref := Ref0}) -> Ref0 =:= Ref end, Inflight) of
@@ -533,8 +527,7 @@ disconnect(#{connection := Conn,
} = State) when Conn =/= undefined ->
ok = Module:stop(ConnRef, Conn),
State#{conn_ref => undefined,
connection => undefined
};
connection => undefined};
disconnect(State) -> State.
%% Called only when replayq needs to dump it to disk.
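The do_ack/2 change above is the core of this commit: a batch popped from replayq keeps its q_ack_ref in the inflight list alongside the send_ack_ref, and is acknowledged back to replayq only once the matching remote ack arrives, so unacked batches survive a reconnect and can be re-sent. A minimal sketch of that append/pop/ack cycle against the replayq API; the directory and limits below are illustrative assumptions:

    %% Sketch of the replayq cycle the worker relies on; values are made up.
    replayq_roundtrip() ->
        Q0 = replayq:open(#{dir => "/tmp/replayq-demo", seg_bytes => 10 * 1024 * 1024}),
        Q1 = replayq:append(Q0, [<<"1">>, <<"2">>]),                  %% enqueue collected payloads
        {Q2, QAckRef, Batch} = replayq:pop(Q1, #{count_limit => 32}), %% take a batch to send ahead
        %% ... hand Batch to the connect module, wait for the remote {batch_ack, Ref} ...
        ok = replayq:ack(Q2, QAckRef),                                %% ack only after the remote confirms
        _ = replayq:close(Q2),
        Batch.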

View File

@@ -74,8 +74,8 @@ start(Config = #{address := Address}) ->
end.
stop(Ref, #{ack_collector := AckCollector, client_pid := Pid}) ->
safe_stop(AckCollector, fun() -> AckCollector ! ?STOP(Ref) end, 1000),
safe_stop(Pid, fun() -> emqx_client:stop(Pid) end, 1000),
safe_stop(AckCollector, fun() -> AckCollector ! ?STOP(Ref) end, 1000),
ok.
ensure_subscribed(#{client_pid := Pid}, Topic, QoS) when is_pid(Pid) ->
@@ -120,7 +120,7 @@ send(#{client_pid := ClientPid, ack_collector := AckCollector} = Conn, [Msg | Re
{ok, PktId} ->
send(Conn, Rest, [PktId | Acc]);
{error, {_PacketId, inflight_full}} ->
timer:sleep(100),
timer:sleep(10),
send(Conn, Batch, Acc);
{error, Reason} ->
%% NOTE: There is no partial sucess of a batch and recover from the middle
@@ -176,7 +176,7 @@ import_msg(Msg) ->
make_hdlr(Parent, AckCollector, Ref) ->
#{puback => fun(Ack) -> handle_puback(AckCollector, Ack) end,
publish => fun(Msg) -> import_msg(Msg) end,
disconnected => fun(RC, _Properties) -> Parent ! {disconnected, Ref, RC}, ok end
disconnected => fun(Reason) -> Parent ! {disconnected, Ref, Reason}, ok end
}.
subscribe_remote_topics(ClientPid, Subscriptions) ->

View File

@@ -16,7 +16,7 @@
-behavior(supervisor).
-export([start_link/0, start_link/1, portals/0]).
-export([create_portal/2, drop_portal/1]).
-export([init/1]).
-define(SUP, ?MODULE).
@@ -46,3 +46,15 @@ portal_spec({Name, Config}) ->
-spec(portals() -> [{node(), map()}]).
portals() ->
[{Name, emqx_portal:status(Pid)} || {Name, Pid, _, _} <- supervisor:which_children(?SUP)].
create_portal(Id, Config) ->
supervisor:start_child(?SUP, portal_spec({Id, Config})).
drop_portal(Id) ->
case supervisor:terminate_child(?SUP, Id) of
ok ->
supervisor:delete_child(?SUP, Id);
Error ->
emqx_logger:error("[Bridge] Delete bridge failed", [Error]),
Error
end.

View File

@@ -146,8 +146,6 @@ t_mqtt(Config) when is_list(Config) ->
?assertEqual([{ForwardedTopic, 1}], emqx_portal:get_subscriptions(Pid)),
ok = emqx_portal:ensure_subscription_present(Pid, ForwardedTopic2, _QoS = 1),
ok = emqx_portal:ensure_forward_present(Pid, SendToTopic2),
%% TODO: investigate why it's necessary
timer:sleep(1000),
?assertEqual([{ForwardedTopic, 1},
{ForwardedTopic2, 1}], emqx_portal:get_subscriptions(Pid)),
{ok, ConnPid} = emqx_mock_client:start_link(ClientId),
@@ -182,13 +180,16 @@ receive_and_match_messages(Ref, Msgs) ->
ok.
do_receive_and_match_messages(_Ref, []) -> ok;
do_receive_and_match_messages(Ref, [I | Rest]) ->
do_receive_and_match_messages(Ref, [I | Rest] = Exp) ->
receive
{Ref, timeout} -> erlang:error(timeout);
{Ref, [#{payload := P} = Msg]} ->
case I =:= binary_to_integer(P) of
true -> ok;
false -> throw({unexpected, Msg, [I | Rest]})
end,
do_receive_and_match_messages(Ref, Rest)
case binary_to_integer(P) of
I -> %% exact match
do_receive_and_match_messages(Ref, Rest);
J when J < I -> %% allow retry
do_receive_and_match_messages(Ref, Exp);
_Other ->
throw({unexpected, Msg, Exp})
end
end.

View File

@@ -139,7 +139,8 @@ match_nums([#message{payload = P} | Rest], Nums) ->
I = binary_to_integer(P),
case Nums of
[I | NumsLeft] -> match_nums(Rest, NumsLeft);
_ -> error({I, Nums})
[J | _] when J > I -> match_nums(Rest, Nums); %% allow retry
_ -> error([{received, I}, {expecting, Nums}])
end.
make_config(Ref, TestPid, Result) ->