fix(clusterlink): communicate bootstrap requirements via actor handshake
`session_present` flag is not reliable to decide whether bootstrap is needed if durable sessions is enabled. In this case, the client session may survive cluster restart while all the external routes are lost, as they are not persistent.
This commit is contained in:
parent
d5e82cdfac
commit
21711c6e0d
|
@ -170,10 +170,7 @@ actor_init(
|
||||||
case MyClusterName of
|
case MyClusterName of
|
||||||
TargetCluster ->
|
TargetCluster ->
|
||||||
Env = #{timestamp => erlang:system_time(millisecond)},
|
Env = #{timestamp => erlang:system_time(millisecond)},
|
||||||
{ok, _} = emqx_cluster_link_extrouter:actor_init(
|
emqx_cluster_link_extrouter:actor_init(ClusterName, Actor, Incr, Env);
|
||||||
ClusterName, Actor, Incr, Env
|
|
||||||
),
|
|
||||||
ok;
|
|
||||||
_ ->
|
_ ->
|
||||||
%% The remote cluster uses a different name to refer to this cluster
|
%% The remote cluster uses a different name to refer to this cluster
|
||||||
?SLOG(error, #{
|
?SLOG(error, #{
|
||||||
|
|
|
@ -19,7 +19,8 @@
|
||||||
actor_state/3,
|
actor_state/3,
|
||||||
actor_apply_operation/2,
|
actor_apply_operation/2,
|
||||||
actor_apply_operation/3,
|
actor_apply_operation/3,
|
||||||
actor_gc/1
|
actor_gc/1,
|
||||||
|
is_present_incarnation/1
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% Internal API
|
%% Internal API
|
||||||
|
@ -140,7 +141,8 @@ match_to_route(M) ->
|
||||||
cluster :: cluster(),
|
cluster :: cluster(),
|
||||||
actor :: actor(),
|
actor :: actor(),
|
||||||
incarnation :: incarnation(),
|
incarnation :: incarnation(),
|
||||||
lane :: lane() | undefined
|
lane :: lane() | undefined,
|
||||||
|
extra :: map()
|
||||||
}).
|
}).
|
||||||
|
|
||||||
-type state() :: #state{}.
|
-type state() :: #state{}.
|
||||||
|
@ -159,6 +161,12 @@ actor_init(Cluster, Actor, Incarnation, Env = #{timestamp := Now}) ->
|
||||||
actor_init(Cluster, Actor, Incarnation, Env)
|
actor_init(Cluster, Actor, Incarnation, Env)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
-spec is_present_incarnation(state()) -> boolean().
|
||||||
|
is_present_incarnation(#state{extra = #{is_present_incarnation := IsNew}}) ->
|
||||||
|
IsNew;
|
||||||
|
is_present_incarnation(_State) ->
|
||||||
|
false.
|
||||||
|
|
||||||
mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
|
mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
|
||||||
%% NOTE
|
%% NOTE
|
||||||
%% We perform this heavy-weight transaction only in the case of a new route
|
%% We perform this heavy-weight transaction only in the case of a new route
|
||||||
|
@ -173,7 +181,7 @@ mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
|
||||||
case mnesia:read(?EXTROUTE_ACTOR_TAB, ActorID, write) of
|
case mnesia:read(?EXTROUTE_ACTOR_TAB, ActorID, write) of
|
||||||
[#actor{incarnation = Incarnation, lane = Lane} = Rec] ->
|
[#actor{incarnation = Incarnation, lane = Lane} = Rec] ->
|
||||||
ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec#actor{until = bump_actor_ttl(TS)}, write),
|
ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec#actor{until = bump_actor_ttl(TS)}, write),
|
||||||
{ok, State#state{lane = Lane}};
|
{ok, State#state{lane = Lane, extra = #{is_present_incarnation => true}}};
|
||||||
[] ->
|
[] ->
|
||||||
Lane = mnesia_assign_lane(Cluster),
|
Lane = mnesia_assign_lane(Cluster),
|
||||||
Rec = #actor{
|
Rec = #actor{
|
||||||
|
@ -183,7 +191,7 @@ mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
|
||||||
until = bump_actor_ttl(TS)
|
until = bump_actor_ttl(TS)
|
||||||
},
|
},
|
||||||
ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec, write),
|
ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec, write),
|
||||||
{ok, State#state{lane = Lane}};
|
{ok, State#state{lane = Lane, extra = #{is_present_incarnation => false}}};
|
||||||
[#actor{incarnation = Outdated} = Rec] when Incarnation > Outdated ->
|
[#actor{incarnation = Outdated} = Rec] when Incarnation > Outdated ->
|
||||||
{reincarnate, Rec};
|
{reincarnate, Rec};
|
||||||
[#actor{incarnation = Newer}] ->
|
[#actor{incarnation = Newer}] ->
|
||||||
|
@ -321,7 +329,7 @@ mnesia_clean_incarnation(#actor{id = Actor, incarnation = Incarnation, lane = La
|
||||||
clean_lane(Lane) ->
|
clean_lane(Lane) ->
|
||||||
ets:foldl(
|
ets:foldl(
|
||||||
fun(#extroute{entry = Entry, mcounter = MCounter}, _) ->
|
fun(#extroute{entry = Entry, mcounter = MCounter}, _) ->
|
||||||
apply_operation(Entry, MCounter, del, Lane)
|
apply_operation(Entry, MCounter, delete, Lane)
|
||||||
end,
|
end,
|
||||||
0,
|
0,
|
||||||
?EXTROUTE_TAB
|
?EXTROUTE_TAB
|
||||||
|
|
|
@ -71,6 +71,7 @@
|
||||||
-define(F_TARGET_CLUSTER, 13).
|
-define(F_TARGET_CLUSTER, 13).
|
||||||
-define(F_PROTO_VER, 14).
|
-define(F_PROTO_VER, 14).
|
||||||
-define(F_RESULT, 15).
|
-define(F_RESULT, 15).
|
||||||
|
-define(F_NEED_BOOTSTRAP, 16).
|
||||||
|
|
||||||
-define(ROUTE_DELETE, 100).
|
-define(ROUTE_DELETE, 100).
|
||||||
|
|
||||||
|
@ -279,18 +280,29 @@ actor_init_ack_resp_msg(Actor, InitRes, ReqId, RespTopic) ->
|
||||||
Payload = #{
|
Payload = #{
|
||||||
?F_OPERATION => ?OP_ACTOR_INIT_ACK,
|
?F_OPERATION => ?OP_ACTOR_INIT_ACK,
|
||||||
?F_PROTO_VER => ?PROTO_VER,
|
?F_PROTO_VER => ?PROTO_VER,
|
||||||
?F_ACTOR => Actor,
|
?F_ACTOR => Actor
|
||||||
?F_RESULT => InitRes
|
|
||||||
},
|
},
|
||||||
|
Payload1 = with_res_and_bootstrap(Payload, InitRes),
|
||||||
emqx_message:make(
|
emqx_message:make(
|
||||||
undefined,
|
undefined,
|
||||||
?QOS_1,
|
?QOS_1,
|
||||||
RespTopic,
|
RespTopic,
|
||||||
?ENCODE(Payload),
|
?ENCODE(Payload1),
|
||||||
#{},
|
#{},
|
||||||
#{properties => #{'Correlation-Data' => ReqId}}
|
#{properties => #{'Correlation-Data' => ReqId}}
|
||||||
).
|
).
|
||||||
|
|
||||||
|
with_res_and_bootstrap(Payload, {ok, ActorState}) ->
|
||||||
|
Payload#{
|
||||||
|
?F_RESULT => ok,
|
||||||
|
?F_NEED_BOOTSTRAP => not emqx_cluster_link_extrouter:is_present_incarnation(ActorState)
|
||||||
|
};
|
||||||
|
with_res_and_bootstrap(Payload, Error) ->
|
||||||
|
Payload#{
|
||||||
|
?F_RESULT => Error,
|
||||||
|
?F_NEED_BOOTSTRAP => false
|
||||||
|
}.
|
||||||
|
|
||||||
publish_route_sync(ClientPid, Actor, Incarnation, Updates) ->
|
publish_route_sync(ClientPid, Actor, Incarnation, Updates) ->
|
||||||
PubTopic = ?ROUTE_TOPIC,
|
PubTopic = ?ROUTE_TOPIC,
|
||||||
Payload = #{
|
Payload = #{
|
||||||
|
@ -339,9 +351,12 @@ decode_resp1(#{
|
||||||
?F_OPERATION := ?OP_ACTOR_INIT_ACK,
|
?F_OPERATION := ?OP_ACTOR_INIT_ACK,
|
||||||
?F_ACTOR := Actor,
|
?F_ACTOR := Actor,
|
||||||
?F_PROTO_VER := ProtoVer,
|
?F_PROTO_VER := ProtoVer,
|
||||||
?F_RESULT := InitResult
|
?F_RESULT := InitResult,
|
||||||
|
?F_NEED_BOOTSTRAP := NeedBootstrap
|
||||||
}) ->
|
}) ->
|
||||||
{actor_init_ack, #{actor => Actor, result => InitResult, proto_ver => ProtoVer}}.
|
{actor_init_ack, #{
|
||||||
|
actor => Actor, result => InitResult, proto_ver => ProtoVer, need_bootstrap => NeedBootstrap
|
||||||
|
}}.
|
||||||
|
|
||||||
decode_forwarded_msg(Payload) ->
|
decode_forwarded_msg(Payload) ->
|
||||||
case ?DECODE(Payload) of
|
case ?DECODE(Payload) of
|
||||||
|
|
|
@ -164,14 +164,6 @@ refine_client_options(Options = #{clientid := ClientID}, Actor) ->
|
||||||
retry_interval => 0
|
retry_interval => 0
|
||||||
}.
|
}.
|
||||||
|
|
||||||
client_session_present(ClientPid) ->
|
|
||||||
Info = emqtt:info(ClientPid),
|
|
||||||
%% FIXME: waitnig for emqtt release that fixes session_present type (must be a boolean)
|
|
||||||
case proplists:get_value(session_present, Info, 0) of
|
|
||||||
0 -> false;
|
|
||||||
1 -> true
|
|
||||||
end.
|
|
||||||
|
|
||||||
announce_client(Actor, TargetCluster, Pid) ->
|
announce_client(Actor, TargetCluster, Pid) ->
|
||||||
Name =
|
Name =
|
||||||
case Actor of
|
case Actor of
|
||||||
|
@ -334,13 +326,15 @@ handle_info(
|
||||||
{publish, #{payload := Payload, properties := #{'Correlation-Data' := ReqId}}},
|
{publish, #{payload := Payload, properties := #{'Correlation-Data' := ReqId}}},
|
||||||
St = #st{actor_init_req_id = ReqId}
|
St = #st{actor_init_req_id = ReqId}
|
||||||
) ->
|
) ->
|
||||||
{actor_init_ack, #{result := Res} = AckInfoMap} = emqx_cluster_link_mqtt:decode_resp(Payload),
|
{actor_init_ack, #{result := Res, need_bootstrap := NeedBootstrap} = AckInfoMap} = emqx_cluster_link_mqtt:decode_resp(
|
||||||
|
Payload
|
||||||
|
),
|
||||||
St1 = St#st{
|
St1 = St#st{
|
||||||
actor_init_req_id = undefined, actor_init_timer = undefined, remote_actor_info = AckInfoMap
|
actor_init_req_id = undefined, actor_init_timer = undefined, remote_actor_info = AckInfoMap
|
||||||
},
|
},
|
||||||
case Res of
|
case Res of
|
||||||
ok ->
|
ok ->
|
||||||
{noreply, post_actor_init(St1)};
|
{noreply, post_actor_init(St1, NeedBootstrap)};
|
||||||
Error ->
|
Error ->
|
||||||
?SLOG(error, #{
|
?SLOG(error, #{
|
||||||
msg => "failed_to_init_link",
|
msg => "failed_to_init_link",
|
||||||
|
@ -410,10 +404,11 @@ init_remote_actor(
|
||||||
St#st{actor_init_req_id = ReqId, actor_init_timer = TRef}.
|
St#st{actor_init_req_id = ReqId, actor_init_timer = TRef}.
|
||||||
|
|
||||||
post_actor_init(
|
post_actor_init(
|
||||||
St = #st{client = ClientPid, target = TargetCluster, actor = Actor, incarnation = Incr}
|
St = #st{client = ClientPid, target = TargetCluster, actor = Actor, incarnation = Incr},
|
||||||
|
NeedBootstrap
|
||||||
) ->
|
) ->
|
||||||
ok = start_syncer(TargetCluster, Actor, Incr),
|
ok = start_syncer(TargetCluster, Actor, Incr),
|
||||||
process_bootstrap(St#st{client = ClientPid}).
|
process_bootstrap(St#st{client = ClientPid}, NeedBootstrap).
|
||||||
|
|
||||||
handle_connect_error(_Reason, St) ->
|
handle_connect_error(_Reason, St) ->
|
||||||
%% TODO: logs
|
%% TODO: logs
|
||||||
|
@ -426,14 +421,14 @@ handle_client_down(_Reason, St = #st{target = TargetCluster, actor = Actor}) ->
|
||||||
ok = close_syncer(TargetCluster, Actor),
|
ok = close_syncer(TargetCluster, Actor),
|
||||||
process_connect(St#st{client = undefined}).
|
process_connect(St#st{client = undefined}).
|
||||||
|
|
||||||
process_bootstrap(St = #st{bootstrapped = false}) ->
|
process_bootstrap(St = #st{bootstrapped = false}, _NeedBootstrap) ->
|
||||||
run_bootstrap(St);
|
run_bootstrap(St);
|
||||||
process_bootstrap(St = #st{client = ClientPid, bootstrapped = true}) ->
|
process_bootstrap(St = #st{bootstrapped = true}, NeedBootstrap) ->
|
||||||
case client_session_present(ClientPid) of
|
case NeedBootstrap of
|
||||||
true ->
|
true ->
|
||||||
process_bootstrapped(St);
|
run_bootstrap(St);
|
||||||
false ->
|
false ->
|
||||||
run_bootstrap(St)
|
process_bootstrapped(St)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% Bootstrapping.
|
%% Bootstrapping.
|
||||||
|
|
Loading…
Reference in New Issue