diff --git a/apps/emqx/i18n/emqx_schema_i18n.conf b/apps/emqx/i18n/emqx_schema_i18n.conf index e7c4890f7..750c0c2cd 100644 --- a/apps/emqx/i18n/emqx_schema_i18n.conf +++ b/apps/emqx/i18n/emqx_schema_i18n.conf @@ -1078,11 +1078,13 @@ Supported configurations are the following: broker_shared_dispatch_ack_enabled { desc { - en: """Enable/disable shared dispatch acknowledgement for QoS 1 and QoS 2 messages. + en: """Deprecated, will be removed in 5.1. +Enable/disable shared dispatch acknowledgement for QoS 1 and QoS 2 messages. This should allow messages to be dispatched to a different subscriber in the group in case the picked (based on `shared_subscription_strategy`) subscriber is offline. """ - zh: """启用/禁用 QoS 1 和 QoS 2 消息的共享派发确认。 + zh: """该配置项已废弃,会在 5.1 中移除。 +启用/禁用 QoS 1 和 QoS 2 消息的共享派发确认。 开启后,允许将消息从未及时回复 ACK 的订阅者 (例如,客户端离线)重新派发给另外一个订阅者。 """ } diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 171b6dc42..b1afcac7f 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1198,6 +1198,10 @@ fields("broker") -> sc( boolean(), #{ + %% TODO: deprecated => {since, "5.1.0"} + %% in favor of session message re-dispatch at termination + %% we will stop supporting dispatch acks for shared + %% subscriptions. default => false, desc => ?DESC(broker_shared_dispatch_ack_enabled) } diff --git a/apps/emqx/src/emqx_shared_sub.erl b/apps/emqx/src/emqx_shared_sub.erl index 975b403b9..e20297703 100644 --- a/apps/emqx/src/emqx_shared_sub.erl +++ b/apps/emqx/src/emqx_shared_sub.erl @@ -81,8 +81,6 @@ | round_robin_per_group | sticky | local - %% same as hash_clientid, backward compatible - | hash | hash_clientid | hash_topic. @@ -97,6 +95,7 @@ -define(NACK(Reason), {shared_sub_nack, Reason}). -define(NO_ACK, no_ack). -define(REDISPATCH_TO(GROUP, TOPIC), {GROUP, TOPIC}). +-define(SUBSCRIBER_DOWN, noproc). -type redispatch_to() :: ?REDISPATCH_TO(emqx_topic:group(), emqx_topic:topic()). @@ -139,7 +138,7 @@ record(Group, Topic, SubPid) -> -spec dispatch(emqx_types:group(), emqx_types:topic(), emqx_types:delivery()) -> emqx_types:deliver_result(). dispatch(Group, Topic, Delivery) -> - dispatch(Group, Topic, Delivery, _FailedSubs = []). + dispatch(Group, Topic, Delivery, _FailedSubs = #{}). dispatch(Group, Topic, Delivery = #delivery{message = Msg}, FailedSubs) -> #message{from = ClientId, topic = SourceTopic} = Msg, @@ -151,9 +150,9 @@ dispatch(Group, Topic, Delivery = #delivery{message = Msg}, FailedSubs) -> case do_dispatch(SubPid, Group, Topic, Msg1, Type) of ok -> {ok, 1}; - {error, _Reason} -> + {error, Reason} -> %% Failed to dispatch to this sub, try next. - dispatch(Group, Topic, Delivery, [SubPid | FailedSubs]) + dispatch(Group, Topic, Delivery, FailedSubs#{SubPid => Reason}) end end. @@ -262,7 +261,9 @@ redispatch_shared_message(#message{} = Msg) -> %% Note that dispatch is called with self() in failed subs %% This is done to avoid dispatching back to caller Delivery = #delivery{sender = self(), message = Msg}, - dispatch(Group, Topic, Delivery, [self()]). + %% Self is terminating, it makes no sense to loop-back the dispatch + FailedSubs = #{self() => ?SUBSCRIBER_DOWN}, + dispatch(Group, Topic, Delivery, FailedSubs). %% @hidden Return the `redispatch_to` group-topic in the message header. %% `false` is returned if the message is not a shared dispatch. @@ -307,27 +308,33 @@ maybe_ack(Msg) -> pick(sticky, ClientId, SourceTopic, Group, Topic, FailedSubs) -> Sub0 = erlang:get({shared_sub_sticky, Group, Topic}), - case is_active_sub(Sub0, FailedSubs) of + All = subscribers(Group, Topic, FailedSubs), + case is_active_sub(Sub0, FailedSubs, All) of true -> %% the old subscriber is still alive %% keep using it for sticky strategy {fresh, Sub0}; false -> %% randomly pick one for the first message - {Type, Sub} = do_pick(random, ClientId, SourceTopic, Group, Topic, [Sub0 | FailedSubs]), - %% stick to whatever pick result - erlang:put({shared_sub_sticky, Group, Topic}, Sub), - {Type, Sub} + FailedSubs1 = FailedSubs#{Sub0 => ?SUBSCRIBER_DOWN}, + Res = do_pick(All, random, ClientId, SourceTopic, Group, Topic, FailedSubs1), + case Res of + {_, Sub} -> + %% stick to whatever pick result + erlang:put({shared_sub_sticky, Group, Topic}, Sub); + _ -> + ok + end, + Res end; pick(Strategy, ClientId, SourceTopic, Group, Topic, FailedSubs) -> - do_pick(Strategy, ClientId, SourceTopic, Group, Topic, FailedSubs). + All = subscribers(Group, Topic, FailedSubs), + do_pick(All, Strategy, ClientId, SourceTopic, Group, Topic, FailedSubs). -do_pick(Strategy, ClientId, SourceTopic, Group, Topic, FailedSubs) -> - All = subscribers(Group, Topic), - case All -- FailedSubs of - [] when All =:= [] -> - %% Genuinely no subscriber - false; +do_pick([], _Strategy, _ClientId, _SourceTopic, _Group, _Topic, _FailedSubs) -> + false; +do_pick(All, Strategy, ClientId, SourceTopic, Group, Topic, FailedSubs) -> + case lists:filter(fun(Sub) -> not maps:is_key(Sub, FailedSubs) end, All) of [] -> %% All offline? pick one anyway {retry, pick_subscriber(Group, Topic, Strategy, ClientId, SourceTopic, All)}; @@ -351,9 +358,6 @@ pick_subscriber(Group, Topic, Strategy, ClientId, SourceTopic, Subs) -> do_pick_subscriber(_Group, _Topic, random, _ClientId, _SourceTopic, Count) -> rand:uniform(Count); -do_pick_subscriber(Group, Topic, hash, ClientId, SourceTopic, Count) -> - %% backward compatible - do_pick_subscriber(Group, Topic, hash_clientid, ClientId, SourceTopic, Count); do_pick_subscriber(_Group, _Topic, hash_clientid, ClientId, _SourceTopic, Count) -> 1 + erlang:phash2(ClientId) rem Count; do_pick_subscriber(_Group, _Topic, hash_topic, _ClientId, SourceTopic, Count) -> @@ -374,6 +378,16 @@ do_pick_subscriber(Group, Topic, round_robin_per_group, _ClientId, _SourceTopic, {Group, Topic}, 0 }). +%% Select ETS table to get all subscriber pids which are not down. +subscribers(Group, Topic, FailedSubs) -> + lists:filter( + fun(P) -> + ?SUBSCRIBER_DOWN =/= maps:get(P, FailedSubs, false) + end, + subscribers(Group, Topic) + ). + +%% Select ETS table to get all subscriber pids. subscribers(Group, Topic) -> ets:select(?TAB, [{{emqx_shared_subscription, Group, Topic, '$1'}, [], ['$1']}]). @@ -427,6 +441,7 @@ handle_info( {mnesia_table_event, {write, #emqx_shared_subscription{subpid = SubPid}, _}}, State = #state{pmon = PMon} ) -> + ok = maybe_insert_alive_tab(SubPid), {noreply, update_stats(State#state{pmon = emqx_pmon:monitor(SubPid, PMon)})}; %% The subscriber may have subscribed multiple topics, so we need to keep monitoring the PID until %% it `unsubscribed` the last topic. @@ -512,8 +527,10 @@ update_stats(State) -> State. %% Return 'true' if the subscriber process is alive AND not in the failed list -is_active_sub(Pid, FailedSubs) -> - is_alive_sub(Pid) andalso not lists:member(Pid, FailedSubs). +is_active_sub(Pid, FailedSubs, All) -> + lists:member(Pid, All) andalso + (not maps:is_key(Pid, FailedSubs)) andalso + is_alive_sub(Pid). %% erlang:is_process_alive/1 does not work with remote pid. is_alive_sub(Pid) when ?IS_LOCAL_PID(Pid) -> diff --git a/apps/emqx/test/emqx_session_SUITE.erl b/apps/emqx/test/emqx_session_SUITE.erl index c171382c7..91bf4659b 100644 --- a/apps/emqx/test/emqx_session_SUITE.erl +++ b/apps/emqx/test/emqx_session_SUITE.erl @@ -190,7 +190,7 @@ t_publish_qos2_with_error_return(_) -> begin Msg2 = emqx_message:make(clientid, ?QOS_2, <<"t">>, <<"payload2">>), - {ok, [], Session1} = emqx_session:publish(clientinfo(), PacketId2 = 2, Msg2, Session), + {ok, [], Session1} = emqx_session:publish(clientinfo(), _PacketId2 = 2, Msg2, Session), ?assertEqual(2, emqx_session:info(awaiting_rel_cnt, Session1)), {error, RC2 = ?RC_RECEIVE_MAXIMUM_EXCEEDED} = emqx_session:publish( clientinfo(), _PacketId3 = 3, Msg2, Session1 diff --git a/apps/emqx/test/emqx_shared_sub_SUITE.erl b/apps/emqx/test/emqx_shared_sub_SUITE.erl index a428a6fa9..3455c41f9 100644 --- a/apps/emqx/test/emqx_shared_sub_SUITE.erl +++ b/apps/emqx/test/emqx_shared_sub_SUITE.erl @@ -344,9 +344,42 @@ t_sticky(Config) when is_list(Config) -> ok = ensure_config(sticky, true), test_two_messages(sticky). +%% two subscribers in one shared group +%% one unsubscribe after receiving a message +%% the other one in the group should receive the next message +t_sticky_unsubscribe(Config) when is_list(Config) -> + ok = ensure_config(sticky, false), + Topic = <<"foo/bar/sticky-unsub">>, + ClientId1 = <<"c1-sticky-unsub">>, + ClientId2 = <<"c2-sticky-unsub">>, + Group = <<"gsu">>, + {ok, ConnPid1} = emqtt:start_link([{clientid, ClientId1}]), + {ok, ConnPid2} = emqtt:start_link([{clientid, ClientId2}]), + {ok, _} = emqtt:connect(ConnPid1), + {ok, _} = emqtt:connect(ConnPid2), + + ShareTopic = <<"$share/", Group/binary, "/", Topic/binary>>, + emqtt:subscribe(ConnPid1, {ShareTopic, 0}), + emqtt:subscribe(ConnPid2, {ShareTopic, 0}), + + Message1 = emqx_message:make(ClientId1, 0, Topic, <<"hello1">>), + Message2 = emqx_message:make(ClientId2, 0, Topic, <<"hello2">>), + ct:sleep(100), + + emqx:publish(Message1), + {true, UsedSubPid1} = last_message(<<"hello1">>, [ConnPid1, ConnPid2]), + emqtt:unsubscribe(UsedSubPid1, ShareTopic), + emqx:publish(Message2), + {true, UsedSubPid2} = last_message(<<"hello2">>, [ConnPid1, ConnPid2]), + ?assertNotEqual(UsedSubPid1, UsedSubPid2), + + kill_process(ConnPid1, fun(_) -> emqtt:stop(ConnPid1) end), + kill_process(ConnPid2, fun(_) -> emqtt:stop(ConnPid2) end), + ok. + t_hash(Config) when is_list(Config) -> - ok = ensure_config(hash, false), - test_two_messages(hash). + ok = ensure_config(hash_clientid, false), + test_two_messages(hash_clientid). t_hash_clinetid(Config) when is_list(Config) -> ok = ensure_config(hash_clientid, false), @@ -453,7 +486,7 @@ test_two_messages(Strategy, Group) -> sticky -> ?assertEqual(UsedSubPid1, UsedSubPid2); round_robin -> ?assertNotEqual(UsedSubPid1, UsedSubPid2); round_robin_per_group -> ?assertNotEqual(UsedSubPid1, UsedSubPid2); - hash -> ?assertEqual(UsedSubPid1, UsedSubPid2); + hash_clientid -> ?assertEqual(UsedSubPid1, UsedSubPid2); _ -> ok end, ok. diff --git a/apps/emqx_connector/src/emqx_connector_jwt.erl b/apps/emqx_connector/src/emqx_connector_jwt.erl index 3dc39c675..c5cd54cb9 100644 --- a/apps/emqx_connector/src/emqx_connector_jwt.erl +++ b/apps/emqx_connector/src/emqx_connector_jwt.erl @@ -16,7 +16,7 @@ -module(emqx_connector_jwt). --include("emqx_connector_tables.hrl"). +-include_lib("emqx_connector/include/emqx_connector_tables.hrl"). -include_lib("emqx_resource/include/emqx_resource.hrl"). %% API diff --git a/apps/emqx_connector/src/emqx_connector_jwt_sup.erl b/apps/emqx_connector/src/emqx_connector_jwt_sup.erl index 611e152bd..ac1d22b71 100644 --- a/apps/emqx_connector/src/emqx_connector_jwt_sup.erl +++ b/apps/emqx_connector/src/emqx_connector_jwt_sup.erl @@ -18,7 +18,7 @@ -behaviour(supervisor). --include("emqx_connector_tables.hrl"). +-include_lib("emqx_connector/include/emqx_connector_tables.hrl"). -export([ start_link/0, diff --git a/changes/v5.0.13-en.md b/changes/v5.0.13-en.md index 1691e3b81..acd210e71 100644 --- a/changes/v5.0.13-en.md +++ b/changes/v5.0.13-en.md @@ -17,3 +17,6 @@ ## Bug fixes - Trigger `message.dropped` hook when QoS2 message is resend by client with a same packet id, or 'awaiting_rel' queue is full [#9487](https://github.com/emqx/emqx/pull/9487). + +- Fix shared subscription 'sticky' strategy [#9578](https://github.com/emqx/emqx/pull/9578). + Prior to this change, a 'sticky' subscriber may continue to receive messages after unsubscribing. diff --git a/changes/v5.0.13-zh.md b/changes/v5.0.13-zh.md index 3f5d5b3a0..9b74dc5f0 100644 --- a/changes/v5.0.13-zh.md +++ b/changes/v5.0.13-zh.md @@ -17,3 +17,6 @@ ## 修复 - 当 QoS2 消息被重发(使用相同 Packet ID),或当 'awaiting_rel' 队列已满时,触发消息丢弃钩子(`message.dropped`)及计数器 [#9487](https://github.com/emqx/emqx/pull/9487)。 + +- 修复共享订阅的 'sticky' 策略 [#9578](https://github.com/emqx/emqx/pull/9578)。 + 在此修复前,使用 'sticky' 策略的订阅客户端可能在取消订阅后继续收到消息。