From 2348e612faf1b44e0d706cbc8a4992fedf49d8ab Mon Sep 17 00:00:00 2001 From: Tobias Lindahl Date: Tue, 7 Dec 2021 16:05:49 +0100 Subject: [PATCH] fix(emqx_channel): fix race condition in session takeover Sessions must not enqueue messages when another process is taking over the client id, since it already passed on the message queue in the session state. Without this fix, messages arriving after `{takeover, 'begin'}` to a channel with no connection (i.e., a persistent session) would be lost. --- src/emqx_channel.erl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/emqx_channel.erl b/src/emqx_channel.erl index 7bfef472d..f33b3b337 100644 --- a/src/emqx_channel.erl +++ b/src/emqx_channel.erl @@ -720,20 +720,25 @@ maybe_update_expiry_interval(_Properties, Channel) -> Channel. -spec(handle_deliver(list(emqx_types:deliver()), channel()) -> {ok, channel()} | {ok, replies(), channel()}). +handle_deliver(Delivers, Channel = #channel{takeover = true, + pendings = Pendings, + session = Session, + clientinfo = #{clientid := ClientId}}) -> + %% NOTE: Order is important here. While the takeover is in + %% progress, the session cannot enqueue messages, since it already + %% passed on the queue to the new connection in the session state. 
+ NPendings = lists:append(Pendings, ignore_local(maybe_nack(Delivers), ClientId, Session)), + {ok, Channel#channel{pendings = NPendings}}; + handle_deliver(Delivers, Channel = #channel{conn_state = disconnected, + takeover = false, session = Session, clientinfo = #{clientid := ClientId}}) -> NSession = emqx_session:enqueue(ignore_local(maybe_nack(Delivers), ClientId, Session), Session), {ok, Channel#channel{session = NSession}}; -handle_deliver(Delivers, Channel = #channel{takeover = true, - pendings = Pendings, - session = Session, - clientinfo = #{clientid := ClientId}}) -> - NPendings = lists:append(Pendings, ignore_local(maybe_nack(Delivers), ClientId, Session)), - {ok, Channel#channel{pendings = NPendings}}; - handle_deliver(Delivers, Channel = #channel{session = Session, + takeover = false, clientinfo = #{clientid := ClientId}}) -> case emqx_session:deliver(ignore_local(Delivers, ClientId, Session), Session) of {ok, Publishes, NSession} ->