From e73bf716ae113008ffb936b30b27816ce775ce1d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Sun, 19 Nov 2023 22:08:42 +0100 Subject: [PATCH] fix(emqx_channel): do not log stale sock_close event as error In some cases, EMQX may decide to close socket and mark connection at 'disconnected' state, for example, when DISCONNECTE packet is received, or, when failed to write data to socket. However, by the time EMQX decided to close the socket, the socket might have already been closed by peer, and the `tcp_closed` envet is already delivered to the process mailbox -- causing EMQX to handle sock_close event at 'disconnected' state. --- apps/emqx/src/emqx_channel.erl | 6 ++++-- apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl | 9 ++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index 4f6d5ac6f..c2f62c840 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -1246,8 +1246,10 @@ handle_info( {ok, Channel3} -> {ok, ?REPLY_EVENT(disconnected), Channel3}; Shutdown -> Shutdown end; -handle_info({sock_closed, Reason}, Channel = #channel{conn_state = disconnected}) -> - ?SLOG(error, #{msg => "unexpected_sock_close", reason => Reason}), +handle_info({sock_closed, _Reason}, Channel = #channel{conn_state = disconnected}) -> + %% This can happen as a race: + %% EMQX closes socket and marks 'disconnected' but 'tcp_closed' or 'ssl_closed' + %% is already in process mailbox {ok, Channel}; handle_info(clean_authz_cache, Channel) -> ok = emqx_authz_cache:empty_authz_cache(), diff --git a/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl b/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl index 453fa9fd2..10d081e57 100644 --- a/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl +++ b/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl @@ -963,13 +963,12 @@ handle_info( NChannel = ensure_disconnected(Reason, Channel), shutdown(Reason, NChannel); handle_info( - {sock_closed, Reason}, + {sock_closed, _Reason}, Channel = #channel{conn_state = disconnected} ) -> - ?SLOG(error, #{ - msg => "unexpected_sock_closed", - reason => Reason - }), + %% This can happen as a race: + %% EMQX closes socket and marks 'disconnected' but 'tcp_closed' or 'ssl_closed' + %% is already in process mailbox {ok, Channel}; handle_info(clean_authz_cache, Channel) -> ok = emqx_authz_cache:empty_authz_cache(),