Merge pull request #11975 from zmstone/1119-fix-socket-close-race-condition

fix(emqx_channel): do not log stale sock_close event as error
This commit is contained in:
Zaiming (Stone) Shi 2023-11-21 16:47:03 +01:00 committed by GitHub
commit fa91bacdfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 7 deletions

View File

@@ -1246,8 +1246,10 @@ handle_info(
{ok, Channel3} -> {ok, ?REPLY_EVENT(disconnected), Channel3}; {ok, Channel3} -> {ok, ?REPLY_EVENT(disconnected), Channel3};
Shutdown -> Shutdown Shutdown -> Shutdown
end; end;
handle_info({sock_closed, Reason}, Channel = #channel{conn_state = disconnected}) -> handle_info({sock_closed, _Reason}, Channel = #channel{conn_state = disconnected}) ->
?SLOG(error, #{msg => "unexpected_sock_close", reason => Reason}), %% This can happen as a race:
%% EMQX closes socket and marks 'disconnected' but 'tcp_closed' or 'ssl_closed'
%% is already in process mailbox
{ok, Channel}; {ok, Channel};
handle_info(clean_authz_cache, Channel) -> handle_info(clean_authz_cache, Channel) ->
ok = emqx_authz_cache:empty_authz_cache(), ok = emqx_authz_cache:empty_authz_cache(),

View File

@@ -963,13 +963,12 @@ handle_info(
NChannel = ensure_disconnected(Reason, Channel), NChannel = ensure_disconnected(Reason, Channel),
shutdown(Reason, NChannel); shutdown(Reason, NChannel);
handle_info( handle_info(
{sock_closed, Reason}, {sock_closed, _Reason},
Channel = #channel{conn_state = disconnected} Channel = #channel{conn_state = disconnected}
) -> ) ->
?SLOG(error, #{ %% This can happen as a race:
msg => "unexpected_sock_closed", %% EMQX closes socket and marks 'disconnected' but 'tcp_closed' or 'ssl_closed'
reason => Reason %% is already in process mailbox
}),
{ok, Channel}; {ok, Channel};
handle_info(clean_authz_cache, Channel) -> handle_info(clean_authz_cache, Channel) ->
ok = emqx_authz_cache:empty_authz_cache(), ok = emqx_authz_cache:empty_authz_cache(),

View File

@@ -0,0 +1,5 @@
Resolve redundant error logging on socket closure
Addressed a race condition causing duplicate error logs when a socket is closed by both a peer and the server.
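Previously, when EMQX closed a socket and marked the channel as 'disconnected' while a 'tcp_closed' or 'ssl_closed' event from the OS was already in the process mailbox, the stale socket-close event was logged at error level, leading to excessive error logging.
The fix ignores such stale socket-close events on an already disconnected channel instead of logging them as errors.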