feat: enable log throttling for potentially flooding log events

This commit is contained in:
Serge Tupchii 2024-02-19 21:30:25 +02:00
parent 98ba300f7c
commit 9bd0d1ba1b
9 changed files with 80 additions and 39 deletions

View File

@ -40,8 +40,8 @@
end
).
%% NOTE: do not forget to add every used msg to the default value of
%% `log.thorttling.msgs` list.
%% NOTE: do not forget to use atom for msg and add every used msg to
%% the default value of `log.thorttling.msgs` list.
-define(SLOG_THROTTLE(Level, Data),
?SLOG_THROTTLE(Level, Data, #{})
).

View File

@ -183,8 +183,13 @@ log_result(#{username := Username}, Topic, Action, From, Result) ->
}
end,
case Result of
allow -> ?SLOG(info, (LogMeta())#{msg => "authorization_permission_allowed"});
deny -> ?SLOG(info, (LogMeta())#{msg => "authorization_permission_denied"})
allow ->
?SLOG(info, (LogMeta())#{msg => "authorization_permission_allowed"});
deny ->
?SLOG_THROTTLE(
warning,
(LogMeta())#{msg => authorization_permission_denied}
)
end.
%% @private Format authorization rules source.

View File

@ -616,10 +616,10 @@ process_publish(Packet = ?PUBLISH_PACKET(QoS, Topic, PacketId), Channel) ->
Msg = packet_to_message(NPacket, NChannel),
do_publish(PacketId, Msg, NChannel);
{error, Rc = ?RC_NOT_AUTHORIZED, NChannel} ->
?SLOG(
info,
?SLOG_THROTTLE(
warning,
#{
msg => "cannot_publish_to_topic",
msg => cannot_publish_to_topic_due_to_not_authorized,
reason => emqx_reason_codes:name(Rc)
},
#{topic => Topic}
@ -635,10 +635,10 @@ process_publish(Packet = ?PUBLISH_PACKET(QoS, Topic, PacketId), Channel) ->
handle_out(disconnect, Rc, NChannel)
end;
{error, Rc = ?RC_QUOTA_EXCEEDED, NChannel} ->
?SLOG(
info,
?SLOG_THROTTLE(
warning,
#{
msg => "cannot_publish_to_topic",
msg => cannot_publish_to_topic_due_to_quota_exceeded,
reason => emqx_reason_codes:name(Rc)
},
#{topic => Topic}

View File

@ -50,10 +50,10 @@
-define(MSGS_LIST, emqx:get_config([log, throttling, msgs], [])).
-define(TIME_WINDOW_MS, timer:seconds(emqx:get_config([log, throttling, time_window], 60))).
-spec allow(logger:level(), string()) -> boolean().
-spec allow(logger:level(), atom()) -> boolean().
allow(debug, _Msg) ->
true;
allow(_Level, Msg) ->
allow(_Level, Msg) when is_atom(Msg) ->
Seq = persistent_term:get(?SEQ_ID(Msg), undefined),
case Seq of
undefined ->
@ -79,8 +79,9 @@ start_link() ->
init([]) ->
ok = lists:foreach(fun(Msg) -> ?NEW_THROTTLE(Msg, ?NEW_SEQ) end, ?MSGS_LIST),
TimerRef = schedule_refresh(?TIME_WINDOW_MS),
{ok, #{timer_ref => TimerRef}}.
CurrentPeriodMs = ?TIME_WINDOW_MS,
TimerRef = schedule_refresh(CurrentPeriodMs),
{ok, #{timer_ref => TimerRef, current_period_ms => CurrentPeriodMs}}.
handle_call(Req, _From, State) ->
?SLOG(error, #{msg => "unexpected_call", call => Req}),
@ -90,8 +91,7 @@ handle_cast(Msg, State) ->
?SLOG(error, #{msg => "unexpected_cast", cast => Msg}),
{noreply, State}.
handle_info(refresh, State) ->
PeriodMs = ?TIME_WINDOW_MS,
handle_info(refresh, #{current_period_ms := PeriodMs} = State) ->
Msgs = ?MSGS_LIST,
DroppedStats = lists:foldl(
fun(Msg, Acc) ->
@ -112,7 +112,11 @@ handle_info(refresh, State) ->
Msgs
),
maybe_log_dropped(DroppedStats, PeriodMs),
State1 = State#{timer_ref => schedule_refresh(PeriodMs)},
NewPeriodMs = ?TIME_WINDOW_MS,
State1 = State#{
timer_ref => schedule_refresh(NewPeriodMs),
current_period_ms => NewPeriodMs
},
{noreply, State1};
handle_info(Info, State) ->
?SLOG(error, #{msg => "unxpected_info", info => Info}),
@ -143,4 +147,5 @@ maybe_log_dropped(_DroppedStats, _PeriodMs) ->
ok.
schedule_refresh(PeriodMs) ->
?tp(log_throttler_sched_refresh, #{new_period_ms => PeriodMs}),
erlang:send_after(PeriodMs, ?MODULE, refresh).

View File

@ -62,10 +62,10 @@ handle_event(ClientInfo, {dropped, Msg, #{reason := queue_full, logctx := Ctx}})
ok = emqx_metrics:inc('delivery.dropped.queue_full'),
ok = inc_pd('send_msg.dropped', 1),
ok = inc_pd('send_msg.dropped.queue_full', 1),
?SLOG(
info,
?SLOG_THROTTLE(
warning,
Ctx#{
msg => "dropped_msg_due_to_mqueue_is_full",
msg => dropped_msg_due_to_mqueue_is_full,
payload => Msg#message.payload
},
#{topic => Msg#message.topic}

View File

@ -23,8 +23,10 @@
-include_lib("common_test/include/ct.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-define(THROTTLE_MSG, "test_throttle_msg").
-define(THROTTLE_MSG1, "test_throttle_msg1").
%% Have to use real msgs, as the schema is guarded by enum.
-define(THROTTLE_MSG, authorization_permission_denied).
-define(THROTTLE_MSG1, cannot_publish_to_topic_due_to_not_authorized).
-define(TIME_WINDOW, <<"1s">>).
all() -> emqx_common_test_helpers:all(?MODULE).
@ -39,7 +41,7 @@ init_per_suite(Config) ->
#{
log => #{
throttling => #{
time_window => <<"1s">>, msgs => [?THROTTLE_MSG]
time_window => ?TIME_WINDOW, msgs => [?THROTTLE_MSG]
}
}
}
@ -70,6 +72,10 @@ end_per_testcase(t_throttle_add_new_msg, _Config) ->
ok = snabbkaffe:stop(),
{ok, _} = emqx_conf:update([log, throttling, msgs], [?THROTTLE_MSG], #{}),
ok;
end_per_testcase(t_update_time_window, _Config) ->
ok = snabbkaffe:stop(),
{ok, _} = emqx_conf:update([log, throttling, time_window], ?TIME_WINDOW, #{}),
ok;
end_per_testcase(_TC, _Config) ->
ok = snabbkaffe:stop().
@ -87,7 +93,7 @@ t_throttle(_Config) ->
lists:seq(1, 100)
),
{ok, _} = ?block_until(
#{?snk_kind := log_throttler_dropped, throttled_msg := ?THROTTLE_MSG}, 3000
#{?snk_kind := log_throttler_dropped, throttled_msg := ?THROTTLE_MSG}, 5000
),
?assert(emqx_log_throttler:allow(warning, ?THROTTLE_MSG)),
@ -110,7 +116,7 @@ t_throttle_add_new_msg(_Config) ->
?check_trace(
begin
?block_until(
#{?snk_kind := log_throttler_new_msg, throttled_msg := ?THROTTLE_MSG1}, 3000
#{?snk_kind := log_throttler_new_msg, throttled_msg := ?THROTTLE_MSG1}, 5000
),
?assert(emqx_log_throttler:allow(warning, ?THROTTLE_MSG1)),
?assertNot(emqx_log_throttler:allow(warning, ?THROTTLE_MSG1)),
@ -128,11 +134,25 @@ t_throttle_add_new_msg(_Config) ->
t_throttle_no_msg(_Config) ->
%% Must simply pass with no crashes
?assert(emqx_log_throttler:allow(warning, "no_test_throttle_msg")),
?assert(emqx_log_throttler:allow(warning, "no_test_throttle_msg")),
?assert(emqx_log_throttler:allow(warning, no_test_throttle_msg)),
?assert(emqx_log_throttler:allow(warning, no_test_throttle_msg)),
timer:sleep(10),
?assert(erlang:is_process_alive(erlang:whereis(emqx_log_throttler))).
t_update_time_window(_Config) ->
?check_trace(
begin
?wait_async_action(
emqx_conf:update([log, throttling, time_window], <<"2s">>, #{}),
#{?snk_kind := log_throttler_sched_refresh, new_period_ms := 2000},
5000
),
timer:sleep(10),
?assert(erlang:is_process_alive(erlang:whereis(emqx_log_throttler)))
end,
[]
).
%%--------------------------------------------------------------------
%% internal functions
%%--------------------------------------------------------------------

View File

@ -75,6 +75,14 @@
%% 1 million default ports counter
-define(DEFAULT_MAX_PORTS, 1024 * 1024).
-define(LOG_THROTTLING_MSGS, [
authorization_permission_denied,
cannot_publish_to_topic_due_to_not_authorized,
cannot_publish_to_topic_due_to_quota_exceeded,
connection_rejected_due_to_license_limit_reached,
dropped_msg_due_to_mqueue_is_full
]).
%% Callback to upgrade config after loaded from config file but before validation.
upgrade_raw_conf(Raw0) ->
Raw1 = emqx_connector_schema:transform_bridges_v1_to_connectors_and_bridges_v2(Raw0),
@ -910,7 +918,7 @@ fields("log") ->
importance => ?IMPORTANCE_HIGH
}
)},
{"throttling",
{throttling,
sc(?R_REF("log_throttling"), #{
desc => ?DESC("log_throttling"),
importance => ?IMPORTANCE_MEDIUM
@ -1019,22 +1027,22 @@ fields("log_burst_limit") ->
];
fields("log_throttling") ->
[
{"window_time",
{time_window,
sc(
emqx_schema:duration_s(),
#{
default => <<"1m">>,
desc => ?DESC("log_throttling_window_time"),
desc => ?DESC("log_throttling_time_window"),
importance => ?IMPORTANCE_MEDIUM
}
)},
%% A static list of event ids used in ?SLOG_THROTTLE/3,4 macro.
%% A static list of msgs used in ?SLOG_THROTTLE/2,3 macro.
%% For internal (developer) use only.
{"event_ids",
{msgs,
sc(
hoconsc:array(atom()),
hoconsc:array(hoconsc:enum(?LOG_THROTTLING_MSGS)),
#{
default => [],
default => ?LOG_THROTTLING_MSGS,
importance => ?IMPORTANCE_HIDDEN
}
)}

View File

@ -85,7 +85,10 @@ check(_ConnInfo, AckProps) ->
{ok, #{max_connections := MaxClients}} ->
case check_max_clients_exceeded(MaxClients) of
true ->
?SLOG(info, #{msg => "connection_rejected_due_to_license_limit_reached"}),
?SLOG_THROTTLE(
error,
#{msg => connection_rejected_due_to_license_limit_reached}
),
{stop, {error, ?RC_QUOTA_EXCEEDED}};
false ->
{ok, AckProps}

View File

@ -482,11 +482,11 @@ desc_log_throttling.desc:
"""Log throttling feature reduces the number of potentially flooding logged events by
dropping all but the first event within a configured time window."""
log_throttling_window_time.desc:
"""A time interval at which log throttling is applied. Defaults to 1 minute."""
log_throttling_time_window.desc:
"""For throttled messages, only log 1 in each time window."""
log_throttling_window_time.label:
"""Log Throttling Window Time"""
log_throttling_time_window.label:
"""Log Throttling Time Window"""
cluster_dns_record_type.desc:
"""DNS record type."""