Merge pull request #13076 from thalesmg/fix-ds-session-count-client-api-r57-20240520
fix(client mgmt api): cache disconnected durable session count for `/clients` api
This commit is contained in:
commit
31a35f2a15
|
@ -21,7 +21,8 @@
|
||||||
%% API
|
%% API
|
||||||
-export([
|
-export([
|
||||||
start_link/0,
|
start_link/0,
|
||||||
get_subscription_count/0
|
get_subscription_count/0,
|
||||||
|
get_disconnected_session_count/0
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% `gen_server' API
|
%% `gen_server' API
|
||||||
|
@ -39,7 +40,9 @@
|
||||||
|
|
||||||
%% call/cast/info events
|
%% call/cast/info events
|
||||||
-record(tally_subs, {}).
|
-record(tally_subs, {}).
|
||||||
|
-record(tally_disconnected_sessions, {}).
|
||||||
-record(get_subscription_count, {}).
|
-record(get_subscription_count, {}).
|
||||||
|
-record(get_disconnected_session_count, {}).
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
%% API
|
%% API
|
||||||
|
@ -59,6 +62,16 @@ get_subscription_count() ->
|
||||||
0
|
0
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% @doc Gets a cached view of the cluster-global count of disconnected persistent sessions.
|
||||||
|
-spec get_disconnected_session_count() -> non_neg_integer().
|
||||||
|
get_disconnected_session_count() ->
|
||||||
|
case emqx_persistent_message:is_persistence_enabled() of
|
||||||
|
true ->
|
||||||
|
gen_server:call(?MODULE, #get_disconnected_session_count{}, infinity);
|
||||||
|
false ->
|
||||||
|
0
|
||||||
|
end.
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
%% `gen_server' API
|
%% `gen_server' API
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
|
@ -66,7 +79,10 @@ get_subscription_count() ->
|
||||||
init(_Opts) ->
|
init(_Opts) ->
|
||||||
case emqx_persistent_message:is_persistence_enabled() of
|
case emqx_persistent_message:is_persistence_enabled() of
|
||||||
true ->
|
true ->
|
||||||
State = #{subs_count => 0},
|
State = #{
|
||||||
|
subs_count => 0,
|
||||||
|
disconnected_session_count => 0
|
||||||
|
},
|
||||||
{ok, State, {continue, #tally_subs{}}};
|
{ok, State, {continue, #tally_subs{}}};
|
||||||
false ->
|
false ->
|
||||||
ignore
|
ignore
|
||||||
|
@ -75,11 +91,18 @@ init(_Opts) ->
|
||||||
handle_continue(#tally_subs{}, State0) ->
|
handle_continue(#tally_subs{}, State0) ->
|
||||||
State = tally_persistent_subscriptions(State0),
|
State = tally_persistent_subscriptions(State0),
|
||||||
ensure_subs_tally_timer(),
|
ensure_subs_tally_timer(),
|
||||||
|
{noreply, State, {continue, #tally_disconnected_sessions{}}};
|
||||||
|
handle_continue(#tally_disconnected_sessions{}, State0) ->
|
||||||
|
State = tally_disconnected_persistent_sessions(State0),
|
||||||
|
ensure_disconnected_sessions_tally_timer(),
|
||||||
{noreply, State}.
|
{noreply, State}.
|
||||||
|
|
||||||
handle_call(#get_subscription_count{}, _From, State) ->
|
handle_call(#get_subscription_count{}, _From, State) ->
|
||||||
#{subs_count := N} = State,
|
#{subs_count := N} = State,
|
||||||
{reply, N, State};
|
{reply, N, State};
|
||||||
|
handle_call(#get_disconnected_session_count{}, _From, State) ->
|
||||||
|
#{disconnected_session_count := N} = State,
|
||||||
|
{reply, N, State};
|
||||||
handle_call(_Call, _From, State) ->
|
handle_call(_Call, _From, State) ->
|
||||||
{reply, {error, bad_call}, State}.
|
{reply, {error, bad_call}, State}.
|
||||||
|
|
||||||
|
@ -90,6 +113,10 @@ handle_info(#tally_subs{}, State0) ->
|
||||||
State = tally_persistent_subscriptions(State0),
|
State = tally_persistent_subscriptions(State0),
|
||||||
ensure_subs_tally_timer(),
|
ensure_subs_tally_timer(),
|
||||||
{noreply, State};
|
{noreply, State};
|
||||||
|
handle_info(#tally_disconnected_sessions{}, State0) ->
|
||||||
|
State = tally_disconnected_persistent_sessions(State0),
|
||||||
|
ensure_disconnected_sessions_tally_timer(),
|
||||||
|
{noreply, State};
|
||||||
handle_info(_Info, State) ->
|
handle_info(_Info, State) ->
|
||||||
{noreply, State}.
|
{noreply, State}.
|
||||||
|
|
||||||
|
@ -101,7 +128,38 @@ tally_persistent_subscriptions(State0) ->
|
||||||
N = emqx_persistent_session_ds_state:total_subscription_count(),
|
N = emqx_persistent_session_ds_state:total_subscription_count(),
|
||||||
State0#{subs_count := N}.
|
State0#{subs_count := N}.
|
||||||
|
|
||||||
|
tally_disconnected_persistent_sessions(State0) ->
|
||||||
|
N = do_tally_disconnected_persistent_sessions(),
|
||||||
|
State0#{disconnected_session_count := N}.
|
||||||
|
|
||||||
ensure_subs_tally_timer() ->
|
ensure_subs_tally_timer() ->
|
||||||
Timeout = emqx_config:get([durable_sessions, subscription_count_refresh_interval]),
|
Timeout = emqx_config:get([durable_sessions, subscription_count_refresh_interval]),
|
||||||
_ = erlang:send_after(Timeout, self(), #tally_subs{}),
|
_ = erlang:send_after(Timeout, self(), #tally_subs{}),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
ensure_disconnected_sessions_tally_timer() ->
|
||||||
|
Timeout = emqx_config:get([durable_sessions, disconnected_session_count_refresh_interval]),
|
||||||
|
_ = erlang:send_after(Timeout, self(), #tally_disconnected_sessions{}),
|
||||||
|
ok.
|
||||||
|
|
||||||
|
do_tally_disconnected_persistent_sessions() ->
|
||||||
|
Iter = emqx_persistent_session_ds_state:make_session_iterator(),
|
||||||
|
do_tally_disconnected_persistent_sessions(Iter, 0).
|
||||||
|
|
||||||
|
do_tally_disconnected_persistent_sessions('$end_of_table', N) ->
|
||||||
|
N;
|
||||||
|
do_tally_disconnected_persistent_sessions(Iter0, N) ->
|
||||||
|
case emqx_persistent_session_ds_state:session_iterator_next(Iter0, 1) of
|
||||||
|
{[], _} ->
|
||||||
|
N;
|
||||||
|
{[{Id, _Meta}], Iter} ->
|
||||||
|
case is_live_session(Id) of
|
||||||
|
true ->
|
||||||
|
do_tally_disconnected_persistent_sessions(Iter, N);
|
||||||
|
false ->
|
||||||
|
do_tally_disconnected_persistent_sessions(Iter, N + 1)
|
||||||
|
end
|
||||||
|
end.
|
||||||
|
|
||||||
|
is_live_session(Id) ->
|
||||||
|
[] =/= emqx_cm_registry:lookup_channels(Id).
|
||||||
|
|
|
@ -1719,6 +1719,14 @@ fields("durable_sessions") ->
|
||||||
importance => ?IMPORTANCE_HIDDEN
|
importance => ?IMPORTANCE_HIDDEN
|
||||||
}
|
}
|
||||||
)},
|
)},
|
||||||
|
{"disconnected_session_count_refresh_interval",
|
||||||
|
sc(
|
||||||
|
timeout_duration(),
|
||||||
|
#{
|
||||||
|
default => <<"5s">>,
|
||||||
|
importance => ?IMPORTANCE_HIDDEN
|
||||||
|
}
|
||||||
|
)},
|
||||||
{"message_retention_period",
|
{"message_retention_period",
|
||||||
sc(
|
sc(
|
||||||
timeout_duration(),
|
timeout_duration(),
|
||||||
|
|
|
@ -476,6 +476,7 @@ zone_global_defaults() ->
|
||||||
renew_streams_interval => 5000,
|
renew_streams_interval => 5000,
|
||||||
session_gc_batch_size => 100,
|
session_gc_batch_size => 100,
|
||||||
session_gc_interval => 600000,
|
session_gc_interval => 600000,
|
||||||
subscription_count_refresh_interval => 5000
|
subscription_count_refresh_interval => 5000,
|
||||||
|
disconnected_session_count_refresh_interval => 5000
|
||||||
}
|
}
|
||||||
}.
|
}.
|
||||||
|
|
|
@ -72,6 +72,7 @@
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-define(GAUGE_SAMPLER_LIST, [
|
-define(GAUGE_SAMPLER_LIST, [
|
||||||
|
disconnected_durable_sessions,
|
||||||
durable_subscriptions,
|
durable_subscriptions,
|
||||||
subscriptions,
|
subscriptions,
|
||||||
topics,
|
topics,
|
||||||
|
|
|
@ -262,6 +262,8 @@ merge_cluster_rate(Node, Cluster) ->
|
||||||
Fun =
|
Fun =
|
||||||
fun
|
fun
|
||||||
%% cluster-synced values
|
%% cluster-synced values
|
||||||
|
(disconnected_durable_sessions, V, NCluster) ->
|
||||||
|
NCluster#{disconnected_durable_sessions => V};
|
||||||
(durable_subscriptions, V, NCluster) ->
|
(durable_subscriptions, V, NCluster) ->
|
||||||
NCluster#{durable_subscriptions => V};
|
NCluster#{durable_subscriptions => V};
|
||||||
(topics, V, NCluster) ->
|
(topics, V, NCluster) ->
|
||||||
|
@ -417,22 +419,40 @@ getstats(Key) ->
|
||||||
_:_ -> 0
|
_:_ -> 0
|
||||||
end.
|
end.
|
||||||
|
|
||||||
stats(connections) -> emqx_stats:getstat('connections.count');
|
stats(connections) ->
|
||||||
stats(durable_subscriptions) -> emqx_stats:getstat('durable_subscriptions.count');
|
emqx_stats:getstat('connections.count');
|
||||||
stats(live_connections) -> emqx_stats:getstat('live_connections.count');
|
stats(disconnected_durable_sessions) ->
|
||||||
stats(cluster_sessions) -> emqx_stats:getstat('cluster_sessions.count');
|
emqx_persistent_session_bookkeeper:get_disconnected_session_count();
|
||||||
stats(topics) -> emqx_stats:getstat('topics.count');
|
stats(durable_subscriptions) ->
|
||||||
stats(subscriptions) -> emqx_stats:getstat('subscriptions.count');
|
emqx_stats:getstat('durable_subscriptions.count');
|
||||||
stats(shared_subscriptions) -> emqx_stats:getstat('subscriptions.shared.count');
|
stats(live_connections) ->
|
||||||
stats(retained_msg_count) -> emqx_stats:getstat('retained.count');
|
emqx_stats:getstat('live_connections.count');
|
||||||
stats(received) -> emqx_metrics:val('messages.received');
|
stats(cluster_sessions) ->
|
||||||
stats(received_bytes) -> emqx_metrics:val('bytes.received');
|
emqx_stats:getstat('cluster_sessions.count');
|
||||||
stats(sent) -> emqx_metrics:val('messages.sent');
|
stats(topics) ->
|
||||||
stats(sent_bytes) -> emqx_metrics:val('bytes.sent');
|
emqx_stats:getstat('topics.count');
|
||||||
stats(validation_succeeded) -> emqx_metrics:val('messages.validation_succeeded');
|
stats(subscriptions) ->
|
||||||
stats(validation_failed) -> emqx_metrics:val('messages.validation_failed');
|
emqx_stats:getstat('subscriptions.count');
|
||||||
stats(dropped) -> emqx_metrics:val('messages.dropped');
|
stats(shared_subscriptions) ->
|
||||||
stats(persisted) -> emqx_metrics:val('messages.persisted').
|
emqx_stats:getstat('subscriptions.shared.count');
|
||||||
|
stats(retained_msg_count) ->
|
||||||
|
emqx_stats:getstat('retained.count');
|
||||||
|
stats(received) ->
|
||||||
|
emqx_metrics:val('messages.received');
|
||||||
|
stats(received_bytes) ->
|
||||||
|
emqx_metrics:val('bytes.received');
|
||||||
|
stats(sent) ->
|
||||||
|
emqx_metrics:val('messages.sent');
|
||||||
|
stats(sent_bytes) ->
|
||||||
|
emqx_metrics:val('bytes.sent');
|
||||||
|
stats(validation_succeeded) ->
|
||||||
|
emqx_metrics:val('messages.validation_succeeded');
|
||||||
|
stats(validation_failed) ->
|
||||||
|
emqx_metrics:val('messages.validation_failed');
|
||||||
|
stats(dropped) ->
|
||||||
|
emqx_metrics:val('messages.dropped');
|
||||||
|
stats(persisted) ->
|
||||||
|
emqx_metrics:val('messages.persisted').
|
||||||
|
|
||||||
%% -------------------------------------------------------------------------------------------------
|
%% -------------------------------------------------------------------------------------------------
|
||||||
%% Retained && License Quota
|
%% Retained && License Quota
|
||||||
|
|
|
@ -194,6 +194,8 @@ swagger_desc(validation_failed) ->
|
||||||
swagger_desc_format("Schema validations failed ");
|
swagger_desc_format("Schema validations failed ");
|
||||||
swagger_desc(persisted) ->
|
swagger_desc(persisted) ->
|
||||||
swagger_desc_format("Messages saved to the durable storage ");
|
swagger_desc_format("Messages saved to the durable storage ");
|
||||||
|
swagger_desc(disconnected_durable_sessions) ->
|
||||||
|
<<"Disconnected durable sessions at the time of sampling.", ?APPROXIMATE_DESC>>;
|
||||||
swagger_desc(durable_subscriptions) ->
|
swagger_desc(durable_subscriptions) ->
|
||||||
<<"Subscriptions from durable sessions at the time of sampling.", ?APPROXIMATE_DESC>>;
|
<<"Subscriptions from durable sessions at the time of sampling.", ?APPROXIMATE_DESC>>;
|
||||||
swagger_desc(subscriptions) ->
|
swagger_desc(subscriptions) ->
|
||||||
|
|
|
@ -341,6 +341,8 @@ t_persistent_session_stats(_Config) ->
|
||||||
?retry(1_000, 10, begin
|
?retry(1_000, 10, begin
|
||||||
?assertMatch(
|
?assertMatch(
|
||||||
{ok, #{
|
{ok, #{
|
||||||
|
<<"connections">> := 2,
|
||||||
|
<<"disconnected_durable_sessions">> := 0,
|
||||||
%% N.B.: we currently don't perform any deduplication between persistent
|
%% N.B.: we currently don't perform any deduplication between persistent
|
||||||
%% and non-persistent routes, so we count `commont/topic' twice and get 8
|
%% and non-persistent routes, so we count `commont/topic' twice and get 8
|
||||||
%% instead of 6 here.
|
%% instead of 6 here.
|
||||||
|
@ -356,6 +358,29 @@ t_persistent_session_stats(_Config) ->
|
||||||
?assert(PSRouteCount > 0, #{ps_route_count => PSRouteCount}),
|
?assert(PSRouteCount > 0, #{ps_route_count => PSRouteCount}),
|
||||||
PSSubCount = emqx_persistent_session_bookkeeper:get_subscription_count(),
|
PSSubCount = emqx_persistent_session_bookkeeper:get_subscription_count(),
|
||||||
?assert(PSSubCount > 0, #{ps_sub_count => PSSubCount}),
|
?assert(PSSubCount > 0, #{ps_sub_count => PSSubCount}),
|
||||||
|
|
||||||
|
%% Now with disconnected but alive persistent sessions
|
||||||
|
{ok, {ok, _}} =
|
||||||
|
?wait_async_action(
|
||||||
|
emqtt:disconnect(PSClient),
|
||||||
|
#{?snk_kind := dashboard_monitor_flushed}
|
||||||
|
),
|
||||||
|
?retry(1_000, 10, begin
|
||||||
|
?assertMatch(
|
||||||
|
{ok, #{
|
||||||
|
<<"connections">> := 1,
|
||||||
|
<<"disconnected_durable_sessions">> := 1,
|
||||||
|
%% N.B.: we currently don't perform any deduplication between persistent
|
||||||
|
%% and non-persistent routes, so we count `commont/topic' twice and get 8
|
||||||
|
%% instead of 6 here.
|
||||||
|
<<"topics">> := 8,
|
||||||
|
<<"durable_subscriptions">> := 4,
|
||||||
|
<<"subscriptions">> := 4
|
||||||
|
}},
|
||||||
|
request(["monitor_current"])
|
||||||
|
)
|
||||||
|
end),
|
||||||
|
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
request(Path) ->
|
request(Path) ->
|
||||||
|
|
|
@ -1384,7 +1384,6 @@ do_list_clients_cluster_query(
|
||||||
{Rows, QueryState1 = #{complete := Complete0}} ->
|
{Rows, QueryState1 = #{complete := Complete0}} ->
|
||||||
case emqx_mgmt_api:accumulate_query_rows(Node, Rows, QueryState1, ResultAcc) of
|
case emqx_mgmt_api:accumulate_query_rows(Node, Rows, QueryState1, ResultAcc) of
|
||||||
{enough, NResultAcc} ->
|
{enough, NResultAcc} ->
|
||||||
%% TODO: add persistent session count?
|
|
||||||
%% TODO: this may return `{error, _, _}'...
|
%% TODO: this may return `{error, _, _}'...
|
||||||
QueryState2 = emqx_mgmt_api:maybe_collect_total_from_tail_nodes(
|
QueryState2 = emqx_mgmt_api:maybe_collect_total_from_tail_nodes(
|
||||||
Tail, QueryState1
|
Tail, QueryState1
|
||||||
|
@ -1428,8 +1427,9 @@ add_persistent_session_count(QueryState0 = #{total := Totals0}) ->
|
||||||
%% to traverse the whole table), but also hard to deduplicate live connections
|
%% to traverse the whole table), but also hard to deduplicate live connections
|
||||||
%% from it... So this count will possibly overshoot the true count of
|
%% from it... So this count will possibly overshoot the true count of
|
||||||
%% sessions.
|
%% sessions.
|
||||||
SessionCount = persistent_session_count(),
|
DisconnectedSessionCount =
|
||||||
Totals = Totals0#{undefined => SessionCount},
|
emqx_persistent_session_bookkeeper:get_disconnected_session_count(),
|
||||||
|
Totals = Totals0#{undefined => DisconnectedSessionCount},
|
||||||
QueryState0#{total := Totals};
|
QueryState0#{total := Totals};
|
||||||
false ->
|
false ->
|
||||||
QueryState0
|
QueryState0
|
||||||
|
@ -1477,27 +1477,6 @@ no_persistent_sessions() ->
|
||||||
true
|
true
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec persistent_session_count() -> non_neg_integer().
|
|
||||||
persistent_session_count() ->
|
|
||||||
%% N.B.: this is potentially costly. Should not be called in hot paths.
|
|
||||||
%% `mnesia:table_info(_, size)' is always zero for rocksdb, so we need to traverse...
|
|
||||||
do_persistent_session_count(init_persistent_session_iterator(), 0).
|
|
||||||
|
|
||||||
do_persistent_session_count('$end_of_table', N) ->
|
|
||||||
N;
|
|
||||||
do_persistent_session_count(Cursor, N) ->
|
|
||||||
case emqx_persistent_session_ds_state:session_iterator_next(Cursor, 1) of
|
|
||||||
{[], _} ->
|
|
||||||
N;
|
|
||||||
{[{_Id, Meta}], NextCursor} ->
|
|
||||||
case is_expired(Meta) of
|
|
||||||
true ->
|
|
||||||
do_persistent_session_count(NextCursor, N);
|
|
||||||
false ->
|
|
||||||
do_persistent_session_count(NextCursor, N + 1)
|
|
||||||
end
|
|
||||||
end.
|
|
||||||
|
|
||||||
is_expired(#{last_alive_at := LastAliveAt, expiry_interval := ExpiryInterval}) ->
|
is_expired(#{last_alive_at := LastAliveAt, expiry_interval := ExpiryInterval}) ->
|
||||||
LastAliveAt + ExpiryInterval < erlang:system_time(millisecond).
|
LastAliveAt + ExpiryInterval < erlang:system_time(millisecond).
|
||||||
|
|
||||||
|
|
|
@ -79,7 +79,9 @@ end_per_suite(Config) ->
|
||||||
|
|
||||||
init_per_group(persistent_sessions, Config) ->
|
init_per_group(persistent_sessions, Config) ->
|
||||||
AppSpecs = [
|
AppSpecs = [
|
||||||
{emqx, "durable_sessions.enable = true"},
|
{emqx,
|
||||||
|
"durable_sessions.enable = true\n"
|
||||||
|
"durable_sessions.disconnected_session_count_refresh_interval = 100ms"},
|
||||||
emqx_management
|
emqx_management
|
||||||
],
|
],
|
||||||
Dashboard = emqx_mgmt_api_test_util:emqx_dashboard(
|
Dashboard = emqx_mgmt_api_test_util:emqx_dashboard(
|
||||||
|
@ -457,9 +459,7 @@ t_persistent_sessions5(Config) ->
|
||||||
{{_, 200, _}, _, #{
|
{{_, 200, _}, _, #{
|
||||||
<<"data">> := [_, _, _],
|
<<"data">> := [_, _, _],
|
||||||
<<"meta">> := #{
|
<<"meta">> := #{
|
||||||
%% TODO: if/when we fix the persistent session count, this
|
<<"count">> := 4,
|
||||||
%% should be 4.
|
|
||||||
<<"count">> := 6,
|
|
||||||
<<"hasnext">> := true
|
<<"hasnext">> := true
|
||||||
}
|
}
|
||||||
}}},
|
}}},
|
||||||
|
@ -470,9 +470,7 @@ t_persistent_sessions5(Config) ->
|
||||||
{{_, 200, _}, _, #{
|
{{_, 200, _}, _, #{
|
||||||
<<"data">> := [_],
|
<<"data">> := [_],
|
||||||
<<"meta">> := #{
|
<<"meta">> := #{
|
||||||
%% TODO: if/when we fix the persistent session count, this
|
<<"count">> := 4,
|
||||||
%% should be 4.
|
|
||||||
<<"count">> := 6,
|
|
||||||
<<"hasnext">> := false
|
<<"hasnext">> := false
|
||||||
}
|
}
|
||||||
}}},
|
}}},
|
||||||
|
@ -489,9 +487,7 @@ t_persistent_sessions5(Config) ->
|
||||||
{{_, 200, _}, _, #{
|
{{_, 200, _}, _, #{
|
||||||
<<"data">> := [_, _],
|
<<"data">> := [_, _],
|
||||||
<<"meta">> := #{
|
<<"meta">> := #{
|
||||||
%% TODO: if/when we fix the persistent session count, this
|
<<"count">> := 4,
|
||||||
%% should be 4.
|
|
||||||
<<"count">> := 6,
|
|
||||||
<<"hasnext">> := true
|
<<"hasnext">> := true
|
||||||
}
|
}
|
||||||
}}},
|
}}},
|
||||||
|
@ -1996,7 +1992,11 @@ assert_single_client(Opts) ->
|
||||||
100,
|
100,
|
||||||
20,
|
20,
|
||||||
?assertMatch(
|
?assertMatch(
|
||||||
{ok, {{_, 200, _}, _, #{<<"data">> := [#{<<"connected">> := IsConnected}]}}},
|
{ok,
|
||||||
|
{{_, 200, _}, _, #{
|
||||||
|
<<"data">> := [#{<<"connected">> := IsConnected}],
|
||||||
|
<<"meta">> := #{<<"count">> := 1}
|
||||||
|
}}},
|
||||||
list_request(APIPort)
|
list_request(APIPort)
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
Loading…
Reference in New Issue