From 0b1f0db73c1ca6b065875de70c15493e78c5586e Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 15 Jul 2024 17:54:21 -0300 Subject: [PATCH 01/19] chore(cluster link): refactor HTTP API for CRUD operations Fixes https://emqx.atlassian.net/browse/EMQX-12627 --- .../src/emqx_cluster_link_api.erl | 168 +++++++++++++++--- .../src/emqx_cluster_link_config.erl | 102 ++++++++++- .../src/emqx_cluster_link_schema.erl | 5 +- .../test/emqx_cluster_link_api_SUITE.erl | 144 ++++++++++----- .../test/emqx_mgmt_api_test_util.erl | 27 +++ apps/emqx_utils/include/emqx_utils_api.hrl | 2 + 6 files changed, 380 insertions(+), 68 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 33634607e..324d6dd68 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -7,6 +7,7 @@ -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/http_api.hrl"). +-include_lib("emqx_utils/include/emqx_utils_api.hrl"). -export([ api_spec/0, @@ -14,7 +15,10 @@ schema/1 ]). --export([config/2]). +-export([ + '/cluster/links'/2, + '/cluster/links/:name'/2 +]). -define(CONF_PATH, [cluster, links]). -define(TAGS, [<<"Cluster">>]). @@ -24,12 +28,13 @@ api_spec() -> paths() -> [ - "/cluster/links" + "/cluster/links", + "/cluster/links/:name" ]. schema("/cluster/links") -> #{ - 'operationId' => config, + 'operationId' => '/cluster/links', get => #{ description => "Get cluster links configuration", @@ -37,14 +42,63 @@ schema("/cluster/links") -> responses => #{200 => links_config_schema()} }, - put => + post => #{ - description => "Update cluster links configuration", + description => "Create a cluster link configuration", tags => ?TAGS, - 'requestBody' => links_config_schema(), + 'requestBody' => link_config_schema(), responses => #{ - 200 => links_config_schema(), + 200 => link_config_schema(), + 400 => + emqx_dashboard_swagger:error_codes( + [?BAD_REQUEST, ?ALREADY_EXISTS], + <<"Update Config Failed">> + ) + } + } + }; +schema("/cluster/links/:name") -> + #{ + 'operationId' => '/cluster/links/:name', + get => + #{ + description => "Get a cluster link configuration", + tags => ?TAGS, + parameters => [param_path_name()], + responses => + #{ + 200 => link_config_schema(), + 404 => emqx_dashboard_swagger:error_codes( + [?NOT_FOUND], <<"Cluster link not found">> + ) + } + }, + delete => + #{ + description => "Delete a cluster link configuration", + tags => ?TAGS, + parameters => [param_path_name()], + responses => + #{ + 204 => <<"Link deleted">>, + 404 => emqx_dashboard_swagger:error_codes( + [?NOT_FOUND], <<"Cluster link not found">> + ) + } + }, + put => + #{ + description => "Update a cluster link configuration", + tags => ?TAGS, + parameters => [param_path_name()], + 'requestBody' => update_link_config_schema(), + responses => + #{ + 200 => link_config_schema(), + 404 => emqx_dashboard_swagger:error_codes( + [?NOT_FOUND], <<"Cluster link not found">> + ), 400 => emqx_dashboard_swagger:error_codes( [?BAD_REQUEST], <<"Update Config Failed">> @@ -57,28 +111,66 @@ schema("/cluster/links") -> %% API Handler funcs %%-------------------------------------------------------------------- -config(get, _Params) -> - {200, get_raw()}; -config(put, #{body := Body}) -> - case emqx_cluster_link_config:update(Body) of - {ok, NewConfig} -> - {200, NewConfig}; - {error, Reason} -> - Message = list_to_binary(io_lib:format("Update config failed ~p", [Reason])), - {400, ?BAD_REQUEST, Message} - end. +'/cluster/links'(get, _Params) -> + ?OK(get_raw()); +'/cluster/links'(post, #{body := Body = #{<<"name">> := Name}}) -> + with_link( + Name, + return(?BAD_REQUEST('ALREADY_EXISTS', <<"Cluster link already exists">>)), + fun() -> + case emqx_cluster_link_config:create(Body) of + {ok, Res} -> + ?CREATED(Res); + {error, Reason} -> + Message = list_to_binary(io_lib:format("Create link failed ~p", [Reason])), + ?BAD_REQUEST(Message) + end + end + ). + +'/cluster/links/:name'(get, #{bindings := #{name := Name}}) -> + with_link(Name, fun(Link) -> ?OK(Link) end, not_found()); +'/cluster/links/:name'(put, #{bindings := #{name := Name}, body := Params0}) -> + with_link( + Name, + fun(Link) -> + Params = Params0#{<<"name">> => Name}, + case emqx_cluster_link_config:update_one_link(Params) of + {ok, Res} -> + ?OK(Res); + {error, Reason} -> + Message = list_to_binary(io_lib:format("Update link failed ~p", [Reason])), + ?BAD_REQUEST(Message) + end + end, + not_found() + ); +'/cluster/links/:name'(delete, #{bindings := #{name := Name}}) -> + with_link( + Name, + fun() -> + case emqx_cluster_link_config:delete(Name) of + ok -> + ?NO_CONTENT; + {error, Reason} -> + Message = list_to_binary(io_lib:format("Delete link failed ~p", [Reason])), + ?BAD_REQUEST(Message) + end + end, + not_found() + ). %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- get_raw() -> - #{<<"links">> := Conf} = + #{<<"cluster">> := #{<<"links">> := Links}} = emqx_config:fill_defaults( - #{<<"links">> => emqx_conf:get_raw(?CONF_PATH)}, + #{<<"cluster">> => #{<<"links">> => emqx_conf:get_raw(?CONF_PATH)}}, #{obfuscate_sensitive_values => true} ), - Conf. + Links. links_config_schema() -> emqx_cluster_link_schema:links_schema( @@ -87,6 +179,24 @@ links_config_schema() -> } ). +link_config_schema() -> + emqx_cluster_link_schema:link_schema(). + +param_path_name() -> + {name, + hoconsc:mk( + binary(), + #{ + in => path, + required => true, + example => <<"my_link">>, + desc => ?DESC("param_path_name") + } + )}. + +update_link_config_schema() -> + proplists:delete(name, emqx_cluster_link_schema:fields("link")). + links_config_example() -> [ #{ @@ -114,3 +224,21 @@ links_config_example() -> <<"name">> => <<"emqxcl_c">> } ]. + +with_link(Name, FoundFn, NotFoundFn) -> + case emqx_cluster_link_config:link_raw(Name) of + undefined -> + NotFoundFn(); + Link = #{} -> + {arity, Arity} = erlang:fun_info(FoundFn, arity), + case Arity of + 1 -> FoundFn(Link); + 0 -> FoundFn() + end + end. + +return(Response) -> + fun() -> Response end. + +not_found() -> + return(?NOT_FOUND(<<"Cluster link not found">>)). diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index f27c7702e..36655460b 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -4,6 +4,8 @@ -module(emqx_cluster_link_config). +-feature(maybe_expr, enable). + -behaviour(emqx_config_handler). -include_lib("emqx/include/logger.hrl"). @@ -28,11 +30,15 @@ -export([ %% General + create/1, + delete/1, + update_one_link/1, update/1, cluster/0, enabled_links/0, links/0, link/1, + link_raw/1, topic_filters/1, %% Connections emqtt_options/1, @@ -55,6 +61,52 @@ %% +create(LinkConfig) -> + #{<<"name">> := Name} = LinkConfig, + case + emqx_conf:update( + ?LINKS_PATH, + {create, LinkConfig}, + #{rawconf_with_defaults => true, override_to => cluster} + ) + of + {ok, #{raw_config := NewConfigRows}} -> + NewLinkConfig = find_link(Name, NewConfigRows), + {ok, NewLinkConfig}; + {error, Reason} -> + {error, Reason} + end. + +delete(Name) -> + case + emqx_conf:update( + ?LINKS_PATH, + {delete, Name}, + #{rawconf_with_defaults => true, override_to => cluster} + ) + of + {ok, _} -> + ok; + {error, Reason} -> + {error, Reason} + end. + +update_one_link(LinkConfig) -> + #{<<"name">> := Name} = LinkConfig, + case + emqx_conf:update( + ?LINKS_PATH, + {update, LinkConfig}, + #{rawconf_with_defaults => true, override_to => cluster} + ) + of + {ok, #{raw_config := NewConfigRows}} -> + NewLinkConfig = find_link(Name, NewConfigRows), + {ok, NewLinkConfig}; + {error, Reason} -> + {error, Reason} + end. + update(Config) -> case emqx_conf:update( @@ -75,11 +127,20 @@ cluster() -> links() -> emqx:get_config(?LINKS_PATH, []). +links_raw() -> + emqx:get_raw_config(?LINKS_PATH, []). + enabled_links() -> [L || L = #{enable := true} <- links()]. link(Name) -> - case lists:dropwhile(fun(L) -> Name =/= upstream_name(L) end, links()) of + find_link(Name, links()). + +link_raw(Name) -> + find_link(Name, links_raw()). + +find_link(Name, Links) -> + case lists:dropwhile(fun(L) -> Name =/= upstream_name(L) end, Links) of [LinkConf | _] -> LinkConf; [] -> undefined end. @@ -133,6 +194,37 @@ remove_handler() -> pre_config_update(?LINKS_PATH, RawConf, RawConf) -> {ok, RawConf}; +pre_config_update(?LINKS_PATH, {create, LinkRawConf}, OldRawConf) -> + #{<<"name">> := Name} = LinkRawConf, + maybe + undefined ?= find_link(Name, OldRawConf), + NewRawConf0 = OldRawConf ++ [LinkRawConf], + NewRawConf = convert_certs(maybe_increment_ps_actor_incr(NewRawConf0, OldRawConf)), + {ok, NewRawConf} + else + _ -> + {error, already_exists} + end; +pre_config_update(?LINKS_PATH, {update, LinkRawConf}, OldRawConf) -> + #{<<"name">> := Name} = LinkRawConf, + maybe + {ok, {_Found, Front, Rear}} = safe_take(Name, OldRawConf), + NewRawConf0 = Front ++ [LinkRawConf] ++ Rear, + NewRawConf = convert_certs(maybe_increment_ps_actor_incr(NewRawConf0, OldRawConf)), + {ok, NewRawConf} + else + not_found -> + {error, not_found} + end; +pre_config_update(?LINKS_PATH, {delete, Name}, OldRawConf) -> + maybe + {ok, {_Found, Front, Rear}} = safe_take(Name, OldRawConf), + NewRawConf = Front ++ Rear, + {ok, NewRawConf} + else + _ -> + {error, not_found} + end; pre_config_update(?LINKS_PATH, NewRawConf, OldRawConf) -> {ok, convert_certs(maybe_increment_ps_actor_incr(NewRawConf, OldRawConf))}. @@ -320,3 +412,11 @@ do_convert_certs(LinkName, SSLOpts) -> ), throw({bad_ssl_config, Reason}) end. + +safe_take(Name, Transformations) -> + case lists:splitwith(fun(#{<<"name">> := N}) -> N =/= Name end, Transformations) of + {_Front, []} -> + not_found; + {Front, [Found | Rear]} -> + {ok, {Found, Front, Rear}} + end. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl index f46249a4f..a6073d677 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl @@ -12,7 +12,7 @@ -export([injected_fields/0]). %% Used in emqx_cluster_link_api --export([links_schema/1]). +-export([links_schema/1, link_schema/0]). -export([ roots/0, @@ -37,6 +37,9 @@ links_schema(Meta) -> default => [], validator => fun links_validator/1, desc => ?DESC("links") }). +link_schema() -> + hoconsc:ref(?MODULE, "link"). + fields("link") -> [ {enable, ?HOCON(boolean(), #{default => true, desc => ?DESC(enable)})}, diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index c5ec8da6c..5bb1c377a 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -37,6 +37,10 @@ "-----END CERTIFICATE-----" >>). +%%------------------------------------------------------------------------------ +%% CT boilerplate +%%------------------------------------------------------------------------------ + all() -> emqx_common_test_helpers:all(?MODULE). @@ -47,7 +51,7 @@ init_per_suite(Config) -> [ emqx_conf, emqx_management, - {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"}, + emqx_mgmt_api_test_util:emqx_dashboard(), emqx_cluster_link ], #{work_dir => emqx_cth_suite:work_dir(Config)} @@ -61,8 +65,7 @@ end_per_suite(Config) -> ok. auth_header() -> - {ok, API} = emqx_common_test_http:create_default_app(), - emqx_common_test_http:auth_header(API). + emqx_mgmt_api_test_util:auth_header_(). init_per_testcase(_TC, Config) -> {ok, _} = emqx_cluster_link_config:update([]), @@ -71,62 +74,111 @@ init_per_testcase(_TC, Config) -> end_per_testcase(_TC, _Config) -> ok. -t_put_get_valid(Config) -> - Auth = ?config(auth, Config), - Path = ?API_PATH, - {ok, Resp} = emqx_mgmt_api_test_util:request_api(get, Path, Auth), - ?assertMatch([], emqx_utils_json:decode(Resp)), +%%------------------------------------------------------------------------------ +%% Helper fns +%%------------------------------------------------------------------------------ - Link1 = #{ +api_root() -> + <<"cluster/links">>. + +list() -> + Path = emqx_mgmt_api_test_util:api_path([api_root()]), + emqx_mgmt_api_test_util:simple_request(get, Path, _Params = ""). + +get_link(Name) -> + Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + emqx_mgmt_api_test_util:simple_request(get, Path, _Params = ""). + +delete_link(Name) -> + Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + emqx_mgmt_api_test_util:simple_request(delete, Path, _Params = ""). + +update_link(Name, Params) -> + Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + emqx_mgmt_api_test_util:simple_request(put, Path, Params). + +create_link(Name, Params0) -> + Params = Params0#{<<"name">> => Name}, + Path = emqx_mgmt_api_test_util:api_path([api_root()]), + emqx_mgmt_api_test_util:simple_request(post, Path, Params). + +link_params() -> + link_params(_Overrides = #{}). + +link_params(Overrides) -> + Default = #{ + <<"clientid">> => <<"linkclientid">>, + <<"username">> => <<"myusername">>, <<"pool_size">> => 1, <<"server">> => <<"emqxcl_2.nohost:31883">>, - <<"topics">> => [<<"t/test-topic">>, <<"t/test/#">>], - <<"name">> => <<"emqcl_1">> + <<"topics">> => [<<"t/test-topic">>, <<"t/test/#">>] }, - Link2 = #{ - <<"pool_size">> => 1, - <<"server">> => <<"emqxcl_2.nohost:41883">>, - <<"topics">> => [<<"t/test-topic">>, <<"t/test/#">>], - <<"name">> => <<"emqcl_2">> - }, - ?assertMatch({ok, _}, emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, [Link1, Link2])), + emqx_utils_maps:deep_merge(Default, Overrides). - {ok, Resp1} = emqx_mgmt_api_test_util:request_api(get, Path, Auth), - ?assertMatch([Link1, Link2], emqx_utils_json:decode(Resp1)), +%%------------------------------------------------------------------------------ +%% Test cases +%%------------------------------------------------------------------------------ + +t_put_get_valid(_Config) -> + ?assertMatch({200, []}, list()), + + Name1 = <<"emqcl_1">>, + Link1 = link_params(#{ + <<"server">> => <<"emqxcl_2.nohost:31883">>, + <<"name">> => Name1 + }), + Name2 = <<"emqcl_2">>, + Link2 = link_params(#{ + <<"server">> => <<"emqxcl_2.nohost:41883">>, + <<"name">> => Name2 + }), + ?assertMatch({201, _}, create_link(Name1, Link1)), + ?assertMatch({201, _}, create_link(Name2, Link2)), + ?assertMatch({200, [_, _]}, list()), DisabledLink1 = Link1#{<<"enable">> => false}, - ?assertMatch( - {ok, _}, emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, [DisabledLink1, Link2]) - ), - - {ok, Resp2} = emqx_mgmt_api_test_util:request_api(get, Path, Auth), - ?assertMatch([DisabledLink1, Link2], emqx_utils_json:decode(Resp2)), + ?assertMatch({200, _}, update_link(Name1, maps:remove(<<"name">>, DisabledLink1))), + ?assertMatch({200, #{<<"enable">> := false}}, get_link(Name1)), + ?assertMatch({200, #{<<"enable">> := true}}, get_link(Name2)), SSL = #{<<"enable">> => true, <<"cacertfile">> => ?CACERT}, SSLLink1 = Link1#{<<"ssl">> => SSL}, + ?assertMatch({200, _}, update_link(Name1, maps:remove(<<"name">>, SSLLink1))), ?assertMatch( - {ok, _}, emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, [Link2, SSLLink1]) + {200, #{<<"ssl">> := #{<<"enable">> := true, <<"cacertfile">> := _Path}}}, + get_link(Name1) ), - {ok, Resp3} = emqx_mgmt_api_test_util:request_api(get, Path, Auth), + ok. +t_put_invalid(_Config) -> + Name = <<"l1">>, + {201, _} = create_link(Name, link_params()), ?assertMatch( - [Link2, #{<<"ssl">> := #{<<"enable">> := true, <<"cacertfile">> := _Path}}], - emqx_utils_json:decode(Resp3) + {400, _}, + update_link(Name, maps:remove(<<"server">>, link_params())) ). -t_put_invalid(Config) -> - Auth = ?config(auth, Config), - Path = ?API_PATH, - Link = #{ - <<"pool_size">> => 1, - <<"server">> => <<"emqxcl_2.nohost:31883">>, - <<"topics">> => [<<"t/test-topic">>, <<"t/test/#">>], - <<"name">> => <<"emqcl_1">> - }, - ?assertMatch( - {error, {_, 400, _}}, emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, [Link, Link]) - ), - ?assertMatch( - {error, {_, 400, _}}, - emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, [maps:remove(<<"name">>, Link)]) - ). +t_crud(_Config) -> + %% No links initially. + ?assertMatch({200, []}, list()), + NameA = <<"a">>, + ?assertMatch({404, _}, get_link(NameA)), + ?assertMatch({404, _}, delete_link(NameA)), + ?assertMatch({404, _}, update_link(NameA, link_params())), + + Params1 = link_params(), + ?assertMatch({201, #{<<"name">> := NameA}}, create_link(NameA, Params1)), + ?assertMatch({400, #{<<"code">> := <<"ALREADY_EXISTS">>}}, create_link(NameA, Params1)), + ?assertMatch({200, [#{<<"name">> := NameA}]}, list()), + ?assertMatch({200, #{<<"name">> := NameA}}, get_link(NameA)), + + Params2 = Params1#{<<"pool_size">> := 2}, + ?assertMatch({200, #{<<"name">> := NameA}}, update_link(NameA, Params2)), + + ?assertMatch({204, _}, delete_link(NameA)), + ?assertMatch({404, _}, delete_link(NameA)), + ?assertMatch({404, _}, get_link(NameA)), + ?assertMatch({404, _}, update_link(NameA, Params1)), + ?assertMatch({200, []}, list()), + + ok. diff --git a/apps/emqx_management/test/emqx_mgmt_api_test_util.erl b/apps/emqx_management/test/emqx_mgmt_api_test_util.erl index 106a65a9c..4b1d40651 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_test_util.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_test_util.erl @@ -293,3 +293,30 @@ format_multipart_formdata(Data, Params, Name, FileNames, MimeType, Boundary) -> FileNames ), erlang:iolist_to_binary([WithPaths, StartBoundary, <<"--">>, LineSeparator]). + +maybe_json_decode(X) -> + case emqx_utils_json:safe_decode(X, [return_maps]) of + {ok, Decoded} -> Decoded; + {error, _} -> X + end. + +simple_request(Method, Path, Params) -> + AuthHeader = auth_header_(), + Opts = #{return_all => true}, + case request_api(Method, Path, "", AuthHeader, Params, Opts) of + {ok, {{_, Status, _}, _Headers, Body0}} -> + Body = maybe_json_decode(Body0), + {Status, Body}; + {error, {{_, Status, _}, _Headers, Body0}} -> + Body = + case emqx_utils_json:safe_decode(Body0, [return_maps]) of + {ok, Decoded0 = #{<<"message">> := Msg0}} -> + Msg = maybe_json_decode(Msg0), + Decoded0#{<<"message">> := Msg}; + {ok, Decoded0} -> + Decoded0; + {error, _} -> + Body0 + end, + {Status, Body} + end. diff --git a/apps/emqx_utils/include/emqx_utils_api.hrl b/apps/emqx_utils/include/emqx_utils_api.hrl index ba2941a4f..0876b9829 100644 --- a/apps/emqx_utils/include/emqx_utils_api.hrl +++ b/apps/emqx_utils/include/emqx_utils_api.hrl @@ -21,6 +21,8 @@ -define(OK(CONTENT), {200, CONTENT}). +-define(CREATED(CONTENT), {201, CONTENT}). + -define(NO_CONTENT, 204). -define(BAD_REQUEST(CODE, REASON), {400, ?ERROR_MSG(CODE, REASON)}). From ba3cbe02e33c9e5bc74c0954967074c1ffc39901 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 16 Jul 2024 16:47:25 -0300 Subject: [PATCH 02/19] feat(cluster link api): add status to responses Fixes https://emqx.atlassian.net/browse/EMQX-12627 --- apps/emqx/priv/bpapi.versions | 1 + .../src/emqx_cluster_link_api.erl | 246 +++++++++++++++--- .../src/emqx_cluster_link_mqtt.erl | 74 +++++- .../src/proto/emqx_cluster_link_proto_v1.erl | 31 +++ .../test/emqx_cluster_link_api_SUITE.erl | 212 ++++++++++++++- 5 files changed, 524 insertions(+), 40 deletions(-) create mode 100644 apps/emqx_cluster_link/src/proto/emqx_cluster_link_proto_v1.erl diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index 6e3cc046f..a5cc6b335 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -10,6 +10,7 @@ {emqx_bridge,5}. {emqx_bridge,6}. {emqx_broker,1}. +{emqx_cluster_link,1}. {emqx_cm,1}. {emqx_cm,2}. {emqx_cm,3}. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 324d6dd68..a184ab36d 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -8,10 +8,14 @@ -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/http_api.hrl"). -include_lib("emqx_utils/include/emqx_utils_api.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). +-include_lib("emqx/include/logger.hrl"). -export([ api_spec/0, paths/0, + namespace/0, + fields/1, schema/1 ]). @@ -23,6 +27,10 @@ -define(CONF_PATH, [cluster, links]). -define(TAGS, [<<"Cluster">>]). +-type cluster_name() :: binary(). + +namespace() -> "cluster_link". + api_spec() -> emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). @@ -40,7 +48,7 @@ schema("/cluster/links") -> description => "Get cluster links configuration", tags => ?TAGS, responses => - #{200 => links_config_schema()} + #{200 => links_config_schema_response()} }, post => #{ @@ -49,7 +57,7 @@ schema("/cluster/links") -> 'requestBody' => link_config_schema(), responses => #{ - 200 => link_config_schema(), + 200 => link_config_schema_response(), 400 => emqx_dashboard_swagger:error_codes( [?BAD_REQUEST, ?ALREADY_EXISTS], @@ -68,7 +76,7 @@ schema("/cluster/links/:name") -> parameters => [param_path_name()], responses => #{ - 200 => link_config_schema(), + 200 => link_config_schema_response(), 404 => emqx_dashboard_swagger:error_codes( [?NOT_FOUND], <<"Cluster link not found">> ) @@ -95,7 +103,7 @@ schema("/cluster/links/:name") -> 'requestBody' => update_link_config_schema(), responses => #{ - 200 => link_config_schema(), + 200 => link_config_schema_response(), 404 => emqx_dashboard_swagger:error_codes( [?NOT_FOUND], <<"Cluster link not found">> ), @@ -107,44 +115,30 @@ schema("/cluster/links/:name") -> } }. +fields(link_config_response) -> + [ + {node, hoconsc:mk(binary(), #{desc => ?DESC("node")})}, + {status, hoconsc:mk(status(), #{desc => ?DESC("status")})} + | emqx_cluster_link_schema:fields("link") + ]. + %%-------------------------------------------------------------------- %% API Handler funcs %%-------------------------------------------------------------------- '/cluster/links'(get, _Params) -> - ?OK(get_raw()); + handle_list(); '/cluster/links'(post, #{body := Body = #{<<"name">> := Name}}) -> with_link( Name, return(?BAD_REQUEST('ALREADY_EXISTS', <<"Cluster link already exists">>)), - fun() -> - case emqx_cluster_link_config:create(Body) of - {ok, Res} -> - ?CREATED(Res); - {error, Reason} -> - Message = list_to_binary(io_lib:format("Create link failed ~p", [Reason])), - ?BAD_REQUEST(Message) - end - end + fun() -> handle_create(Name, Body) end ). '/cluster/links/:name'(get, #{bindings := #{name := Name}}) -> - with_link(Name, fun(Link) -> ?OK(Link) end, not_found()); + with_link(Name, fun(Link) -> handle_lookup(Name, Link) end, not_found()); '/cluster/links/:name'(put, #{bindings := #{name := Name}, body := Params0}) -> - with_link( - Name, - fun(Link) -> - Params = Params0#{<<"name">> => Name}, - case emqx_cluster_link_config:update_one_link(Params) of - {ok, Res} -> - ?OK(Res); - {error, Reason} -> - Message = list_to_binary(io_lib:format("Update link failed ~p", [Reason])), - ?BAD_REQUEST(Message) - end - end, - not_found() - ); + with_link(Name, fun() -> handle_update(Name, Params0) end, not_found()); '/cluster/links/:name'(delete, #{bindings := #{name := Name}}) -> with_link( Name, @@ -164,6 +158,48 @@ schema("/cluster/links/:name") -> %% Internal funcs %%-------------------------------------------------------------------- +handle_list() -> + Links = get_raw(), + NodeResults = get_all_link_status_cluster(), + NameToStatus = collect_all_status(NodeResults), + EmptyStatus = #{status => inconsistent, node_status => []}, + Response = + lists:map( + fun(#{<<"name">> := Name} = Link) -> + Status = maps:get(Name, NameToStatus, EmptyStatus), + maps:merge(Link, Status) + end, + Links + ), + ?OK(Response). + +handle_create(Name, Params) -> + case emqx_cluster_link_config:create(Params) of + {ok, Link} -> + ?CREATED(add_status(Name, Link)); + {error, Reason} -> + Message = list_to_binary(io_lib:format("Create link failed ~p", [Reason])), + ?BAD_REQUEST(Message) + end. + +handle_lookup(Name, Link) -> + ?OK(add_status(Name, Link)). + +add_status(Name, Link) -> + NodeResults = get_link_status_cluster(Name), + Status = collect_single_status(NodeResults), + maps:merge(Link, Status). + +handle_update(Name, Params0) -> + Params = Params0#{<<"name">> => Name}, + case emqx_cluster_link_config:update_one_link(Params) of + {ok, Link} -> + ?OK(add_status(Name, Link)); + {error, Reason} -> + Message = list_to_binary(io_lib:format("Update link failed ~p", [Reason])), + ?BAD_REQUEST(Message) + end. + get_raw() -> #{<<"cluster">> := #{<<"links">> := Links}} = emqx_config:fill_defaults( @@ -172,15 +208,130 @@ get_raw() -> ), Links. -links_config_schema() -> - emqx_cluster_link_schema:links_schema( +get_all_link_status_cluster() -> + case emqx_cluster_link_mqtt:get_all_resources_cluster() of + {error, BadResults} -> + ?SLOG(warning, #{ + msg => "cluster_link_api_all_status_bad_erpc_results", + results => BadResults + }), + []; + {ok, NodeResults} -> + NodeResults + end. + +get_link_status_cluster(Name) -> + case emqx_cluster_link_mqtt:get_resource_cluster(Name) of + {error, BadResults} -> + ?SLOG(warning, #{ + msg => "cluster_link_api_lookup_status_bad_erpc_results", + results => BadResults + }), + []; + {ok, NodeResults} -> + NodeResults + end. + +-spec collect_all_status([{node(), #{cluster_name() => _}}]) -> + #{ + cluster_name() => #{ + node := node(), + status := emqx_resource:resource_status() | inconsistent + } + }. +collect_all_status(NodeResults) -> + Reindexed = lists:foldl( + fun({Node, AllLinkData}, Acc) -> + maps:fold( + fun(Name, Data, AccIn) -> + collect_all_status1(Node, Name, Data, AccIn) + end, + Acc, + AllLinkData + ) + end, + #{}, + NodeResults + ), + maps:fold( + fun(Name, NodeToData, Acc) -> + OnlyStatus = [S || #{status := S} <- maps:values(NodeToData)], + SummaryStatus = + case lists:usort(OnlyStatus) of + [SameStatus] -> SameStatus; + _ -> inconsistent + end, + NodeStatus = lists:map( + fun({Node, #{status := S}}) -> + #{node => Node, status => S} + end, + maps:to_list(NodeToData) + ), + Acc#{ + Name => #{ + status => SummaryStatus, + node_status => NodeStatus + } + } + end, + #{}, + Reindexed + ). + +collect_all_status1(Node, Name, Data, Acc) -> + maps:update_with( + Name, + fun(Old) -> Old#{Node => Data} end, + #{Node => Data}, + Acc + ). + +collect_single_status(NodeResults) -> + NodeStatus = + lists:map( + fun + ({Node, {ok, #{status := S}}}) -> + #{node => Node, status => S}; + ({Node, {error, _}}) -> + #{node => Node, status => ?status_disconnected}; + ({Node, _}) -> + #{node => Node, status => inconsistent} + end, + NodeResults + ), + OnlyStatus = [S || #{status := S} <- NodeStatus], + SummaryStatus = + case lists:usort(OnlyStatus) of + [SameStatus] -> SameStatus; + _ -> inconsistent + end, + #{ + status => SummaryStatus, + node_status => NodeStatus + }. + +links_config_schema_response() -> + hoconsc:mk(hoconsc:array(hoconsc:ref(?MODULE, link_config_response)), #{ + examples => #{<<"example">> => links_config_response_example()} + }). + +link_config_schema() -> + hoconsc:mk(emqx_cluster_link_schema:link_schema(), #{ + examples => #{<<"example">> => hd(links_config_example())} + }). + +link_config_schema_response() -> + hoconsc:mk( + hoconsc:ref(?MODULE, link_config_response), #{ - examples => #{<<"example">> => links_config_example()} + examples => #{ + <<"example">> => hd(links_config_response_example()) + } } ). -link_config_schema() -> - emqx_cluster_link_schema:link_schema(). +status() -> + hoconsc:enum([?status_connected, ?status_disconnected, ?status_connecting, inconsistent]). param_path_name() -> {name, @@ -197,6 +348,22 @@ param_path_name() -> update_link_config_schema() -> proplists:delete(name, emqx_cluster_link_schema:fields("link")). +links_config_response_example() -> + lists:map( + fun(LinkEx) -> + LinkEx#{ + <<"status">> => <<"connected">>, + <<"node_status">> => [ + #{ + <<"node">> => <<"emqx1@emqx.net">>, + <<"status">> => <<"connected">> + } + ] + } + end, + links_config_example() + ). + links_config_example() -> [ #{ @@ -229,7 +396,8 @@ with_link(Name, FoundFn, NotFoundFn) -> case emqx_cluster_link_config:link_raw(Name) of undefined -> NotFoundFn(); - Link = #{} -> + Link0 = #{} -> + Link = fill_defaults_single(Link0), {arity, Arity} = erlang:fun_info(FoundFn, arity), case Arity of 1 -> FoundFn(Link); @@ -237,6 +405,14 @@ with_link(Name, FoundFn, NotFoundFn) -> end end. +fill_defaults_single(Link0) -> + #{<<"cluster">> := #{<<"links">> := [Link]}} = + emqx_config:fill_defaults( + #{<<"cluster">> => #{<<"links">> => [Link0]}}, + #{obfuscate_sensitive_values => true} + ), + Link. + return(Response) -> fun() -> Response end. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl index 5185803b6..3b37a304e 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl @@ -46,6 +46,16 @@ forward/2 ]). +-export([ + get_all_resources_cluster/0, + get_resource_cluster/1 +]). +%% BpAPI / RPC Targets +-export([ + get_resource_local_v1/1, + get_all_resources_local_v1/0 +]). + -define(MSG_CLIENTID_SUFFIX, ":msg:"). -define(MQTT_HOST_OPTS, #{default_port => 1883}). @@ -80,6 +90,8 @@ -define(PUB_TIMEOUT, 10_000). +-type cluster_name() :: binary(). + -spec ensure_msg_fwd_resource(map()) -> {ok, emqx_resource:resource_data() | already_started} | {error, Reason :: term()}. ensure_msg_fwd_resource(#{name := Name, resource_opts := ResOpts} = ClusterConf) -> @@ -89,10 +101,57 @@ ensure_msg_fwd_resource(#{name := Name, resource_opts := ResOpts} = ClusterConf) }, emqx_resource:create_local(?MSG_RES_ID(Name), ?RES_GROUP, ?MODULE, ClusterConf, ResOpts1). --spec remove_msg_fwd_resource(binary() | map()) -> ok | {error, Reason :: term()}. +-spec remove_msg_fwd_resource(cluster_name()) -> ok | {error, Reason :: term()}. remove_msg_fwd_resource(ClusterName) -> emqx_resource:remove_local(?MSG_RES_ID(ClusterName)). +-spec get_all_resources_cluster() -> + {ok, [{node(), #{cluster_name() => emqx_resource:resource_data()}}]} + | {error, [term()]}. +get_all_resources_cluster() -> + Nodes = emqx:running_nodes(), + Results = emqx_cluster_link_proto_v1:get_all_resources(Nodes), + sequence_multicall_results(Nodes, Results). + +-spec get_resource_cluster(cluster_name()) -> + {ok, [{node(), {ok, emqx_resource:resource_data()} | {error, not_found}}]} + | {error, [term()]}. +get_resource_cluster(ClusterName) -> + Nodes = emqx:running_nodes(), + Results = emqx_cluster_link_proto_v1:get_resource(Nodes, ClusterName), + sequence_multicall_results(Nodes, Results). + +%% RPC Target in `emqx_cluster_link_proto_v1'. +-spec get_resource_local_v1(cluster_name()) -> + {ok, emqx_resource:resource_data()} | {error, not_found}. +get_resource_local_v1(ClusterName) -> + case emqx_resource:get_instance(?MSG_RES_ID(ClusterName)) of + {ok, _ResourceGroup, ResourceData} -> + {ok, ResourceData}; + {error, not_found} -> + {error, not_found} + end. + +%% RPC Target in `emqx_cluster_link_proto_v1'. +-spec get_all_resources_local_v1() -> #{cluster_name() => emqx_resource:resource_data()}. +get_all_resources_local_v1() -> + lists:foldl( + fun + (?MSG_RES_ID(Name) = Id, Acc) -> + case emqx_resource:get_instance(Id) of + {ok, ?RES_GROUP, ResourceData} -> + Acc#{Name => ResourceData}; + _ -> + Acc + end; + (_Id, Acc) -> + %% Doesn't follow the naming pattern; manually crafted? + Acc + end, + #{}, + emqx_resource:list_group_instances(?RES_GROUP) + ). + %%-------------------------------------------------------------------- %% emqx_resource callbacks (message forwarding) %%-------------------------------------------------------------------- @@ -419,3 +478,16 @@ emqtt_client_opts(ClientIdSuffix, ClusterConf) -> #{clientid := BaseClientId} = Opts = emqx_cluster_link_config:mk_emqtt_options(ClusterConf), ClientId = emqx_bridge_mqtt_lib:clientid_base([BaseClientId, ClientIdSuffix]), Opts#{clientid => ClientId}. + +-spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> + {ok, [{node(), term()}]} | {error, [term()]}. +sequence_multicall_results(Nodes, Results) -> + case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of + {OkResults, []} -> + {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; + {_OkResults, BadResults} -> + {error, BadResults} + end. + +is_ok({_Node, {ok, _}}) -> true; +is_ok(_) -> false. diff --git a/apps/emqx_cluster_link/src/proto/emqx_cluster_link_proto_v1.erl b/apps/emqx_cluster_link/src/proto/emqx_cluster_link_proto_v1.erl new file mode 100644 index 000000000..725bb8afc --- /dev/null +++ b/apps/emqx_cluster_link/src/proto/emqx_cluster_link_proto_v1.erl @@ -0,0 +1,31 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_cluster_link_proto_v1). + +-behaviour(emqx_bpapi). + +-export([ + introduced_in/0, + + get_resource/2, + get_all_resources/1 +]). + +-include_lib("emqx/include/bpapi.hrl"). + +-define(TIMEOUT, 15000). + +introduced_in() -> + "5.7.2". + +-spec get_resource([node()], binary()) -> + emqx_rpc:erpc_multicall({ok, emqx_resource:resource_data()} | {error, not_found}). +get_resource(Nodes, ClusterName) -> + erpc:multicall(Nodes, emqx_cluster_link_mqtt, get_resource_local_v1, [ClusterName], ?TIMEOUT). + +-spec get_all_resources([node()]) -> + emqx_rpc:erpc_multicall(#{binary() => emqx_resource:resource_data()}). +get_all_resources(Nodes) -> + erpc:multicall(Nodes, emqx_cluster_link_mqtt, get_all_resources_local_v1, [], ?TIMEOUT). diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 5bb1c377a..5c136925d 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -37,6 +37,8 @@ "-----END CERTIFICATE-----" >>). +-define(ON(NODE, BODY), erpc:call(NODE, fun() -> BODY end)). + %%------------------------------------------------------------------------------ %% CT boilerplate %%------------------------------------------------------------------------------ @@ -67,10 +69,36 @@ end_per_suite(Config) -> auth_header() -> emqx_mgmt_api_test_util:auth_header_(). +init_per_testcase(t_status = TestCase, Config) -> + ok = emqx_cth_suite:stop_apps([emqx_dashboard]), + SourceClusterSpec = emqx_cluster_link_SUITE:mk_source_cluster(TestCase, Config), + TargetClusterSpec = emqx_cluster_link_SUITE:mk_target_cluster(TestCase, Config), + SourceNodes = [SN1 | _] = emqx_cth_cluster:start(SourceClusterSpec), + TargetNodes = emqx_cth_cluster:start(TargetClusterSpec), + emqx_cluster_link_SUITE:start_cluster_link(SourceNodes ++ TargetNodes, Config), + erpc:call(SN1, emqx_cth_suite, start_apps, [ + [emqx_management, emqx_mgmt_api_test_util:emqx_dashboard()], + #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} + ]), + [ + {source_nodes, SourceNodes}, + {target_nodes, TargetNodes} + | Config + ]; init_per_testcase(_TC, Config) -> {ok, _} = emqx_cluster_link_config:update([]), Config. +end_per_testcase(t_status, Config) -> + SourceNodes = ?config(source_nodes, Config), + TargetNodes = ?config(target_nodes, Config), + ok = emqx_cth_cluster:stop(SourceNodes), + ok = emqx_cth_cluster:stop(TargetNodes), + _ = emqx_cth_suite:start_apps( + [emqx_mgmt_api_test_util:emqx_dashboard()], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + ok; end_per_testcase(_TC, _Config) -> ok. @@ -158,6 +186,8 @@ t_put_invalid(_Config) -> update_link(Name, maps:remove(<<"server">>, link_params())) ). +%% Tests a sequence of CRUD operations and their expected responses, for common use cases +%% and configuration states. t_crud(_Config) -> %% No links initially. ?assertMatch({200, []}, list()), @@ -167,13 +197,43 @@ t_crud(_Config) -> ?assertMatch({404, _}, update_link(NameA, link_params())), Params1 = link_params(), - ?assertMatch({201, #{<<"name">> := NameA}}, create_link(NameA, Params1)), + ?assertMatch( + {201, #{ + <<"name">> := NameA, + <<"status">> := _, + <<"node_status">> := [#{<<"node">> := _, <<"status">> := _} | _] + }}, + create_link(NameA, Params1) + ), ?assertMatch({400, #{<<"code">> := <<"ALREADY_EXISTS">>}}, create_link(NameA, Params1)), - ?assertMatch({200, [#{<<"name">> := NameA}]}, list()), - ?assertMatch({200, #{<<"name">> := NameA}}, get_link(NameA)), + ?assertMatch( + {200, [ + #{ + <<"name">> := NameA, + <<"status">> := _, + <<"node_status">> := [#{<<"node">> := _, <<"status">> := _} | _] + } + ]}, + list() + ), + ?assertMatch( + {200, #{ + <<"name">> := NameA, + <<"status">> := _, + <<"node_status">> := [#{<<"node">> := _, <<"status">> := _} | _] + }}, + get_link(NameA) + ), Params2 = Params1#{<<"pool_size">> := 2}, - ?assertMatch({200, #{<<"name">> := NameA}}, update_link(NameA, Params2)), + ?assertMatch( + {200, #{ + <<"name">> := NameA, + <<"status">> := _, + <<"node_status">> := [#{<<"node">> := _, <<"status">> := _} | _] + }}, + update_link(NameA, Params2) + ), ?assertMatch({204, _}, delete_link(NameA)), ?assertMatch({404, _}, delete_link(NameA)), @@ -182,3 +242,147 @@ t_crud(_Config) -> ?assertMatch({200, []}, list()), ok. + +%% Verifies the behavior of reported status under different conditions when listing all +%% links and when fetching a specific link. +t_status(Config) -> + [SN1 | _] = ?config(source_nodes, Config), + Name = <<"cl.target">>, + ?assertMatch( + {200, [ + #{ + <<"status">> := <<"connected">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + } + ] + } + ]}, + list() + ), + ?assertMatch( + {200, #{ + <<"status">> := <<"connected">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + } + ] + }}, + get_link(Name) + ), + + %% If one of the nodes reports a different status, the cluster is inconsistent. + ProtoMod = emqx_cluster_link_proto_v1, + ?ON(SN1, begin + ok = meck:new(ProtoMod, [no_link, passthrough, no_history]), + meck:expect(ProtoMod, get_all_resources, fun(Nodes) -> + [Res1, {ok, Res2A} | Rest] = meck:passthrough([Nodes]), + %% Res2A :: #{cluster_name() => emqx_resource:resource_data()} + Res2B = maps:map(fun(_, Data) -> Data#{status := disconnected} end, Res2A), + [Res1, {ok, Res2B} | Rest] + end), + meck:expect(ProtoMod, get_resource, fun(Nodes, LinkName) -> + [Res1, {ok, {ok, Res2A}} | Rest] = meck:passthrough([Nodes, LinkName]), + Res2B = Res2A#{status := disconnected}, + [Res1, {ok, {ok, Res2B}} | Rest] + end) + end), + ?assertMatch( + {200, [ + #{ + <<"status">> := <<"inconsistent">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"disconnected">> + } + ] + } + ]}, + list() + ), + ?assertMatch( + {200, #{ + <<"status">> := <<"inconsistent">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"disconnected">> + } + ] + }}, + get_link(Name) + ), + + %% Simulating erpc failures + ?ON(SN1, begin + meck:expect(ProtoMod, get_all_resources, fun(Nodes) -> + [Res1, _ | Rest] = meck:passthrough([Nodes]), + [Res1, {error, {erpc, noconnection}} | Rest] + end), + meck:expect(ProtoMod, get_resource, fun(Nodes, LinkName) -> + [Res1, _ | Rest] = meck:passthrough([Nodes, LinkName]), + [Res1, {error, {erpc, noconnection}} | Rest] + end) + end), + ?assertMatch( + {200, [ + #{ + <<"status">> := <<"inconsistent">>, + <<"node_status">> := [] + } + ]}, + list() + ), + ?assertMatch( + {200, #{ + <<"status">> := <<"inconsistent">>, + <<"node_status">> := [] + }}, + get_link(Name) + ), + %% Simulate another inconsistency + ?ON(SN1, begin + meck:expect(ProtoMod, get_resource, fun(Nodes, LinkName) -> + [Res1, _ | Rest] = meck:passthrough([Nodes, LinkName]), + [Res1, {ok, {error, not_found}} | Rest] + end) + end), + ?assertMatch( + {200, #{ + <<"status">> := <<"inconsistent">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"disconnected">> + } + ] + }}, + get_link(Name) + ), + + ok. From 07cb147d383bc12e3eb3319cfd33b6de8749e03a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 17 Jul 2024 18:00:28 -0300 Subject: [PATCH 03/19] fix(cluster link schema): username is not required --- apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl | 4 ++-- apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl index a6073d677..a369429d5 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl @@ -47,8 +47,8 @@ fields("link") -> {server, emqx_schema:servers_sc(#{required => true, desc => ?DESC(server)}, ?MQTT_HOST_OPTS)}, {clientid, ?HOCON(binary(), #{desc => ?DESC(clientid)})}, - {username, ?HOCON(binary(), #{desc => ?DESC(username)})}, - {password, emqx_schema_secret:mk(#{desc => ?DESC(password)})}, + {username, ?HOCON(binary(), #{required => false, desc => ?DESC(username)})}, + {password, emqx_schema_secret:mk(#{required => false, desc => ?DESC(password)})}, {ssl, #{ type => ?R_REF(emqx_schema, "ssl_client_opts"), default => #{<<"enable">> => false}, diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 5c136925d..535e8521a 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -136,7 +136,6 @@ link_params() -> link_params(Overrides) -> Default = #{ <<"clientid">> => <<"linkclientid">>, - <<"username">> => <<"myusername">>, <<"pool_size">> => 1, <<"server">> => <<"emqxcl_2.nohost:31883">>, <<"topics">> => [<<"t/test-topic">>, <<"t/test/#">>] From 6a5849488c767a567ba39a6ec785b728948c8164 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 17 Jul 2024 18:01:52 -0300 Subject: [PATCH 04/19] feat(cluster link): add metrics Fixes https://emqx.atlassian.net/browse/EMQX-12627 --- .../include/emqx_cluster_link.hrl | 4 + .../src/emqx_cluster_link.erl | 57 ++++ .../src/emqx_cluster_link_api.erl | 106 ++++++- .../src/emqx_cluster_link_app.erl | 12 +- .../src/emqx_cluster_link_config.erl | 6 +- .../src/emqx_cluster_link_extrouter.erl | 24 +- .../src/emqx_cluster_link_sup.erl | 5 +- .../test/emqx_cluster_link_api_SUITE.erl | 265 +++++++++++++++++- 8 files changed, 450 insertions(+), 29 deletions(-) diff --git a/apps/emqx_cluster_link/include/emqx_cluster_link.hrl b/apps/emqx_cluster_link/include/emqx_cluster_link.hrl index 08dc7f4ad..32c675d8d 100644 --- a/apps/emqx_cluster_link/include/emqx_cluster_link.hrl +++ b/apps/emqx_cluster_link/include/emqx_cluster_link.hrl @@ -17,3 +17,7 @@ %% Fairly compact text encoding. -define(SHARED_ROUTE_ID(Topic, Group), <<"$s/", Group/binary, "/", Topic/binary>>). -define(PERSISTENT_ROUTE_ID(Topic, ID), <<"$p/", ID/binary, "/", Topic/binary>>). + +-define(METRIC_NAME, cluster_link). + +-define(route_metric, 'routes'). diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link.erl b/apps/emqx_cluster_link/src/emqx_cluster_link.erl index 76228c052..e3bc04a29 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link.erl @@ -26,11 +26,29 @@ on_message_publish/1 ]). +%% metrics API +-export([ + maybe_create_metrics/1, + drop_metrics/1, + + get_metrics/1, + routes_inc/2 +]). + -include("emqx_cluster_link.hrl"). -include_lib("emqx/include/emqx.hrl"). -include_lib("emqx/include/emqx_hooks.hrl"). -include_lib("emqx/include/logger.hrl"). +%%-------------------------------------------------------------------- +%% Type definitions +%%-------------------------------------------------------------------- + +-define(METRICS, [ + ?route_metric +]). +-define(RATE_METRICS, []). + %%-------------------------------------------------------------------- %% emqx_external_broker API %%-------------------------------------------------------------------- @@ -132,6 +150,32 @@ put_hook() -> delete_hook() -> emqx_hooks:del('message.publish', {?MODULE, on_message_publish, []}). +%%-------------------------------------------------------------------- +%% metrics API +%%-------------------------------------------------------------------- + +get_metrics(ClusterName) -> + Nodes = emqx:running_nodes(), + Timeout = 15_000, + Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), + sequence_multicall_results(Nodes, Results). + +maybe_create_metrics(ClusterName) -> + case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of + true -> + ok = emqx_metrics_worker:reset_metrics(?METRIC_NAME, ClusterName); + false -> + ok = emqx_metrics_worker:create_metrics( + ?METRIC_NAME, ClusterName, ?METRICS, ?RATE_METRICS + ) + end. + +drop_metrics(ClusterName) -> + ok = emqx_metrics_worker:clear_metrics(?METRIC_NAME, ClusterName). + +routes_inc(ClusterName, Val) -> + catch emqx_metrics_worker:inc(?METRIC_NAME, ClusterName, ?route_metric, Val). + %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- @@ -253,3 +297,16 @@ maybe_filter_incomming_msg(#message{topic = T} = Msg, ClusterName) -> true -> with_sender_name(Msg, ClusterName); false -> [] end. + +-spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> + {ok, [{node(), term()}]} | {error, [term()]}. +sequence_multicall_results(Nodes, Results) -> + case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of + {OkResults, []} -> + {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; + {_OkResults, BadResults} -> + {error, BadResults} + end. + +is_ok({_Node, {ok, _}}) -> true; +is_ok(_) -> false. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index a184ab36d..77a77610b 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -10,6 +10,7 @@ -include_lib("emqx_utils/include/emqx_utils_api.hrl"). -include_lib("emqx_resource/include/emqx_resource.hrl"). -include_lib("emqx/include/logger.hrl"). +-include("emqx_cluster_link.hrl"). -export([ api_spec/0, @@ -21,7 +22,8 @@ -export([ '/cluster/links'/2, - '/cluster/links/:name'/2 + '/cluster/links/:name'/2, + '/cluster/links/:name/metrics'/2 ]). -define(CONF_PATH, [cluster, links]). @@ -37,7 +39,8 @@ api_spec() -> paths() -> [ "/cluster/links", - "/cluster/links/:name" + "/cluster/links/:name", + "/cluster/links/:name/metrics" ]. schema("/cluster/links") -> @@ -113,6 +116,23 @@ schema("/cluster/links/:name") -> ) } } + }; +schema("/cluster/links/:name/metrics") -> + #{ + 'operationId' => '/cluster/links/:name/metrics', + get => + #{ + description => "Get a cluster link metrics", + tags => ?TAGS, + parameters => [param_path_name()], + responses => + #{ + 200 => link_metrics_schema_response(), + 404 => emqx_dashboard_swagger:error_codes( + [?NOT_FOUND], <<"Cluster link not found">> + ) + } + } }. fields(link_config_response) -> @@ -120,6 +140,24 @@ fields(link_config_response) -> {node, hoconsc:mk(binary(), #{desc => ?DESC("node")})}, {status, hoconsc:mk(status(), #{desc => ?DESC("status")})} | emqx_cluster_link_schema:fields("link") + ]; +fields(metrics) -> + [ + {metrics, hoconsc:mk(map(), #{desc => ?DESC("metrics")})} + ]; +fields(link_metrics_response) -> + [ + {node_metrics, + hoconsc:mk( + hoconsc:array(hoconsc:ref(?MODULE, node_metrics)), + #{desc => ?DESC("node_metrics")} + )} + | fields(metrics) + ]; +fields(node_metrics) -> + [ + {node, hoconsc:mk(atom(), #{desc => ?DESC("node")})} + | fields(metrics) ]. %%-------------------------------------------------------------------- @@ -154,6 +192,9 @@ fields(link_config_response) -> not_found() ). +'/cluster/links/:name/metrics'(get, #{bindings := #{name := Name}}) -> + with_link(Name, fun() -> handle_metrics(Name) end, not_found()). + %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- @@ -185,6 +226,46 @@ handle_create(Name, Params) -> handle_lookup(Name, Link) -> ?OK(add_status(Name, Link)). +handle_metrics(Name) -> + case emqx_cluster_link:get_metrics(Name) of + {error, BadResults} -> + ?SLOG(warning, #{ + msg => "cluster_link_api_metrics_bad_erpc_results", + results => BadResults + }), + ?OK(#{metrics => #{}, node_metrics => []}); + {ok, NodeResults} -> + NodeMetrics = + lists:map( + fun({Node, Metrics}) -> + format_metrics(Node, Metrics) + end, + NodeResults + ), + AggregatedMetrics = aggregate_metrics(NodeMetrics), + Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics}, + ?OK(Response) + end. + +aggregate_metrics(NodeMetrics) -> + ErrorLogger = fun(_) -> ok end, + lists:foldl( + fun(#{metrics := Metrics}, Acc) -> + emqx_utils_maps:best_effort_recursive_sum(Metrics, Acc, ErrorLogger) + end, + #{}, + NodeMetrics + ). + +format_metrics(Node, Metrics) -> + Routes = emqx_utils_maps:deep_get([counters, ?route_metric], Metrics, 0), + #{ + node => Node, + metrics => #{ + ?route_metric => Routes + } + }. + add_status(Name, Link) -> NodeResults = get_link_status_cluster(Name), Status = collect_single_status(NodeResults), @@ -330,6 +411,16 @@ link_config_schema_response() -> } ). +link_metrics_schema_response() -> + hoconsc:mk( + hoconsc:ref(?MODULE, link_metrics_response), + #{ + examples => #{ + <<"example">> => link_metrics_response_example() + } + } + ). + status() -> hoconsc:enum([?status_connected, ?status_disconnected, ?status_connecting, inconsistent]). @@ -392,6 +483,17 @@ links_config_example() -> } ]. +link_metrics_response_example() -> + #{ + <<"metrics">> => #{<<"routes">> => 10240}, + <<"node_metrics">> => [ + #{ + <<"node">> => <<"emqx1@emqx.net">>, + <<"metrics">> => #{<<"routes">> => 10240} + } + ] + }. + with_link(Name, FoundFn, NotFoundFn) -> case emqx_cluster_link_config:link_raw(Name) of undefined -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl index 41f1a0a77..f9625fae4 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl @@ -22,7 +22,9 @@ start(_StartType, _StartArgs) -> _ -> ok end, - emqx_cluster_link_sup:start_link(LinksConf). + {ok, Sup} = emqx_cluster_link_sup:start_link(LinksConf), + ok = create_metrics(LinksConf), + {ok, Sup}. prep_stop(State) -> emqx_cluster_link_config:remove_handler(), @@ -53,3 +55,11 @@ remove_msg_fwd_resources(LinksConf) -> end, LinksConf ). + +create_metrics(LinksConf) -> + lists:foreach( + fun(#{name := ClusterName}) -> + ok = emqx_cluster_link:maybe_create_metrics(ClusterName) + end, + LinksConf + ). diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index 36655460b..0455ab21c 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -277,9 +277,10 @@ all_ok(Results) -> add_links(LinksConf) -> [add_link(Link) || Link <- LinksConf]. -add_link(#{enable := true} = LinkConf) -> +add_link(#{name := ClusterName, enable := true} = LinkConf) -> {ok, _Pid} = emqx_cluster_link_sup:ensure_actor(LinkConf), {ok, _} = emqx_cluster_link_mqtt:ensure_msg_fwd_resource(LinkConf), + ok = emqx_cluster_link:maybe_create_metrics(ClusterName), ok; add_link(_DisabledLinkConf) -> ok. @@ -289,7 +290,8 @@ remove_links(LinksConf) -> remove_link(Name) -> _ = emqx_cluster_link_mqtt:remove_msg_fwd_resource(Name), - ensure_actor_stopped(Name). + _ = ensure_actor_stopped(Name), + emqx_cluster_link:drop_metrics(Name). update_links(LinksConf) -> [update_link(Link) || Link <- LinksConf]. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl index 79d96e207..c45b12ae0 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl @@ -256,31 +256,35 @@ actor_apply_operation( apply_actor_operation(ActorID, Incarnation, Entry, OpName, Lane) -> _ = assert_current_incarnation(ActorID, Incarnation), - apply_operation(Entry, OpName, Lane). + apply_operation(ActorID, Entry, OpName, Lane). -apply_operation(Entry, OpName, Lane) -> +apply_operation(ActorID, Entry, OpName, Lane) -> %% NOTE %% This is safe sequence of operations only on core nodes. On replicants, %% `mria:dirty_update_counter/3` will be replicated asynchronously, which %% means this read can be stale. case mnesia:dirty_read(?EXTROUTE_TAB, Entry) of [#extroute{mcounter = MCounter}] -> - apply_operation(Entry, MCounter, OpName, Lane); + apply_operation(ActorID, Entry, MCounter, OpName, Lane); [] -> - apply_operation(Entry, 0, OpName, Lane) + apply_operation(ActorID, Entry, 0, OpName, Lane) end. -apply_operation(Entry, MCounter, OpName, Lane) -> +apply_operation(ActorID, Entry, MCounter, OpName, Lane) -> %% NOTE %% We are relying on the fact that changes to each individual lane of this %% multi-counter are synchronized. Without this, such counter updates would %% be unsafe. Instead, we would have to use another, more complex approach, %% that runs `ets:lookup/2` + `ets:select_replace/2` in a loop until the %% counter is updated accordingly. + ?ACTOR_ID(ClusterName, _Actor) = ActorID, Marker = 1 bsl Lane, case MCounter band Marker of 0 when OpName =:= add -> - mria:dirty_update_counter(?EXTROUTE_TAB, Entry, Marker); + Res = mria:dirty_update_counter(?EXTROUTE_TAB, Entry, Marker), + _ = emqx_cluster_link:routes_inc(ClusterName, 1), + ?tp("cluster_link_extrouter_route_added", #{}), + Res; Marker when OpName =:= add -> %% Already added. MCounter; @@ -289,6 +293,8 @@ apply_operation(Entry, MCounter, OpName, Lane) -> 0 -> Record = #extroute{entry = Entry, mcounter = 0}, ok = mria:dirty_delete_object(?EXTROUTE_TAB, Record), + _ = emqx_cluster_link:routes_inc(ClusterName, -1), + ?tp("cluster_link_extrouter_route_deleted", #{}), 0; C -> C @@ -362,16 +368,16 @@ clean_incarnation(Rec = #actor{id = {Cluster, Actor}}) -> mnesia_clean_incarnation(#actor{id = Actor, incarnation = Incarnation, lane = Lane}) -> case mnesia:read(?EXTROUTE_ACTOR_TAB, Actor, write) of [#actor{incarnation = Incarnation}] -> - _ = clean_lane(Lane), + _ = clean_lane(Actor, Lane), mnesia:delete(?EXTROUTE_ACTOR_TAB, Actor, write); _Renewed -> stale end. -clean_lane(Lane) -> +clean_lane(ActorID, Lane) -> ets:foldl( fun(#extroute{entry = Entry, mcounter = MCounter}, _) -> - apply_operation(Entry, MCounter, delete, Lane) + apply_operation(ActorID, Entry, MCounter, delete, Lane) end, 0, ?EXTROUTE_TAB diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl index 2025510fc..81b5afb4c 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl @@ -6,6 +6,8 @@ -behaviour(supervisor). +-include("emqx_cluster_link.hrl"). + -export([start_link/1]). -export([ @@ -27,12 +29,13 @@ init(LinksConf) -> intensity => 10, period => 5 }, + Metrics = emqx_metrics_worker:child_spec(metrics, ?METRIC_NAME), ExtrouterGC = extrouter_gc_spec(), RouteActors = [ sup_spec(Name, ?ACTOR_MODULE, [LinkConf]) || #{name := Name} = LinkConf <- LinksConf ], - {ok, {SupFlags, [ExtrouterGC | RouteActors]}}. + {ok, {SupFlags, [Metrics, ExtrouterGC | RouteActors]}}. extrouter_gc_spec() -> %% NOTE: This one is currently global, not per-link. diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 535e8521a..f4a15a62a 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -11,6 +11,8 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-import(emqx_common_test_helpers, [on_exit/1]). + -define(API_PATH, emqx_mgmt_api_test_util:api_path(["cluster", "links"])). -define(CONF_PATH, [cluster, links]). @@ -44,7 +46,21 @@ %%------------------------------------------------------------------------------ all() -> - emqx_common_test_helpers:all(?MODULE). + AllTCs = emqx_common_test_helpers:all(?MODULE), + OtherTCs = AllTCs -- cluster_test_cases(), + [ + {group, cluster} + | OtherTCs + ]. + +groups() -> + [{cluster, cluster_test_cases()}]. + +cluster_test_cases() -> + [ + t_status, + t_metrics + ]. init_per_suite(Config) -> %% This is called by emqx_machine in EMQX release @@ -66,30 +82,35 @@ end_per_suite(Config) -> emqx_config:delete_override_conf_files(), ok. -auth_header() -> - emqx_mgmt_api_test_util:auth_header_(). - -init_per_testcase(t_status = TestCase, Config) -> +init_per_group(cluster = Group, Config) -> ok = emqx_cth_suite:stop_apps([emqx_dashboard]), - SourceClusterSpec = emqx_cluster_link_SUITE:mk_source_cluster(TestCase, Config), - TargetClusterSpec = emqx_cluster_link_SUITE:mk_target_cluster(TestCase, Config), + SourceClusterSpec = emqx_cluster_link_SUITE:mk_source_cluster(Group, Config), + TargetClusterSpec = emqx_cluster_link_SUITE:mk_target_cluster(Group, Config), SourceNodes = [SN1 | _] = emqx_cth_cluster:start(SourceClusterSpec), - TargetNodes = emqx_cth_cluster:start(TargetClusterSpec), + TargetNodes = [TN1 | _] = emqx_cth_cluster:start(TargetClusterSpec), emqx_cluster_link_SUITE:start_cluster_link(SourceNodes ++ TargetNodes, Config), erpc:call(SN1, emqx_cth_suite, start_apps, [ [emqx_management, emqx_mgmt_api_test_util:emqx_dashboard()], - #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} + #{work_dir => emqx_cth_suite:work_dir(Group, Config)} + ]), + erpc:call(TN1, emqx_cth_suite, start_apps, [ + [ + emqx_management, + emqx_mgmt_api_test_util:emqx_dashboard( + "dashboard.listeners.http { enable = true, bind = 28083 }" + ) + ], + #{work_dir => emqx_cth_suite:work_dir(Group, Config)} ]), [ {source_nodes, SourceNodes}, {target_nodes, TargetNodes} | Config ]; -init_per_testcase(_TC, Config) -> - {ok, _} = emqx_cluster_link_config:update([]), +init_per_group(_Group, Config) -> Config. -end_per_testcase(t_status, Config) -> +end_per_group(cluster, Config) -> SourceNodes = ?config(source_nodes, Config), TargetNodes = ?config(target_nodes, Config), ok = emqx_cth_cluster:stop(SourceNodes), @@ -99,7 +120,20 @@ end_per_testcase(t_status, Config) -> #{work_dir => emqx_cth_suite:work_dir(Config)} ), ok; +end_per_group(_Group, _Config) -> + ok. + +auth_header() -> + emqx_mgmt_api_test_util:auth_header_(). + +init_per_testcase(_TC, Config) -> + {ok, _} = emqx_cluster_link_config:update([]), + snabbkaffe:start_trace(), + Config. + end_per_testcase(_TC, _Config) -> + snabbkaffe:stop(), + emqx_common_test_helpers:call_janitor(), ok. %%------------------------------------------------------------------------------ @@ -114,7 +148,11 @@ list() -> emqx_mgmt_api_test_util:simple_request(get, Path, _Params = ""). get_link(Name) -> - Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + get_link(source, Name). + +get_link(SourceOrTargetCluster, Name) -> + Host = host(SourceOrTargetCluster), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name]), emqx_mgmt_api_test_util:simple_request(get, Path, _Params = ""). delete_link(Name) -> @@ -122,7 +160,11 @@ delete_link(Name) -> emqx_mgmt_api_test_util:simple_request(delete, Path, _Params = ""). update_link(Name, Params) -> - Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + update_link(source, Name, Params). + +update_link(SourceOrTargetCluster, Name, Params) -> + Host = host(SourceOrTargetCluster), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name]), emqx_mgmt_api_test_util:simple_request(put, Path, Params). create_link(Name, Params0) -> @@ -130,6 +172,17 @@ create_link(Name, Params0) -> Path = emqx_mgmt_api_test_util:api_path([api_root()]), emqx_mgmt_api_test_util:simple_request(post, Path, Params). +get_metrics(Name) -> + get_metrics(source, Name). + +get_metrics(SourceOrTargetCluster, Name) -> + Host = host(SourceOrTargetCluster), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name, "metrics"]), + emqx_mgmt_api_test_util:simple_request(get, Path, _Params = []). + +host(source) -> "http://127.0.0.1:18083"; +host(target) -> "http://127.0.0.1:28083". + link_params() -> link_params(_Overrides = #{}). @@ -194,6 +247,7 @@ t_crud(_Config) -> ?assertMatch({404, _}, get_link(NameA)), ?assertMatch({404, _}, delete_link(NameA)), ?assertMatch({404, _}, update_link(NameA, link_params())), + ?assertMatch({404, _}, get_metrics(NameA)), Params1 = link_params(), ?assertMatch( @@ -223,6 +277,7 @@ t_crud(_Config) -> }}, get_link(NameA) ), + ?assertMatch({200, _}, get_metrics(NameA)), Params2 = Params1#{<<"pool_size">> := 2}, ?assertMatch( @@ -238,6 +293,7 @@ t_crud(_Config) -> ?assertMatch({404, _}, delete_link(NameA)), ?assertMatch({404, _}, get_link(NameA)), ?assertMatch({404, _}, update_link(NameA, Params1)), + ?assertMatch({404, _}, get_metrics(NameA)), ?assertMatch({200, []}, list()), ok. @@ -298,6 +354,7 @@ t_status(Config) -> [Res1, {ok, {ok, Res2B}} | Rest] end) end), + on_exit(fun() -> catch ?ON(SN1, meck:unload()) end), ?assertMatch( {200, [ #{ @@ -385,3 +442,183 @@ t_status(Config) -> ), ok. + +t_metrics(Config) -> + ct:timetrap({seconds, 10}), + [SN1, SN2] = ?config(source_nodes, Config), + [TN1, TN2] = ?config(target_nodes, Config), + %% N.B. Link names on each cluster, so they are switched. + SourceName = <<"cl.target">>, + TargetName = <<"cl.source">>, + + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + }, + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + } + ] + }}, + get_metrics(source, SourceName) + ), + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + }, + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + } + ] + }}, + get_metrics(target, TargetName) + ), + + SourceC1 = emqx_cluster_link_SUITE:start_client(<<"sc1">>, SN1), + SourceC2 = emqx_cluster_link_SUITE:start_client(<<"sc2">>, SN2), + {ok, _, _} = emqtt:subscribe(SourceC1, <<"t/sc1">>), + {ok, _, _} = emqtt:subscribe(SourceC2, <<"t/sc2">>), + + %% Still no routes, as routes in the source cluster are replicated to the target + %% cluster. + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + }, + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + } + ] + }}, + get_metrics(source, SourceName) + ), + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + }, + #{ + <<"node">> := _, + <<"metrics">> := #{<<"routes">> := 0} + } + ] + }}, + get_metrics(target, TargetName) + ), + + TargetC1 = emqx_cluster_link_SUITE:start_client(<<"tc1">>, TN1), + TargetC2 = emqx_cluster_link_SUITE:start_client(<<"tc2">>, TN2), + {ok, _, _} = emqtt:subscribe(TargetC1, <<"t/tc1">>), + {ok, _, _} = emqtt:subscribe(TargetC2, <<"t/tc2">>), + {_, {ok, _}} = + ?wait_async_action( + begin + {ok, _, _} = emqtt:subscribe(TargetC1, <<"t/tc1">>), + {ok, _, _} = emqtt:subscribe(TargetC2, <<"t/tc2">>) + end, + #{?snk_kind := clink_route_sync_complete} + ), + + %% Routes = 2 in source cluster, because the target cluster has some topic filters + %% configured and subscribers to them, which were replicated to the source cluster. + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 2}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ), + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := _ + }}, + get_metrics(target, TargetName) + ), + + %% Unsubscribe and remove route. + ct:pal("unsubscribing"), + {_, {ok, _}} = + ?wait_async_action( + begin + {ok, _, _} = emqtt:unsubscribe(TargetC1, <<"t/tc1">>) + end, + #{?snk_kind := clink_route_sync_complete} + ), + + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 1}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ), + + %% Disabling the link should remove the routes. + ct:pal("disabling"), + {200, TargetLink0} = get_link(target, TargetName), + TargetLink1 = maps:without([<<"status">>, <<"node_status">>], TargetLink0), + TargetLink2 = TargetLink1#{<<"enable">> := false}, + {_, {ok, _}} = + ?wait_async_action( + begin + {200, _} = update_link(target, TargetName, TargetLink2), + %% Note that only when the GC runs and collects the stopped actor it'll actually + %% remove the routes + NowMS = erlang:system_time(millisecond), + TTL = emqx_cluster_link_config:actor_ttl(), + ct:pal("gc"), + %% 2 Actors: one for normal routes, one for PS routes + 1 = ?ON(SN1, emqx_cluster_link_extrouter:actor_gc(#{timestamp => NowMS + TTL * 3})), + 1 = ?ON(SN1, emqx_cluster_link_extrouter:actor_gc(#{timestamp => NowMS + TTL * 3})), + ct:pal("gc done"), + ok + end, + #{?snk_kind := "cluster_link_extrouter_route_deleted"} + ), + + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ), + + %% Enabling again + TargetLink3 = TargetLink2#{<<"enable">> := true}, + {_, {ok, _}} = + ?wait_async_action( + begin + {200, _} = update_link(target, TargetName, TargetLink3) + end, + #{?snk_kind := "cluster_link_extrouter_route_added"} + ), + + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 1}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ), + + ok. From d9832252d8f1cafe30796fe555a4da088421359a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 22 Jul 2024 09:55:16 -0300 Subject: [PATCH 05/19] refactor: add namespace to avoid clashes with operations or other resources --- .../src/emqx_cluster_link_api.erl | 24 +++++++++---------- .../test/emqx_cluster_link_api_SUITE.erl | 8 +++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 77a77610b..7e70a9ccc 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -22,8 +22,8 @@ -export([ '/cluster/links'/2, - '/cluster/links/:name'/2, - '/cluster/links/:name/metrics'/2 + '/cluster/links/link/:name'/2, + '/cluster/links/link/:name/metrics'/2 ]). -define(CONF_PATH, [cluster, links]). @@ -39,8 +39,8 @@ api_spec() -> paths() -> [ "/cluster/links", - "/cluster/links/:name", - "/cluster/links/:name/metrics" + "/cluster/links/link/:name", + "/cluster/links/link/:name/metrics" ]. schema("/cluster/links") -> @@ -69,9 +69,9 @@ schema("/cluster/links") -> } } }; -schema("/cluster/links/:name") -> +schema("/cluster/links/link/:name") -> #{ - 'operationId' => '/cluster/links/:name', + 'operationId' => '/cluster/links/link/:name', get => #{ description => "Get a cluster link configuration", @@ -117,9 +117,9 @@ schema("/cluster/links/:name") -> } } }; -schema("/cluster/links/:name/metrics") -> +schema("/cluster/links/link/:name/metrics") -> #{ - 'operationId' => '/cluster/links/:name/metrics', + 'operationId' => '/cluster/links/link/:name/metrics', get => #{ description => "Get a cluster link metrics", @@ -173,11 +173,11 @@ fields(node_metrics) -> fun() -> handle_create(Name, Body) end ). -'/cluster/links/:name'(get, #{bindings := #{name := Name}}) -> +'/cluster/links/link/:name'(get, #{bindings := #{name := Name}}) -> with_link(Name, fun(Link) -> handle_lookup(Name, Link) end, not_found()); -'/cluster/links/:name'(put, #{bindings := #{name := Name}, body := Params0}) -> +'/cluster/links/link/:name'(put, #{bindings := #{name := Name}, body := Params0}) -> with_link(Name, fun() -> handle_update(Name, Params0) end, not_found()); -'/cluster/links/:name'(delete, #{bindings := #{name := Name}}) -> +'/cluster/links/link/:name'(delete, #{bindings := #{name := Name}}) -> with_link( Name, fun() -> @@ -192,7 +192,7 @@ fields(node_metrics) -> not_found() ). -'/cluster/links/:name/metrics'(get, #{bindings := #{name := Name}}) -> +'/cluster/links/link/:name/metrics'(get, #{bindings := #{name := Name}}) -> with_link(Name, fun() -> handle_metrics(Name) end, not_found()). %%-------------------------------------------------------------------- diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index f4a15a62a..27cbe7532 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -152,11 +152,11 @@ get_link(Name) -> get_link(SourceOrTargetCluster, Name) -> Host = host(SourceOrTargetCluster), - Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name]), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), "link", Name]), emqx_mgmt_api_test_util:simple_request(get, Path, _Params = ""). delete_link(Name) -> - Path = emqx_mgmt_api_test_util:api_path([api_root(), Name]), + Path = emqx_mgmt_api_test_util:api_path([api_root(), "link", Name]), emqx_mgmt_api_test_util:simple_request(delete, Path, _Params = ""). update_link(Name, Params) -> @@ -164,7 +164,7 @@ update_link(Name, Params) -> update_link(SourceOrTargetCluster, Name, Params) -> Host = host(SourceOrTargetCluster), - Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name]), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), "link", Name]), emqx_mgmt_api_test_util:simple_request(put, Path, Params). create_link(Name, Params0) -> @@ -177,7 +177,7 @@ get_metrics(Name) -> get_metrics(SourceOrTargetCluster, Name) -> Host = host(SourceOrTargetCluster), - Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), Name, "metrics"]), + Path = emqx_mgmt_api_test_util:api_path(Host, [api_root(), "link", Name, "metrics"]), emqx_mgmt_api_test_util:simple_request(get, Path, _Params = []). host(source) -> "http://127.0.0.1:18083"; From ca2d4ad2a0ac86230226713e927c0fede081664a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:04:27 -0300 Subject: [PATCH 06/19] refactor: move metrics logic to separate module --- .../src/emqx_cluster_link.erl | 57 ---------------- .../src/emqx_cluster_link_api.erl | 2 +- .../src/emqx_cluster_link_app.erl | 2 +- .../src/emqx_cluster_link_config.erl | 4 +- .../src/emqx_cluster_link_extrouter.erl | 4 +- .../src/emqx_cluster_link_metrics.erl | 67 +++++++++++++++++++ 6 files changed, 73 insertions(+), 63 deletions(-) create mode 100644 apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link.erl b/apps/emqx_cluster_link/src/emqx_cluster_link.erl index e3bc04a29..76228c052 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link.erl @@ -26,29 +26,11 @@ on_message_publish/1 ]). -%% metrics API --export([ - maybe_create_metrics/1, - drop_metrics/1, - - get_metrics/1, - routes_inc/2 -]). - -include("emqx_cluster_link.hrl"). -include_lib("emqx/include/emqx.hrl"). -include_lib("emqx/include/emqx_hooks.hrl"). -include_lib("emqx/include/logger.hrl"). -%%-------------------------------------------------------------------- -%% Type definitions -%%-------------------------------------------------------------------- - --define(METRICS, [ - ?route_metric -]). --define(RATE_METRICS, []). - %%-------------------------------------------------------------------- %% emqx_external_broker API %%-------------------------------------------------------------------- @@ -150,32 +132,6 @@ put_hook() -> delete_hook() -> emqx_hooks:del('message.publish', {?MODULE, on_message_publish, []}). -%%-------------------------------------------------------------------- -%% metrics API -%%-------------------------------------------------------------------- - -get_metrics(ClusterName) -> - Nodes = emqx:running_nodes(), - Timeout = 15_000, - Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), - sequence_multicall_results(Nodes, Results). - -maybe_create_metrics(ClusterName) -> - case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of - true -> - ok = emqx_metrics_worker:reset_metrics(?METRIC_NAME, ClusterName); - false -> - ok = emqx_metrics_worker:create_metrics( - ?METRIC_NAME, ClusterName, ?METRICS, ?RATE_METRICS - ) - end. - -drop_metrics(ClusterName) -> - ok = emqx_metrics_worker:clear_metrics(?METRIC_NAME, ClusterName). - -routes_inc(ClusterName, Val) -> - catch emqx_metrics_worker:inc(?METRIC_NAME, ClusterName, ?route_metric, Val). - %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- @@ -297,16 +253,3 @@ maybe_filter_incomming_msg(#message{topic = T} = Msg, ClusterName) -> true -> with_sender_name(Msg, ClusterName); false -> [] end. - --spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> - {ok, [{node(), term()}]} | {error, [term()]}. -sequence_multicall_results(Nodes, Results) -> - case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of - {OkResults, []} -> - {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; - {_OkResults, BadResults} -> - {error, BadResults} - end. - -is_ok({_Node, {ok, _}}) -> true; -is_ok(_) -> false. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 7e70a9ccc..0d748a267 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -227,7 +227,7 @@ handle_lookup(Name, Link) -> ?OK(add_status(Name, Link)). handle_metrics(Name) -> - case emqx_cluster_link:get_metrics(Name) of + case emqx_cluster_link_metrics:get_metrics(Name) of {error, BadResults} -> ?SLOG(warning, #{ msg => "cluster_link_api_metrics_bad_erpc_results", diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl index f9625fae4..9502ad1c3 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_app.erl @@ -59,7 +59,7 @@ remove_msg_fwd_resources(LinksConf) -> create_metrics(LinksConf) -> lists:foreach( fun(#{name := ClusterName}) -> - ok = emqx_cluster_link:maybe_create_metrics(ClusterName) + ok = emqx_cluster_link_metrics:maybe_create_metrics(ClusterName) end, LinksConf ). diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index 0455ab21c..5e257a247 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -280,7 +280,7 @@ add_links(LinksConf) -> add_link(#{name := ClusterName, enable := true} = LinkConf) -> {ok, _Pid} = emqx_cluster_link_sup:ensure_actor(LinkConf), {ok, _} = emqx_cluster_link_mqtt:ensure_msg_fwd_resource(LinkConf), - ok = emqx_cluster_link:maybe_create_metrics(ClusterName), + ok = emqx_cluster_link_metrics:maybe_create_metrics(ClusterName), ok; add_link(_DisabledLinkConf) -> ok. @@ -291,7 +291,7 @@ remove_links(LinksConf) -> remove_link(Name) -> _ = emqx_cluster_link_mqtt:remove_msg_fwd_resource(Name), _ = ensure_actor_stopped(Name), - emqx_cluster_link:drop_metrics(Name). + emqx_cluster_link_metrics:drop_metrics(Name). update_links(LinksConf) -> [update_link(Link) || Link <- LinksConf]. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl index c45b12ae0..3e2ff1804 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl @@ -282,7 +282,7 @@ apply_operation(ActorID, Entry, MCounter, OpName, Lane) -> case MCounter band Marker of 0 when OpName =:= add -> Res = mria:dirty_update_counter(?EXTROUTE_TAB, Entry, Marker), - _ = emqx_cluster_link:routes_inc(ClusterName, 1), + _ = emqx_cluster_link_metrics:routes_inc(ClusterName, 1), ?tp("cluster_link_extrouter_route_added", #{}), Res; Marker when OpName =:= add -> @@ -293,7 +293,7 @@ apply_operation(ActorID, Entry, MCounter, OpName, Lane) -> 0 -> Record = #extroute{entry = Entry, mcounter = 0}, ok = mria:dirty_delete_object(?EXTROUTE_TAB, Record), - _ = emqx_cluster_link:routes_inc(ClusterName, -1), + _ = emqx_cluster_link_metrics:routes_inc(ClusterName, -1), ?tp("cluster_link_extrouter_route_deleted", #{}), 0; C -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl new file mode 100644 index 000000000..3d6f1edc8 --- /dev/null +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl @@ -0,0 +1,67 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_cluster_link_metrics). + +-include("emqx_cluster_link.hrl"). + +%% API +-export([ + maybe_create_metrics/1, + drop_metrics/1, + + get_metrics/1, + routes_inc/2 +]). + +%%-------------------------------------------------------------------- +%% Type definitions +%%-------------------------------------------------------------------- + +-define(METRICS, [ + ?route_metric +]). +-define(RATE_METRICS, []). + +%%-------------------------------------------------------------------- +%% metrics API +%%-------------------------------------------------------------------- + +get_metrics(ClusterName) -> + Nodes = emqx:running_nodes(), + Timeout = 15_000, + Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), + sequence_multicall_results(Nodes, Results). + +maybe_create_metrics(ClusterName) -> + case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of + true -> + ok = emqx_metrics_worker:reset_metrics(?METRIC_NAME, ClusterName); + false -> + ok = emqx_metrics_worker:create_metrics( + ?METRIC_NAME, ClusterName, ?METRICS, ?RATE_METRICS + ) + end. + +drop_metrics(ClusterName) -> + ok = emqx_metrics_worker:clear_metrics(?METRIC_NAME, ClusterName). + +routes_inc(ClusterName, Val) -> + catch emqx_metrics_worker:inc(?METRIC_NAME, ClusterName, ?route_metric, Val). + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +-spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> + {ok, [{node(), term()}]} | {error, [term()]}. +sequence_multicall_results(Nodes, Results) -> + case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of + {OkResults, []} -> + {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; + {_OkResults, BadResults} -> + {error, BadResults} + end. + +is_ok({_Node, {ok, _}}) -> true; +is_ok(_) -> false. From 216a6abed9e8d4d04f4b4df23b4958f2649ede8f Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:10:43 -0300 Subject: [PATCH 07/19] refactor: rename CRUD functions --- .../src/emqx_cluster_link_api.erl | 6 +++--- .../src/emqx_cluster_link_config.erl | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 0d748a267..552e7d6d3 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -181,7 +181,7 @@ fields(node_metrics) -> with_link( Name, fun() -> - case emqx_cluster_link_config:delete(Name) of + case emqx_cluster_link_config:delete_link(Name) of ok -> ?NO_CONTENT; {error, Reason} -> @@ -215,7 +215,7 @@ handle_list() -> ?OK(Response). handle_create(Name, Params) -> - case emqx_cluster_link_config:create(Params) of + case emqx_cluster_link_config:create_link(Params) of {ok, Link} -> ?CREATED(add_status(Name, Link)); {error, Reason} -> @@ -273,7 +273,7 @@ add_status(Name, Link) -> handle_update(Name, Params0) -> Params = Params0#{<<"name">> => Name}, - case emqx_cluster_link_config:update_one_link(Params) of + case emqx_cluster_link_config:update_link(Params) of {ok, Link} -> ?OK(add_status(Name, Link)); {error, Reason} -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index 5e257a247..c35b218db 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -30,9 +30,9 @@ -export([ %% General - create/1, - delete/1, - update_one_link/1, + create_link/1, + delete_link/1, + update_link/1, update/1, cluster/0, enabled_links/0, @@ -61,7 +61,7 @@ %% -create(LinkConfig) -> +create_link(LinkConfig) -> #{<<"name">> := Name} = LinkConfig, case emqx_conf:update( @@ -77,7 +77,7 @@ create(LinkConfig) -> {error, Reason} end. -delete(Name) -> +delete_link(Name) -> case emqx_conf:update( ?LINKS_PATH, @@ -91,7 +91,7 @@ delete(Name) -> {error, Reason} end. -update_one_link(LinkConfig) -> +update_link(LinkConfig) -> #{<<"name">> := Name} = LinkConfig, case emqx_conf:update( @@ -294,9 +294,9 @@ remove_link(Name) -> emqx_cluster_link_metrics:drop_metrics(Name). update_links(LinksConf) -> - [update_link(Link) || Link <- LinksConf]. + [do_update_link(Link) || Link <- LinksConf]. -update_link({OldLinkConf, #{enable := true, name := Name} = NewLinkConf}) -> +do_update_link({OldLinkConf, #{enable := true, name := Name} = NewLinkConf}) -> case what_is_changed(OldLinkConf, NewLinkConf) of both -> _ = ensure_actor_stopped(Name), @@ -309,7 +309,7 @@ update_link({OldLinkConf, #{enable := true, name := Name} = NewLinkConf}) -> msg_resource -> ok = update_msg_fwd_resource(OldLinkConf, NewLinkConf) end; -update_link({_OldLinkConf, #{enable := false, name := Name} = _NewLinkConf}) -> +do_update_link({_OldLinkConf, #{enable := false, name := Name} = _NewLinkConf}) -> _ = emqx_cluster_link_mqtt:remove_msg_fwd_resource(Name), ensure_actor_stopped(Name). From 2d507146ab3ff5669ff45363d69a09d21526ec82 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:13:48 -0300 Subject: [PATCH 08/19] refactor: change style of case clause --- apps/emqx_cluster_link/src/emqx_cluster_link_api.erl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 552e7d6d3..8e84b376b 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -498,13 +498,11 @@ with_link(Name, FoundFn, NotFoundFn) -> case emqx_cluster_link_config:link_raw(Name) of undefined -> NotFoundFn(); - Link0 = #{} -> + Link0 = #{} when is_function(FoundFn, 1) -> Link = fill_defaults_single(Link0), - {arity, Arity} = erlang:fun_info(FoundFn, arity), - case Arity of - 1 -> FoundFn(Link); - 0 -> FoundFn() - end + FoundFn(Link); + _Link = #{} when is_function(FoundFn, 0) -> + FoundFn() end. fill_defaults_single(Link0) -> From 82bb876de060811de030c34d565b197289ae0980 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:15:01 -0300 Subject: [PATCH 09/19] docs: improve descriptions Co-authored-by: Andrew Mayorov --- apps/emqx_cluster_link/src/emqx_cluster_link_api.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 8e84b376b..5eca0a944 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -55,7 +55,7 @@ schema("/cluster/links") -> }, post => #{ - description => "Create a cluster link configuration", + description => "Create a cluster link", tags => ?TAGS, 'requestBody' => link_config_schema(), responses => @@ -87,7 +87,7 @@ schema("/cluster/links/link/:name") -> }, delete => #{ - description => "Delete a cluster link configuration", + description => "Delete a cluster link", tags => ?TAGS, parameters => [param_path_name()], responses => From 76e51fa532c1b81db9a6bdbb5d157e7426b42077 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:17:45 -0300 Subject: [PATCH 10/19] fix: correctly use maybe match clause --- apps/emqx_cluster_link/src/emqx_cluster_link_config.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index c35b218db..2a97f2d69 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -208,7 +208,7 @@ pre_config_update(?LINKS_PATH, {create, LinkRawConf}, OldRawConf) -> pre_config_update(?LINKS_PATH, {update, LinkRawConf}, OldRawConf) -> #{<<"name">> := Name} = LinkRawConf, maybe - {ok, {_Found, Front, Rear}} = safe_take(Name, OldRawConf), + {_Found, Front, Rear} ?= safe_take(Name, OldRawConf), NewRawConf0 = Front ++ [LinkRawConf] ++ Rear, NewRawConf = convert_certs(maybe_increment_ps_actor_incr(NewRawConf0, OldRawConf)), {ok, NewRawConf} @@ -218,7 +218,7 @@ pre_config_update(?LINKS_PATH, {update, LinkRawConf}, OldRawConf) -> end; pre_config_update(?LINKS_PATH, {delete, Name}, OldRawConf) -> maybe - {ok, {_Found, Front, Rear}} = safe_take(Name, OldRawConf), + {_Found, Front, Rear} ?= safe_take(Name, OldRawConf), NewRawConf = Front ++ Rear, {ok, NewRawConf} else @@ -420,5 +420,5 @@ safe_take(Name, Transformations) -> {_Front, []} -> not_found; {Front, [Found | Rear]} -> - {ok, {Found, Front, Rear}} + {Found, Front, Rear} end. From 79db2e6d7faaa9a96e9da4acb9d6f9ddfba93f1a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 10:28:11 -0300 Subject: [PATCH 11/19] test: fix flaky test --- .../test/emqx_cluster_link_api_SUITE.erl | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 27cbe7532..0e5143865 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -303,23 +303,27 @@ t_crud(_Config) -> t_status(Config) -> [SN1 | _] = ?config(source_nodes, Config), Name = <<"cl.target">>, - ?assertMatch( - {200, [ - #{ - <<"status">> := <<"connected">>, - <<"node_status">> := [ - #{ - <<"node">> := _, - <<"status">> := <<"connected">> - }, - #{ - <<"node">> := _, - <<"status">> := <<"connected">> - } - ] - } - ]}, - list() + ?retry( + 100, + 10, + ?assertMatch( + {200, [ + #{ + <<"status">> := <<"connected">>, + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + } + ] + } + ]}, + list() + ) ), ?assertMatch( {200, #{ From 34f5a886cea0caa0d9bfa5631ac36fdcf261b9ec Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 12:06:48 -0300 Subject: [PATCH 12/19] refactor(cluster link api): return erpc errors in status and metrics responses --- .../src/emqx_cluster_link_api.erl | 151 ++++++++++-------- .../src/emqx_cluster_link_metrics.erl | 15 +- .../src/emqx_cluster_link_mqtt.erl | 23 +-- .../test/emqx_cluster_link_api_SUITE.erl | 24 ++- 4 files changed, 109 insertions(+), 104 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 5eca0a944..257a19854 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -201,9 +201,15 @@ fields(node_metrics) -> handle_list() -> Links = get_raw(), - NodeResults = get_all_link_status_cluster(), - NameToStatus = collect_all_status(NodeResults), - EmptyStatus = #{status => inconsistent, node_status => []}, + NodeRPCResults = emqx_cluster_link_mqtt:get_all_resources_cluster(), + {NameToStatus, Errors} = collect_all_status(NodeRPCResults), + NodeErrors = lists:map( + fun({Node, Error}) -> + #{node => Node, status => inconsistent, reason => Error} + end, + Errors + ), + EmptyStatus = #{status => inconsistent, node_status => NodeErrors}, Response = lists:map( fun(#{<<"name">> := Name} = Link) -> @@ -227,25 +233,32 @@ handle_lookup(Name, Link) -> ?OK(add_status(Name, Link)). handle_metrics(Name) -> - case emqx_cluster_link_metrics:get_metrics(Name) of - {error, BadResults} -> + Results = emqx_cluster_link_metrics:get_metrics(Name), + {NodeMetrics0, NodeErrors} = + lists:foldl( + fun + ({Node, {ok, Metrics}}, {OkAccIn, ErrAccIn}) -> + {[format_metrics(Node, Metrics) | OkAccIn], ErrAccIn}; + ({Node, Error}, {OkAccIn, ErrAccIn}) -> + {OkAccIn, [{Node, Error} | ErrAccIn]} + end, + {[], []}, + Results + ), + case NodeErrors of + [] -> + ok; + [_ | _] -> ?SLOG(warning, #{ msg => "cluster_link_api_metrics_bad_erpc_results", - results => BadResults - }), - ?OK(#{metrics => #{}, node_metrics => []}); - {ok, NodeResults} -> - NodeMetrics = - lists:map( - fun({Node, Metrics}) -> - format_metrics(Node, Metrics) - end, - NodeResults - ), - AggregatedMetrics = aggregate_metrics(NodeMetrics), - Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics}, - ?OK(Response) - end. + errors => maps:from_list(NodeErrors) + }) + end, + NodeMetrics1 = lists:map(fun({Node, _Error}) -> format_metrics(Node, #{}) end, NodeErrors), + NodeMetrics = NodeMetrics1 ++ NodeMetrics0, + AggregatedMetrics = aggregate_metrics(NodeMetrics), + Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics}, + ?OK(Response). aggregate_metrics(NodeMetrics) -> ErrorLogger = fun(_) -> ok end, @@ -267,8 +280,8 @@ format_metrics(Node, Metrics) -> }. add_status(Name, Link) -> - NodeResults = get_link_status_cluster(Name), - Status = collect_single_status(NodeResults), + NodeRPCResults = emqx_cluster_link_mqtt:get_resource_cluster(Name), + Status = collect_single_status(NodeRPCResults), maps:merge(Link, Status). handle_update(Name, Params0) -> @@ -289,64 +302,62 @@ get_raw() -> ), Links. -get_all_link_status_cluster() -> - case emqx_cluster_link_mqtt:get_all_resources_cluster() of - {error, BadResults} -> - ?SLOG(warning, #{ - msg => "cluster_link_api_all_status_bad_erpc_results", - results => BadResults - }), - []; - {ok, NodeResults} -> - NodeResults - end. - -get_link_status_cluster(Name) -> - case emqx_cluster_link_mqtt:get_resource_cluster(Name) of - {error, BadResults} -> - ?SLOG(warning, #{ - msg => "cluster_link_api_lookup_status_bad_erpc_results", - results => BadResults - }), - []; - {ok, NodeResults} -> - NodeResults - end. - --spec collect_all_status([{node(), #{cluster_name() => _}}]) -> - #{ +-spec collect_all_status([{node(), {ok, #{cluster_name() => _}} | _Error}]) -> + {ClusterToStatus, Errors} +when + ClusterToStatus :: #{ cluster_name() => #{ node := node(), status := emqx_resource:resource_status() | inconsistent } - }. + }, + Errors :: [{node(), term()}]. collect_all_status(NodeResults) -> - Reindexed = lists:foldl( - fun({Node, AllLinkData}, Acc) -> - maps:fold( - fun(Name, Data, AccIn) -> - collect_all_status1(Node, Name, Data, AccIn) - end, - Acc, - AllLinkData - ) + {Reindexed, Errors} = lists:foldl( + fun + ({Node, {ok, AllLinkData}}, {OkAccIn, ErrAccIn}) -> + OkAcc = maps:fold( + fun(Name, Data, AccIn) -> + collect_all_status1(Node, Name, Data, AccIn) + end, + OkAccIn, + AllLinkData + ), + {OkAcc, ErrAccIn}; + ({Node, Error}, {OkAccIn, ErrAccIn}) -> + {OkAccIn, [{Node, Error} | ErrAccIn]} end, - #{}, + {#{}, []}, NodeResults ), - maps:fold( + NoErrors = + case Errors of + [] -> + true; + [_ | _] -> + ?SLOG(warning, #{ + msg => "cluster_link_api_lookup_status_bad_erpc_results", + errors => Errors + }), + false + end, + ClusterToStatus = maps:fold( fun(Name, NodeToData, Acc) -> OnlyStatus = [S || #{status := S} <- maps:values(NodeToData)], SummaryStatus = case lists:usort(OnlyStatus) of - [SameStatus] -> SameStatus; + [SameStatus] when NoErrors -> SameStatus; _ -> inconsistent end, NodeStatus = lists:map( - fun({Node, #{status := S}}) -> - #{node => Node, status => S} + fun + ({Node, #{status := S}}) -> + #{node => Node, status => S}; + ({Node, Error0}) -> + Error = emqx_logger_jsonfmt:best_effort_json(Error0), + #{node => Node, status => inconsistent, reason => Error} end, - maps:to_list(NodeToData) + maps:to_list(NodeToData) ++ Errors ), Acc#{ Name => #{ @@ -357,7 +368,8 @@ collect_all_status(NodeResults) -> end, #{}, Reindexed - ). + ), + {ClusterToStatus, Errors}. collect_all_status1(Node, Name, Data, Acc) -> maps:update_with( @@ -371,12 +383,13 @@ collect_single_status(NodeResults) -> NodeStatus = lists:map( fun - ({Node, {ok, #{status := S}}}) -> + ({Node, {ok, {ok, #{status := S}}}}) -> #{node => Node, status => S}; - ({Node, {error, _}}) -> + ({Node, {ok, {error, _}}}) -> #{node => Node, status => ?status_disconnected}; - ({Node, _}) -> - #{node => Node, status => inconsistent} + ({Node, Error0}) -> + Error = emqx_logger_jsonfmt:best_effort_json(Error0), + #{node => Node, status => inconsistent, reason => Error} end, NodeResults ), diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl index 3d6f1edc8..695419c50 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl @@ -31,7 +31,7 @@ get_metrics(ClusterName) -> Nodes = emqx:running_nodes(), Timeout = 15_000, Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), - sequence_multicall_results(Nodes, Results). + lists:zip(Nodes, Results). maybe_create_metrics(ClusterName) -> case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of @@ -52,16 +52,3 @@ routes_inc(ClusterName, Val) -> %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- - --spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> - {ok, [{node(), term()}]} | {error, [term()]}. -sequence_multicall_results(Nodes, Results) -> - case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of - {OkResults, []} -> - {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; - {_OkResults, BadResults} -> - {error, BadResults} - end. - -is_ok({_Node, {ok, _}}) -> true; -is_ok(_) -> false. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl index 3b37a304e..65e02f53e 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl @@ -106,20 +106,18 @@ remove_msg_fwd_resource(ClusterName) -> emqx_resource:remove_local(?MSG_RES_ID(ClusterName)). -spec get_all_resources_cluster() -> - {ok, [{node(), #{cluster_name() => emqx_resource:resource_data()}}]} - | {error, [term()]}. + [{node(), emqx_rpc:erpc(#{cluster_name() => emqx_resource:resource_data()})}]. get_all_resources_cluster() -> Nodes = emqx:running_nodes(), Results = emqx_cluster_link_proto_v1:get_all_resources(Nodes), - sequence_multicall_results(Nodes, Results). + lists:zip(Nodes, Results). -spec get_resource_cluster(cluster_name()) -> - {ok, [{node(), {ok, emqx_resource:resource_data()} | {error, not_found}}]} - | {error, [term()]}. + [{node(), {ok, {ok, emqx_resource:resource_data()} | {error, not_found}} | _Error}]. get_resource_cluster(ClusterName) -> Nodes = emqx:running_nodes(), Results = emqx_cluster_link_proto_v1:get_resource(Nodes, ClusterName), - sequence_multicall_results(Nodes, Results). + lists:zip(Nodes, Results). %% RPC Target in `emqx_cluster_link_proto_v1'. -spec get_resource_local_v1(cluster_name()) -> @@ -478,16 +476,3 @@ emqtt_client_opts(ClientIdSuffix, ClusterConf) -> #{clientid := BaseClientId} = Opts = emqx_cluster_link_config:mk_emqtt_options(ClusterConf), ClientId = emqx_bridge_mqtt_lib:clientid_base([BaseClientId, ClientIdSuffix]), Opts#{clientid => ClientId}. - --spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) -> - {ok, [{node(), term()}]} | {error, [term()]}. -sequence_multicall_results(Nodes, Results) -> - case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of - {OkResults, []} -> - {ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]}; - {_OkResults, BadResults} -> - {error, BadResults} - end. - -is_ok({_Node, {ok, _}}) -> true; -is_ok(_) -> false. diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 0e5143865..6b469272a 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -409,7 +409,17 @@ t_status(Config) -> {200, [ #{ <<"status">> := <<"inconsistent">>, - <<"node_status">> := [] + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"inconsistent">>, + <<"reason">> := _ + } + ] } ]}, list() @@ -417,7 +427,17 @@ t_status(Config) -> ?assertMatch( {200, #{ <<"status">> := <<"inconsistent">>, - <<"node_status">> := [] + <<"node_status">> := [ + #{ + <<"node">> := _, + <<"status">> := <<"connected">> + }, + #{ + <<"node">> := _, + <<"status">> := <<"inconsistent">>, + <<"reason">> := _ + } + ] }}, get_link(Name) ), From 7829838dc5f9083bdbc41bbf61b534fb6d0c0ef9 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 12:26:01 -0300 Subject: [PATCH 13/19] feat(cluster link api): add forwarding resource metrics to response --- .../src/emqx_cluster_link_api.erl | 40 +++++++++++++---- .../src/emqx_cluster_link_metrics.erl | 8 +++- .../src/emqx_cluster_link_mqtt.erl | 6 +++ .../test/emqx_cluster_link_api_SUITE.erl | 45 +++++++++++++++++-- 4 files changed, 86 insertions(+), 13 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 257a19854..6511cf395 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -237,10 +237,19 @@ handle_metrics(Name) -> {NodeMetrics0, NodeErrors} = lists:foldl( fun - ({Node, {ok, Metrics}}, {OkAccIn, ErrAccIn}) -> - {[format_metrics(Node, Metrics) | OkAccIn], ErrAccIn}; - ({Node, Error}, {OkAccIn, ErrAccIn}) -> - {OkAccIn, [{Node, Error} | ErrAccIn]} + ({Node, {ok, RouterMetrics}, {ok, ResourceMetrics}}, {OkAccIn, ErrAccIn}) -> + OkAcc = [format_metrics(Node, RouterMetrics, ResourceMetrics) | OkAccIn], + {OkAcc, ErrAccIn}; + ({Node, {ok, RouterMetrics}, ResError}, {OkAccIn, ErrAccIn}) -> + OkAcc = [format_metrics(Node, RouterMetrics, _ResourceMetrics = #{}) | OkAccIn], + {OkAcc, [{Node, #{resource => ResError}} | ErrAccIn]}; + ({Node, RouterError, {ok, ResourceMetrics}}, {OkAccIn, ErrAccIn}) -> + OkAcc = [format_metrics(Node, _RouterMetrics = #{}, ResourceMetrics) | OkAccIn], + {OkAcc, [{Node, #{router => RouterError}} | ErrAccIn]}; + ({Node, RouterError, ResourceError}, {OkAccIn, ErrAccIn}) -> + {OkAccIn, [ + {Node, #{router => RouterError, resource => ResourceError}} | ErrAccIn + ]} end, {[], []}, Results @@ -254,7 +263,7 @@ handle_metrics(Name) -> errors => maps:from_list(NodeErrors) }) end, - NodeMetrics1 = lists:map(fun({Node, _Error}) -> format_metrics(Node, #{}) end, NodeErrors), + NodeMetrics1 = lists:map(fun({Node, _Error}) -> format_metrics(Node, #{}, #{}) end, NodeErrors), NodeMetrics = NodeMetrics1 ++ NodeMetrics0, AggregatedMetrics = aggregate_metrics(NodeMetrics), Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics}, @@ -270,12 +279,27 @@ aggregate_metrics(NodeMetrics) -> NodeMetrics ). -format_metrics(Node, Metrics) -> - Routes = emqx_utils_maps:deep_get([counters, ?route_metric], Metrics, 0), +format_metrics(Node, RouterMetrics, ResourceMetrics) -> + Get = fun(Path, Map) -> emqx_utils_maps:deep_get(Path, Map, 0) end, + Routes = Get([counters, ?route_metric], RouterMetrics), #{ node => Node, metrics => #{ - ?route_metric => Routes + ?route_metric => Routes, + + 'matched' => Get([counters, 'matched'], ResourceMetrics), + 'success' => Get([counters, 'success'], ResourceMetrics), + 'failed' => Get([counters, 'failed'], ResourceMetrics), + 'dropped' => Get([counters, 'dropped'], ResourceMetrics), + 'retried' => Get([counters, 'retried'], ResourceMetrics), + 'received' => Get([counters, 'received'], ResourceMetrics), + + 'queuing' => Get([gauges, 'queuing'], ResourceMetrics), + 'inflight' => Get([gauges, 'inflight'], ResourceMetrics), + + 'rate' => Get([rate, 'matched', current], ResourceMetrics), + 'rate_last5m' => Get([rate, 'matched', last5m], ResourceMetrics), + 'rate_max' => Get([rate, 'matched', max], ResourceMetrics) } }. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl index 695419c50..61e5fc9ce 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl @@ -30,8 +30,12 @@ get_metrics(ClusterName) -> Nodes = emqx:running_nodes(), Timeout = 15_000, - Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), - lists:zip(Nodes, Results). + RouterResults = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout), + ResourceId = emqx_cluster_link_mqtt:resource_id(ClusterName), + ResourceResults = emqx_metrics_proto_v2:get_metrics( + Nodes, resource_metrics, ResourceId, Timeout + ), + lists:zip3(Nodes, RouterResults, ResourceResults). maybe_create_metrics(ClusterName) -> case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl index 65e02f53e..5a0f6db9c 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl @@ -9,6 +9,7 @@ -include_lib("emqx/include/emqx_mqtt.hrl"). -include_lib("emqx/include/logger.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). -behaviour(emqx_resource). -behaviour(ecpool_worker). @@ -27,6 +28,7 @@ ]). -export([ + resource_id/1, ensure_msg_fwd_resource/1, remove_msg_fwd_resource/1, decode_route_op/1, @@ -92,6 +94,10 @@ -type cluster_name() :: binary(). +-spec resource_id(cluster_name()) -> resource_id(). +resource_id(ClusterName) -> + ?MSG_RES_ID(ClusterName). + -spec ensure_msg_fwd_resource(map()) -> {ok, emqx_resource:resource_data() | already_started} | {error, Reason :: term()}. ensure_msg_fwd_resource(#{name := Name, resource_opts := ResOpts} = ClusterConf) -> diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 6b469272a..e8e8f345e 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -477,15 +477,54 @@ t_metrics(Config) -> ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{ + <<"routes">> := 0, + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + }, <<"node_metrics">> := [ #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{ + <<"routes">> := 0, + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + } }, #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{ + <<"routes">> := 0, + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + } } ] }}, From dda73651c58cb03ed7b53164041bf768fb30e3f1 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jul 2024 15:29:27 -0300 Subject: [PATCH 14/19] fix(cluster link metrics): use periodic full table scan and gauge to count routes --- .../src/emqx_cluster_link_api.erl | 2 +- .../src/emqx_cluster_link_bookkeeper.erl | 84 +++++++++++++++++++ .../src/emqx_cluster_link_config.erl | 8 +- .../src/emqx_cluster_link_extrouter.erl | 34 +++++--- .../src/emqx_cluster_link_metrics.erl | 8 +- .../src/emqx_cluster_link_schema.erl | 13 ++- .../src/emqx_cluster_link_sup.erl | 12 ++- .../test/emqx_cluster_link_SUITE.erl | 2 + .../test/emqx_cluster_link_api_SUITE.erl | 61 +++++++++----- 9 files changed, 185 insertions(+), 39 deletions(-) create mode 100644 apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 6511cf395..21c435a0f 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -281,7 +281,7 @@ aggregate_metrics(NodeMetrics) -> format_metrics(Node, RouterMetrics, ResourceMetrics) -> Get = fun(Path, Map) -> emqx_utils_maps:deep_get(Path, Map, 0) end, - Routes = Get([counters, ?route_metric], RouterMetrics), + Routes = Get([gauges, ?route_metric], RouterMetrics), #{ node => Node, metrics => #{ diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl new file mode 100644 index 000000000..992fc7bf1 --- /dev/null +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl @@ -0,0 +1,84 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_cluster_link_bookkeeper). + +%% API +-export([ + start_link/0 +]). + +%% `gen_server' API +-export([ + init/1, + handle_continue/2, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +%%------------------------------------------------------------------------------ +%% Type declarations +%%------------------------------------------------------------------------------ + +%% call/cast/info events +-record(tally_routes, {}). + +%%------------------------------------------------------------------------------ +%% API +%%------------------------------------------------------------------------------ + +-spec start_link() -> gen_server:start_ret(). +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, _InitOpts = #{}, _Opts = []). + +%%------------------------------------------------------------------------------ +%% `gen_server' API +%%------------------------------------------------------------------------------ + +init(_Opts) -> + State = #{}, + {ok, State, {continue, #tally_routes{}}}. + +handle_continue(#tally_routes{}, State) -> + handle_tally_routes(), + {noreply, State}. + +handle_call(_Call, _From, State) -> + {reply, {error, bad_call}, State}. + +handle_cast(_Cast, State) -> + {noreply, State}. + +handle_info(#tally_routes{}, State) -> + handle_tally_routes(), + {noreply, State}; +handle_info(_Info, State) -> + {noreply, State}. + +%%------------------------------------------------------------------------------ +%% Internal fns +%%------------------------------------------------------------------------------ + +cluster_names() -> + Links = emqx_cluster_link_config:links(), + lists:map(fun(#{name := Name}) -> Name end, Links). + +ensure_timer(Event, Timeout) -> + _ = erlang:send_after(Timeout, self(), Event), + ok. + +handle_tally_routes() -> + ClusterNames = cluster_names(), + tally_routes(ClusterNames), + ensure_timer(#tally_routes{}, emqx_cluster_link_config:tally_routes_interval()), + ok. + +tally_routes([ClusterName | ClusterNames]) -> + Tab = emqx_cluster_link_extrouter:extroute_tab(), + Pat = emqx_cluster_link_extrouter:cluster_routes_ms(ClusterName), + NumRoutes = ets:select_count(Tab, Pat), + emqx_cluster_link_metrics:routes_set(ClusterName, NumRoutes), + tally_routes(ClusterNames); +tally_routes([]) -> + ok. diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index 2a97f2d69..fd84a5b7f 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -46,7 +46,9 @@ %% Actor Lifecycle actor_ttl/0, actor_gc_interval/0, - actor_heartbeat_interval/0 + actor_heartbeat_interval/0, + %% Metrics + tally_routes_interval/0 ]). -export([ @@ -163,6 +165,10 @@ actor_gc_interval() -> actor_heartbeat_interval() -> actor_ttl() div 3. +-spec tally_routes_interval() -> _Milliseconds :: timeout(). +tally_routes_interval() -> + emqx_config:get([cluster, tally_routes_interval]). + %% mk_emqtt_options(#{server := Server, ssl := #{enable := EnableSsl} = Ssl} = LinkConf) -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl index 3e2ff1804..e76b24d79 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl @@ -34,6 +34,9 @@ apply_actor_operation/5 ]). +%% Internal export for bookkeeping +-export([cluster_routes_ms/1, extroute_tab/0]). + %% Strictly monotonically increasing integer. -type smint() :: integer(). @@ -147,6 +150,18 @@ make_extroute_rec_pat(Entry) -> [{1, extroute}, {#extroute.entry, Entry}] ). +%% Internal exports for bookkeeping +cluster_routes_ms(ClusterName) -> + TopicPat = '_', + RouteIDPat = '_', + Pat = make_extroute_rec_pat( + emqx_trie_search:make_pat(TopicPat, ?ROUTE_ID(ClusterName, RouteIDPat)) + ), + [{Pat, [], [true]}]. + +extroute_tab() -> + ?EXTROUTE_TAB. + %% -record(state, { @@ -256,33 +271,31 @@ actor_apply_operation( apply_actor_operation(ActorID, Incarnation, Entry, OpName, Lane) -> _ = assert_current_incarnation(ActorID, Incarnation), - apply_operation(ActorID, Entry, OpName, Lane). + apply_operation(Entry, OpName, Lane). -apply_operation(ActorID, Entry, OpName, Lane) -> +apply_operation(Entry, OpName, Lane) -> %% NOTE %% This is safe sequence of operations only on core nodes. On replicants, %% `mria:dirty_update_counter/3` will be replicated asynchronously, which %% means this read can be stale. case mnesia:dirty_read(?EXTROUTE_TAB, Entry) of [#extroute{mcounter = MCounter}] -> - apply_operation(ActorID, Entry, MCounter, OpName, Lane); + apply_operation(Entry, MCounter, OpName, Lane); [] -> - apply_operation(ActorID, Entry, 0, OpName, Lane) + apply_operation(Entry, 0, OpName, Lane) end. -apply_operation(ActorID, Entry, MCounter, OpName, Lane) -> +apply_operation(Entry, MCounter, OpName, Lane) -> %% NOTE %% We are relying on the fact that changes to each individual lane of this %% multi-counter are synchronized. Without this, such counter updates would %% be unsafe. Instead, we would have to use another, more complex approach, %% that runs `ets:lookup/2` + `ets:select_replace/2` in a loop until the %% counter is updated accordingly. - ?ACTOR_ID(ClusterName, _Actor) = ActorID, Marker = 1 bsl Lane, case MCounter band Marker of 0 when OpName =:= add -> Res = mria:dirty_update_counter(?EXTROUTE_TAB, Entry, Marker), - _ = emqx_cluster_link_metrics:routes_inc(ClusterName, 1), ?tp("cluster_link_extrouter_route_added", #{}), Res; Marker when OpName =:= add -> @@ -293,7 +306,6 @@ apply_operation(ActorID, Entry, MCounter, OpName, Lane) -> 0 -> Record = #extroute{entry = Entry, mcounter = 0}, ok = mria:dirty_delete_object(?EXTROUTE_TAB, Record), - _ = emqx_cluster_link_metrics:routes_inc(ClusterName, -1), ?tp("cluster_link_extrouter_route_deleted", #{}), 0; C -> @@ -368,16 +380,16 @@ clean_incarnation(Rec = #actor{id = {Cluster, Actor}}) -> mnesia_clean_incarnation(#actor{id = Actor, incarnation = Incarnation, lane = Lane}) -> case mnesia:read(?EXTROUTE_ACTOR_TAB, Actor, write) of [#actor{incarnation = Incarnation}] -> - _ = clean_lane(Actor, Lane), + _ = clean_lane(Lane), mnesia:delete(?EXTROUTE_ACTOR_TAB, Actor, write); _Renewed -> stale end. -clean_lane(ActorID, Lane) -> +clean_lane(Lane) -> ets:foldl( fun(#extroute{entry = Entry, mcounter = MCounter}, _) -> - apply_operation(ActorID, Entry, MCounter, delete, Lane) + apply_operation(Entry, MCounter, delete, Lane) end, 0, ?EXTROUTE_TAB diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl index 61e5fc9ce..36c5e791d 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_metrics.erl @@ -11,7 +11,7 @@ drop_metrics/1, get_metrics/1, - routes_inc/2 + routes_set/2 ]). %%-------------------------------------------------------------------- @@ -50,8 +50,10 @@ maybe_create_metrics(ClusterName) -> drop_metrics(ClusterName) -> ok = emqx_metrics_worker:clear_metrics(?METRIC_NAME, ClusterName). -routes_inc(ClusterName, Val) -> - catch emqx_metrics_worker:inc(?METRIC_NAME, ClusterName, ?route_metric, Val). +routes_set(ClusterName, Val) -> + catch emqx_metrics_worker:set_gauge( + ?METRIC_NAME, ClusterName, <<"singleton">>, ?route_metric, Val + ). %%-------------------------------------------------------------------- %% Internal functions diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl index a369429d5..dd5ab66f6 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl @@ -30,7 +30,18 @@ namespace() -> "cluster". roots() -> []. injected_fields() -> - #{cluster => [{links, links_schema(#{})}]}. + #{ + cluster => [ + {links, links_schema(#{})}, + {tally_routes_interval, + hoconsc:mk( + emqx_schema:timeout_duration(), #{ + default => <<"15s">>, + importance => ?IMPORTANCE_HIDDEN + } + )} + ] + }. links_schema(Meta) -> ?HOCON(?ARRAY(?R_REF("link")), Meta#{ diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl index 81b5afb4c..42f195cf7 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_sup.erl @@ -30,12 +30,13 @@ init(LinksConf) -> period => 5 }, Metrics = emqx_metrics_worker:child_spec(metrics, ?METRIC_NAME), + BookKeeper = bookkeeper_spec(), ExtrouterGC = extrouter_gc_spec(), RouteActors = [ sup_spec(Name, ?ACTOR_MODULE, [LinkConf]) || #{name := Name} = LinkConf <- LinksConf ], - {ok, {SupFlags, [Metrics, ExtrouterGC | RouteActors]}}. + {ok, {SupFlags, [Metrics, BookKeeper, ExtrouterGC | RouteActors]}}. extrouter_gc_spec() -> %% NOTE: This one is currently global, not per-link. @@ -56,6 +57,15 @@ sup_spec(Id, Mod, Args) -> modules => [Mod] }. +bookkeeper_spec() -> + #{ + id => bookkeeper, + start => {emqx_cluster_link_bookkeeper, start_link, []}, + restart => permanent, + type => worker, + shutdown => 5_000 + }. + ensure_actor(#{name := Name} = LinkConf) -> case supervisor:start_child(?SERVER, sup_spec(Name, ?ACTOR_MODULE, [LinkConf])) of {ok, Pid} -> diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl index e023aacab..21aa1a70e 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl @@ -53,6 +53,7 @@ mk_source_cluster(BaseName, Config) -> SourceConf = "cluster {" "\n name = cl.source" + "\n tally_routes_interval = 300ms" "\n links = [" "\n { enable = true" "\n name = cl.target" @@ -75,6 +76,7 @@ mk_target_cluster(BaseName, Config) -> TargetConf = "cluster {" "\n name = cl.target" + "\n tally_routes_interval = 300ms" "\n links = [" "\n { enable = true" "\n name = cl.source" diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index e8e8f345e..486179af1 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -600,14 +600,22 @@ t_metrics(Config) -> #{?snk_kind := clink_route_sync_complete} ), - %% Routes = 2 in source cluster, because the target cluster has some topic filters - %% configured and subscribers to them, which were replicated to the source cluster. - ?assertMatch( - {200, #{ - <<"metrics">> := #{<<"routes">> := 2}, - <<"node_metrics">> := _ - }}, - get_metrics(source, SourceName) + %% Routes = 4 in source cluster, because the target cluster has some topic filters + %% configured and subscribers to them, which were replicated to the source cluster, + %% and we have 2 nodes with 2 routes each. + ?retry( + 300, + 10, + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 4}, + <<"node_metrics">> := [ + #{<<"metrics">> := #{<<"routes">> := 2}}, + #{<<"metrics">> := #{<<"routes">> := 2}} + ] + }}, + get_metrics(source, SourceName) + ) ), ?assertMatch( {200, #{ @@ -627,12 +635,19 @@ t_metrics(Config) -> #{?snk_kind := clink_route_sync_complete} ), - ?assertMatch( - {200, #{ - <<"metrics">> := #{<<"routes">> := 1}, - <<"node_metrics">> := _ - }}, - get_metrics(source, SourceName) + ?retry( + 300, + 10, + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 2}, + <<"node_metrics">> := [ + #{<<"metrics">> := #{<<"routes">> := 1}}, + #{<<"metrics">> := #{<<"routes">> := 1}} + ] + }}, + get_metrics(source, SourceName) + ) ), %% Disabling the link should remove the routes. @@ -658,12 +673,16 @@ t_metrics(Config) -> #{?snk_kind := "cluster_link_extrouter_route_deleted"} ), - ?assertMatch( - {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, - <<"node_metrics">> := _ - }}, - get_metrics(source, SourceName) + ?retry( + 300, + 10, + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 0}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ) ), %% Enabling again @@ -678,7 +697,7 @@ t_metrics(Config) -> ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 1}, + <<"metrics">> := #{<<"routes">> := 2}, <<"node_metrics">> := _ }}, get_metrics(source, SourceName) From 87e4e2340d80f95537cfcc61bc7db62e325c6302 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 25 Jul 2024 11:10:54 -0300 Subject: [PATCH 15/19] refactor: better metric and error fold --- .../src/emqx_cluster_link_api.erl | 31 ++++++++++--------- .../test/emqx_cluster_link_api_SUITE.erl | 16 ++++++---- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 21c435a0f..4ff1ee7ef 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -236,20 +236,11 @@ handle_metrics(Name) -> Results = emqx_cluster_link_metrics:get_metrics(Name), {NodeMetrics0, NodeErrors} = lists:foldl( - fun - ({Node, {ok, RouterMetrics}, {ok, ResourceMetrics}}, {OkAccIn, ErrAccIn}) -> - OkAcc = [format_metrics(Node, RouterMetrics, ResourceMetrics) | OkAccIn], - {OkAcc, ErrAccIn}; - ({Node, {ok, RouterMetrics}, ResError}, {OkAccIn, ErrAccIn}) -> - OkAcc = [format_metrics(Node, RouterMetrics, _ResourceMetrics = #{}) | OkAccIn], - {OkAcc, [{Node, #{resource => ResError}} | ErrAccIn]}; - ({Node, RouterError, {ok, ResourceMetrics}}, {OkAccIn, ErrAccIn}) -> - OkAcc = [format_metrics(Node, _RouterMetrics = #{}, ResourceMetrics) | OkAccIn], - {OkAcc, [{Node, #{router => RouterError}} | ErrAccIn]}; - ({Node, RouterError, ResourceError}, {OkAccIn, ErrAccIn}) -> - {OkAccIn, [ - {Node, #{router => RouterError, resource => ResourceError}} | ErrAccIn - ]} + fun({Node, RouterMetrics0, ResourceMetrics0}, {OkAccIn, ErrAccIn}) -> + {RouterMetrics, RouterError} = get_metrics_or_errors(RouterMetrics0), + {ResourceMetrics, ResourceError} = get_metrics_or_errors(ResourceMetrics0), + ErrAcc = append_errors(RouterError, ResourceError, Node, ErrAccIn), + {[format_metrics(Node, RouterMetrics, ResourceMetrics) | OkAccIn], ErrAcc} end, {[], []}, Results @@ -269,6 +260,18 @@ handle_metrics(Name) -> Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics}, ?OK(Response). +get_metrics_or_errors({ok, Metrics}) -> + {Metrics, undefined}; +get_metrics_or_errors(Error) -> + {#{}, Error}. + +append_errors(undefined, undefined, _Node, Acc) -> + Acc; +append_errors(RouterError, ResourceError, Node, Acc) -> + Err0 = emqx_utils_maps:put_if(#{}, router, RouterError, RouterError =/= undefined), + Err = emqx_utils_maps:put_if(Err0, resource, ResourceError, ResourceError =/= undefined), + [{Node, Err} | Acc]. + aggregate_metrics(NodeMetrics) -> ErrorLogger = fun(_) -> ok end, lists:foldl( diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 486179af1..5e3ae5d50 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -695,12 +695,16 @@ t_metrics(Config) -> #{?snk_kind := "cluster_link_extrouter_route_added"} ), - ?assertMatch( - {200, #{ - <<"metrics">> := #{<<"routes">> := 2}, - <<"node_metrics">> := _ - }}, - get_metrics(source, SourceName) + ?retry( + 300, + 10, + ?assertMatch( + {200, #{ + <<"metrics">> := #{<<"routes">> := 2}, + <<"node_metrics">> := _ + }}, + get_metrics(source, SourceName) + ) ), ok. From 30259284d148c6ec306ee8fe60894b3eb147b5f3 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 25 Jul 2024 11:17:29 -0300 Subject: [PATCH 16/19] chore: namespace metrics by type --- .../src/emqx_cluster_link_api.erl | 29 +++-- .../test/emqx_cluster_link_api_SUITE.erl | 120 ++++++++++-------- 2 files changed, 82 insertions(+), 67 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 4ff1ee7ef..45b97e8f7 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -288,21 +288,24 @@ format_metrics(Node, RouterMetrics, ResourceMetrics) -> #{ node => Node, metrics => #{ - ?route_metric => Routes, + router => #{ + ?route_metric => Routes + }, + forwarding => #{ + 'matched' => Get([counters, 'matched'], ResourceMetrics), + 'success' => Get([counters, 'success'], ResourceMetrics), + 'failed' => Get([counters, 'failed'], ResourceMetrics), + 'dropped' => Get([counters, 'dropped'], ResourceMetrics), + 'retried' => Get([counters, 'retried'], ResourceMetrics), + 'received' => Get([counters, 'received'], ResourceMetrics), - 'matched' => Get([counters, 'matched'], ResourceMetrics), - 'success' => Get([counters, 'success'], ResourceMetrics), - 'failed' => Get([counters, 'failed'], ResourceMetrics), - 'dropped' => Get([counters, 'dropped'], ResourceMetrics), - 'retried' => Get([counters, 'retried'], ResourceMetrics), - 'received' => Get([counters, 'received'], ResourceMetrics), + 'queuing' => Get([gauges, 'queuing'], ResourceMetrics), + 'inflight' => Get([gauges, 'inflight'], ResourceMetrics), - 'queuing' => Get([gauges, 'queuing'], ResourceMetrics), - 'inflight' => Get([gauges, 'inflight'], ResourceMetrics), - - 'rate' => Get([rate, 'matched', current], ResourceMetrics), - 'rate_last5m' => Get([rate, 'matched', last5m], ResourceMetrics), - 'rate_max' => Get([rate, 'matched', max], ResourceMetrics) + 'rate' => Get([rate, 'matched', current], ResourceMetrics), + 'rate_last5m' => Get([rate, 'matched', last5m], ResourceMetrics), + 'rate_max' => Get([rate, 'matched', max], ResourceMetrics) + } } }. diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 5e3ae5d50..6fd54e228 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -478,52 +478,64 @@ t_metrics(Config) -> ?assertMatch( {200, #{ <<"metrics">> := #{ - <<"routes">> := 0, - <<"matched">> := _, - <<"success">> := _, - <<"failed">> := _, - <<"dropped">> := _, - <<"retried">> := _, - <<"received">> := _, - <<"queuing">> := _, - <<"inflight">> := _, - <<"rate">> := _, - <<"rate_last5m">> := _, - <<"rate_max">> := _ + <<"router">> := #{ + <<"routes">> := 0 + }, + <<"forwarding">> := #{ + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + } }, <<"node_metrics">> := [ #{ <<"node">> := _, <<"metrics">> := #{ - <<"routes">> := 0, - <<"matched">> := _, - <<"success">> := _, - <<"failed">> := _, - <<"dropped">> := _, - <<"retried">> := _, - <<"received">> := _, - <<"queuing">> := _, - <<"inflight">> := _, - <<"rate">> := _, - <<"rate_last5m">> := _, - <<"rate_max">> := _ + <<"router">> := #{ + <<"routes">> := 0 + }, + <<"forwarding">> := #{ + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + } } }, #{ <<"node">> := _, <<"metrics">> := #{ - <<"routes">> := 0, - <<"matched">> := _, - <<"success">> := _, - <<"failed">> := _, - <<"dropped">> := _, - <<"retried">> := _, - <<"received">> := _, - <<"queuing">> := _, - <<"inflight">> := _, - <<"rate">> := _, - <<"rate_last5m">> := _, - <<"rate_max">> := _ + <<"router">> := #{ + <<"routes">> := 0 + }, + <<"forwarding">> := #{ + <<"matched">> := _, + <<"success">> := _, + <<"failed">> := _, + <<"dropped">> := _, + <<"retried">> := _, + <<"received">> := _, + <<"queuing">> := _, + <<"inflight">> := _, + <<"rate">> := _, + <<"rate_last5m">> := _, + <<"rate_max">> := _ + } } } ] @@ -532,15 +544,15 @@ t_metrics(Config) -> ), ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}}, <<"node_metrics">> := [ #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} }, #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} } ] }}, @@ -556,15 +568,15 @@ t_metrics(Config) -> %% cluster. ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}}, <<"node_metrics">> := [ #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} }, #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} } ] }}, @@ -572,15 +584,15 @@ t_metrics(Config) -> ), ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}}, <<"node_metrics">> := [ #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} }, #{ <<"node">> := _, - <<"metrics">> := #{<<"routes">> := 0} + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}} } ] }}, @@ -608,10 +620,10 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 4}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 4}}, <<"node_metrics">> := [ - #{<<"metrics">> := #{<<"routes">> := 2}}, - #{<<"metrics">> := #{<<"routes">> := 2}} + #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}}, + #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}} ] }}, get_metrics(source, SourceName) @@ -619,7 +631,7 @@ t_metrics(Config) -> ), ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}}, <<"node_metrics">> := _ }}, get_metrics(target, TargetName) @@ -640,10 +652,10 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 2}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}, <<"node_metrics">> := [ - #{<<"metrics">> := #{<<"routes">> := 1}}, - #{<<"metrics">> := #{<<"routes">> := 1}} + #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}}, + #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}} ] }}, get_metrics(source, SourceName) @@ -678,7 +690,7 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 0}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 0}}, <<"node_metrics">> := _ }}, get_metrics(source, SourceName) @@ -700,7 +712,7 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"routes">> := 2}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}, <<"node_metrics">> := _ }}, get_metrics(source, SourceName) From 6dbf015c932d46dc1d660fde4b9d1f3a7a8e79e3 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 25 Jul 2024 11:24:15 -0300 Subject: [PATCH 17/19] refactor: demote hidden config to hardcoded value --- .../src/emqx_cluster_link_bookkeeper.erl | 10 +++++++++- .../emqx_cluster_link/src/emqx_cluster_link_config.erl | 8 +------- .../emqx_cluster_link/src/emqx_cluster_link_schema.erl | 9 +-------- .../emqx_cluster_link/test/emqx_cluster_link_SUITE.erl | 2 -- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl index 992fc7bf1..da5d7eaae 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl @@ -21,6 +21,14 @@ %% Type declarations %%------------------------------------------------------------------------------ +-ifdef(TEST). +%% ms +-define(TALLY_ROUTES_INTERVAL, 300). +-else. +%% ms +-define(TALLY_ROUTES_INTERVAL, 15_000). +-endif. + %% call/cast/info events -record(tally_routes, {}). @@ -71,7 +79,7 @@ ensure_timer(Event, Timeout) -> handle_tally_routes() -> ClusterNames = cluster_names(), tally_routes(ClusterNames), - ensure_timer(#tally_routes{}, emqx_cluster_link_config:tally_routes_interval()), + ensure_timer(#tally_routes{}, ?TALLY_ROUTES_INTERVAL), ok. tally_routes([ClusterName | ClusterNames]) -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl index fd84a5b7f..2a97f2d69 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl @@ -46,9 +46,7 @@ %% Actor Lifecycle actor_ttl/0, actor_gc_interval/0, - actor_heartbeat_interval/0, - %% Metrics - tally_routes_interval/0 + actor_heartbeat_interval/0 ]). -export([ @@ -165,10 +163,6 @@ actor_gc_interval() -> actor_heartbeat_interval() -> actor_ttl() div 3. --spec tally_routes_interval() -> _Milliseconds :: timeout(). -tally_routes_interval() -> - emqx_config:get([cluster, tally_routes_interval]). - %% mk_emqtt_options(#{server := Server, ssl := #{enable := EnableSsl} = Ssl} = LinkConf) -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl index dd5ab66f6..9b08510b9 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_schema.erl @@ -32,14 +32,7 @@ roots() -> []. injected_fields() -> #{ cluster => [ - {links, links_schema(#{})}, - {tally_routes_interval, - hoconsc:mk( - emqx_schema:timeout_duration(), #{ - default => <<"15s">>, - importance => ?IMPORTANCE_HIDDEN - } - )} + {links, links_schema(#{})} ] }. diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl index 21aa1a70e..e023aacab 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_SUITE.erl @@ -53,7 +53,6 @@ mk_source_cluster(BaseName, Config) -> SourceConf = "cluster {" "\n name = cl.source" - "\n tally_routes_interval = 300ms" "\n links = [" "\n { enable = true" "\n name = cl.target" @@ -76,7 +75,6 @@ mk_target_cluster(BaseName, Config) -> TargetConf = "cluster {" "\n name = cl.target" - "\n tally_routes_interval = 300ms" "\n links = [" "\n { enable = true" "\n name = cl.source" From 6da71200f3c4dbaba25072d8199fc5157d04b339 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 25 Jul 2024 11:26:44 -0300 Subject: [PATCH 18/19] refactor: improve bookkeeping api --- .../src/emqx_cluster_link_bookkeeper.erl | 4 +--- .../src/emqx_cluster_link_extrouter.erl | 10 ++++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl index da5d7eaae..826d4f0db 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_bookkeeper.erl @@ -83,9 +83,7 @@ handle_tally_routes() -> ok. tally_routes([ClusterName | ClusterNames]) -> - Tab = emqx_cluster_link_extrouter:extroute_tab(), - Pat = emqx_cluster_link_extrouter:cluster_routes_ms(ClusterName), - NumRoutes = ets:select_count(Tab, Pat), + NumRoutes = emqx_cluster_link_extrouter:count(ClusterName), emqx_cluster_link_metrics:routes_set(ClusterName, NumRoutes), tally_routes(ClusterNames); tally_routes([]) -> diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl index e76b24d79..44b147454 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl @@ -35,7 +35,7 @@ ]). %% Internal export for bookkeeping --export([cluster_routes_ms/1, extroute_tab/0]). +-export([count/1]). %% Strictly monotonically increasing integer. -type smint() :: integer(). @@ -151,16 +151,14 @@ make_extroute_rec_pat(Entry) -> ). %% Internal exports for bookkeeping -cluster_routes_ms(ClusterName) -> +count(ClusterName) -> TopicPat = '_', RouteIDPat = '_', Pat = make_extroute_rec_pat( emqx_trie_search:make_pat(TopicPat, ?ROUTE_ID(ClusterName, RouteIDPat)) ), - [{Pat, [], [true]}]. - -extroute_tab() -> - ?EXTROUTE_TAB. + MS = [{Pat, [], [true]}], + ets:select_count(?EXTROUTE_TAB, MS). %% From 03821c7b497e8fa1e3128d6e06832162b40fd94e Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 25 Jul 2024 11:41:37 -0300 Subject: [PATCH 19/19] fix(cluster link metrics): route count metric is cluster-wide --- .../src/emqx_cluster_link_api.erl | 38 ++++++++++++++++--- .../test/emqx_cluster_link_api_SUITE.erl | 12 +++--- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl index 45b97e8f7..77f613e45 100644 --- a/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl +++ b/apps/emqx_cluster_link/src/emqx_cluster_link_api.erl @@ -274,13 +274,41 @@ append_errors(RouterError, ResourceError, Node, Acc) -> aggregate_metrics(NodeMetrics) -> ErrorLogger = fun(_) -> ok end, - lists:foldl( - fun(#{metrics := Metrics}, Acc) -> - emqx_utils_maps:best_effort_recursive_sum(Metrics, Acc, ErrorLogger) + #{metrics := #{router := EmptyRouterMetrics}} = format_metrics(node(), #{}, #{}), + {RouterMetrics, ResourceMetrics} = lists:foldl( + fun( + #{metrics := #{router := RMetrics, forwarding := FMetrics}}, + {RouterAccIn, ResourceAccIn} + ) -> + ResourceAcc = + emqx_utils_maps:best_effort_recursive_sum(FMetrics, ResourceAccIn, ErrorLogger), + RouterAcc = merge_cluster_wide_metrics(RMetrics, RouterAccIn), + {RouterAcc, ResourceAcc} end, - #{}, + {EmptyRouterMetrics, #{}}, NodeMetrics - ). + ), + #{router => RouterMetrics, forwarding => ResourceMetrics}. + +merge_cluster_wide_metrics(Metrics, Acc) -> + %% For cluster-wide metrics, all nodes should report the same values, except if the + %% RPC to fetch a node's metrics failed, in which case all values will be 0. + F = + fun(_Key, V1, V2) -> + case {erlang:is_map(V1), erlang:is_map(V2)} of + {true, true} -> + merge_cluster_wide_metrics(V1, V2); + {true, false} -> + merge_cluster_wide_metrics(V1, #{}); + {false, true} -> + merge_cluster_wide_metrics(V2, #{}); + {false, false} -> + true = is_number(V1), + true = is_number(V2), + max(V1, V2) + end + end, + maps:merge_with(F, Acc, Metrics). format_metrics(Node, RouterMetrics, ResourceMetrics) -> Get = fun(Path, Map) -> emqx_utils_maps:deep_get(Path, Map, 0) end, diff --git a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl index 6fd54e228..8157c86d6 100644 --- a/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl +++ b/apps/emqx_cluster_link/test/emqx_cluster_link_api_SUITE.erl @@ -612,15 +612,15 @@ t_metrics(Config) -> #{?snk_kind := clink_route_sync_complete} ), - %% Routes = 4 in source cluster, because the target cluster has some topic filters - %% configured and subscribers to them, which were replicated to the source cluster, - %% and we have 2 nodes with 2 routes each. + %% Routes = 2 in source cluster, because the target cluster has some topic filters + %% configured and subscribers to them, which were replicated to the source cluster. + %% This metric is global (cluster-wide). ?retry( 300, 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"router">> := #{<<"routes">> := 4}}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}, <<"node_metrics">> := [ #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}}, #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}} @@ -652,7 +652,7 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}, <<"node_metrics">> := [ #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}}, #{<<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}} @@ -712,7 +712,7 @@ t_metrics(Config) -> 10, ?assertMatch( {200, #{ - <<"metrics">> := #{<<"router">> := #{<<"routes">> := 2}}, + <<"metrics">> := #{<<"router">> := #{<<"routes">> := 1}}, <<"node_metrics">> := _ }}, get_metrics(source, SourceName)