refactor(cluster link api): return erpc errors in status and metrics responses
This commit is contained in:
parent
79db2e6d7f
commit
34f5a886ce
|
@ -201,9 +201,15 @@ fields(node_metrics) ->
|
|||
|
||||
handle_list() ->
|
||||
Links = get_raw(),
|
||||
NodeResults = get_all_link_status_cluster(),
|
||||
NameToStatus = collect_all_status(NodeResults),
|
||||
EmptyStatus = #{status => inconsistent, node_status => []},
|
||||
NodeRPCResults = emqx_cluster_link_mqtt:get_all_resources_cluster(),
|
||||
{NameToStatus, Errors} = collect_all_status(NodeRPCResults),
|
||||
NodeErrors = lists:map(
|
||||
fun({Node, Error}) ->
|
||||
#{node => Node, status => inconsistent, reason => Error}
|
||||
end,
|
||||
Errors
|
||||
),
|
||||
EmptyStatus = #{status => inconsistent, node_status => NodeErrors},
|
||||
Response =
|
||||
lists:map(
|
||||
fun(#{<<"name">> := Name} = Link) ->
|
||||
|
@ -227,25 +233,32 @@ handle_lookup(Name, Link) ->
|
|||
?OK(add_status(Name, Link)).
|
||||
|
||||
handle_metrics(Name) ->
|
||||
case emqx_cluster_link_metrics:get_metrics(Name) of
|
||||
{error, BadResults} ->
|
||||
Results = emqx_cluster_link_metrics:get_metrics(Name),
|
||||
{NodeMetrics0, NodeErrors} =
|
||||
lists:foldl(
|
||||
fun
|
||||
({Node, {ok, Metrics}}, {OkAccIn, ErrAccIn}) ->
|
||||
{[format_metrics(Node, Metrics) | OkAccIn], ErrAccIn};
|
||||
({Node, Error}, {OkAccIn, ErrAccIn}) ->
|
||||
{OkAccIn, [{Node, Error} | ErrAccIn]}
|
||||
end,
|
||||
{[], []},
|
||||
Results
|
||||
),
|
||||
case NodeErrors of
|
||||
[] ->
|
||||
ok;
|
||||
[_ | _] ->
|
||||
?SLOG(warning, #{
|
||||
msg => "cluster_link_api_metrics_bad_erpc_results",
|
||||
results => BadResults
|
||||
}),
|
||||
?OK(#{metrics => #{}, node_metrics => []});
|
||||
{ok, NodeResults} ->
|
||||
NodeMetrics =
|
||||
lists:map(
|
||||
fun({Node, Metrics}) ->
|
||||
format_metrics(Node, Metrics)
|
||||
end,
|
||||
NodeResults
|
||||
),
|
||||
AggregatedMetrics = aggregate_metrics(NodeMetrics),
|
||||
Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics},
|
||||
?OK(Response)
|
||||
end.
|
||||
errors => maps:from_list(NodeErrors)
|
||||
})
|
||||
end,
|
||||
NodeMetrics1 = lists:map(fun({Node, _Error}) -> format_metrics(Node, #{}) end, NodeErrors),
|
||||
NodeMetrics = NodeMetrics1 ++ NodeMetrics0,
|
||||
AggregatedMetrics = aggregate_metrics(NodeMetrics),
|
||||
Response = #{metrics => AggregatedMetrics, node_metrics => NodeMetrics},
|
||||
?OK(Response).
|
||||
|
||||
aggregate_metrics(NodeMetrics) ->
|
||||
ErrorLogger = fun(_) -> ok end,
|
||||
|
@ -267,8 +280,8 @@ format_metrics(Node, Metrics) ->
|
|||
}.
|
||||
|
||||
add_status(Name, Link) ->
|
||||
NodeResults = get_link_status_cluster(Name),
|
||||
Status = collect_single_status(NodeResults),
|
||||
NodeRPCResults = emqx_cluster_link_mqtt:get_resource_cluster(Name),
|
||||
Status = collect_single_status(NodeRPCResults),
|
||||
maps:merge(Link, Status).
|
||||
|
||||
handle_update(Name, Params0) ->
|
||||
|
@ -289,64 +302,62 @@ get_raw() ->
|
|||
),
|
||||
Links.
|
||||
|
||||
get_all_link_status_cluster() ->
|
||||
case emqx_cluster_link_mqtt:get_all_resources_cluster() of
|
||||
{error, BadResults} ->
|
||||
?SLOG(warning, #{
|
||||
msg => "cluster_link_api_all_status_bad_erpc_results",
|
||||
results => BadResults
|
||||
}),
|
||||
[];
|
||||
{ok, NodeResults} ->
|
||||
NodeResults
|
||||
end.
|
||||
|
||||
get_link_status_cluster(Name) ->
|
||||
case emqx_cluster_link_mqtt:get_resource_cluster(Name) of
|
||||
{error, BadResults} ->
|
||||
?SLOG(warning, #{
|
||||
msg => "cluster_link_api_lookup_status_bad_erpc_results",
|
||||
results => BadResults
|
||||
}),
|
||||
[];
|
||||
{ok, NodeResults} ->
|
||||
NodeResults
|
||||
end.
|
||||
|
||||
-spec collect_all_status([{node(), #{cluster_name() => _}}]) ->
|
||||
#{
|
||||
-spec collect_all_status([{node(), {ok, #{cluster_name() => _}} | _Error}]) ->
|
||||
{ClusterToStatus, Errors}
|
||||
when
|
||||
ClusterToStatus :: #{
|
||||
cluster_name() => #{
|
||||
node := node(),
|
||||
status := emqx_resource:resource_status() | inconsistent
|
||||
}
|
||||
}.
|
||||
},
|
||||
Errors :: [{node(), term()}].
|
||||
collect_all_status(NodeResults) ->
|
||||
Reindexed = lists:foldl(
|
||||
fun({Node, AllLinkData}, Acc) ->
|
||||
maps:fold(
|
||||
fun(Name, Data, AccIn) ->
|
||||
collect_all_status1(Node, Name, Data, AccIn)
|
||||
end,
|
||||
Acc,
|
||||
AllLinkData
|
||||
)
|
||||
{Reindexed, Errors} = lists:foldl(
|
||||
fun
|
||||
({Node, {ok, AllLinkData}}, {OkAccIn, ErrAccIn}) ->
|
||||
OkAcc = maps:fold(
|
||||
fun(Name, Data, AccIn) ->
|
||||
collect_all_status1(Node, Name, Data, AccIn)
|
||||
end,
|
||||
OkAccIn,
|
||||
AllLinkData
|
||||
),
|
||||
{OkAcc, ErrAccIn};
|
||||
({Node, Error}, {OkAccIn, ErrAccIn}) ->
|
||||
{OkAccIn, [{Node, Error} | ErrAccIn]}
|
||||
end,
|
||||
#{},
|
||||
{#{}, []},
|
||||
NodeResults
|
||||
),
|
||||
maps:fold(
|
||||
NoErrors =
|
||||
case Errors of
|
||||
[] ->
|
||||
true;
|
||||
[_ | _] ->
|
||||
?SLOG(warning, #{
|
||||
msg => "cluster_link_api_lookup_status_bad_erpc_results",
|
||||
errors => Errors
|
||||
}),
|
||||
false
|
||||
end,
|
||||
ClusterToStatus = maps:fold(
|
||||
fun(Name, NodeToData, Acc) ->
|
||||
OnlyStatus = [S || #{status := S} <- maps:values(NodeToData)],
|
||||
SummaryStatus =
|
||||
case lists:usort(OnlyStatus) of
|
||||
[SameStatus] -> SameStatus;
|
||||
[SameStatus] when NoErrors -> SameStatus;
|
||||
_ -> inconsistent
|
||||
end,
|
||||
NodeStatus = lists:map(
|
||||
fun({Node, #{status := S}}) ->
|
||||
#{node => Node, status => S}
|
||||
fun
|
||||
({Node, #{status := S}}) ->
|
||||
#{node => Node, status => S};
|
||||
({Node, Error0}) ->
|
||||
Error = emqx_logger_jsonfmt:best_effort_json(Error0),
|
||||
#{node => Node, status => inconsistent, reason => Error}
|
||||
end,
|
||||
maps:to_list(NodeToData)
|
||||
maps:to_list(NodeToData) ++ Errors
|
||||
),
|
||||
Acc#{
|
||||
Name => #{
|
||||
|
@ -357,7 +368,8 @@ collect_all_status(NodeResults) ->
|
|||
end,
|
||||
#{},
|
||||
Reindexed
|
||||
).
|
||||
),
|
||||
{ClusterToStatus, Errors}.
|
||||
|
||||
collect_all_status1(Node, Name, Data, Acc) ->
|
||||
maps:update_with(
|
||||
|
@ -371,12 +383,13 @@ collect_single_status(NodeResults) ->
|
|||
NodeStatus =
|
||||
lists:map(
|
||||
fun
|
||||
({Node, {ok, #{status := S}}}) ->
|
||||
({Node, {ok, {ok, #{status := S}}}}) ->
|
||||
#{node => Node, status => S};
|
||||
({Node, {error, _}}) ->
|
||||
({Node, {ok, {error, _}}}) ->
|
||||
#{node => Node, status => ?status_disconnected};
|
||||
({Node, _}) ->
|
||||
#{node => Node, status => inconsistent}
|
||||
({Node, Error0}) ->
|
||||
Error = emqx_logger_jsonfmt:best_effort_json(Error0),
|
||||
#{node => Node, status => inconsistent, reason => Error}
|
||||
end,
|
||||
NodeResults
|
||||
),
|
||||
|
|
|
@ -31,7 +31,7 @@ get_metrics(ClusterName) ->
|
|||
Nodes = emqx:running_nodes(),
|
||||
Timeout = 15_000,
|
||||
Results = emqx_metrics_proto_v2:get_metrics(Nodes, ?METRIC_NAME, ClusterName, Timeout),
|
||||
sequence_multicall_results(Nodes, Results).
|
||||
lists:zip(Nodes, Results).
|
||||
|
||||
maybe_create_metrics(ClusterName) ->
|
||||
case emqx_metrics_worker:has_metrics(?METRIC_NAME, ClusterName) of
|
||||
|
@ -52,16 +52,3 @@ routes_inc(ClusterName, Val) ->
|
|||
%%--------------------------------------------------------------------
|
||||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) ->
|
||||
{ok, [{node(), term()}]} | {error, [term()]}.
|
||||
sequence_multicall_results(Nodes, Results) ->
|
||||
case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of
|
||||
{OkResults, []} ->
|
||||
{ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]};
|
||||
{_OkResults, BadResults} ->
|
||||
{error, BadResults}
|
||||
end.
|
||||
|
||||
is_ok({_Node, {ok, _}}) -> true;
|
||||
is_ok(_) -> false.
|
||||
|
|
|
@ -106,20 +106,18 @@ remove_msg_fwd_resource(ClusterName) ->
|
|||
emqx_resource:remove_local(?MSG_RES_ID(ClusterName)).
|
||||
|
||||
-spec get_all_resources_cluster() ->
|
||||
{ok, [{node(), #{cluster_name() => emqx_resource:resource_data()}}]}
|
||||
| {error, [term()]}.
|
||||
[{node(), emqx_rpc:erpc(#{cluster_name() => emqx_resource:resource_data()})}].
|
||||
get_all_resources_cluster() ->
|
||||
Nodes = emqx:running_nodes(),
|
||||
Results = emqx_cluster_link_proto_v1:get_all_resources(Nodes),
|
||||
sequence_multicall_results(Nodes, Results).
|
||||
lists:zip(Nodes, Results).
|
||||
|
||||
-spec get_resource_cluster(cluster_name()) ->
|
||||
{ok, [{node(), {ok, emqx_resource:resource_data()} | {error, not_found}}]}
|
||||
| {error, [term()]}.
|
||||
[{node(), {ok, {ok, emqx_resource:resource_data()} | {error, not_found}} | _Error}].
|
||||
get_resource_cluster(ClusterName) ->
|
||||
Nodes = emqx:running_nodes(),
|
||||
Results = emqx_cluster_link_proto_v1:get_resource(Nodes, ClusterName),
|
||||
sequence_multicall_results(Nodes, Results).
|
||||
lists:zip(Nodes, Results).
|
||||
|
||||
%% RPC Target in `emqx_cluster_link_proto_v1'.
|
||||
-spec get_resource_local_v1(cluster_name()) ->
|
||||
|
@ -478,16 +476,3 @@ emqtt_client_opts(ClientIdSuffix, ClusterConf) ->
|
|||
#{clientid := BaseClientId} = Opts = emqx_cluster_link_config:mk_emqtt_options(ClusterConf),
|
||||
ClientId = emqx_bridge_mqtt_lib:clientid_base([BaseClientId, ClientIdSuffix]),
|
||||
Opts#{clientid => ClientId}.
|
||||
|
||||
-spec sequence_multicall_results([node()], emqx_rpc:erpc_multicall(term())) ->
|
||||
{ok, [{node(), term()}]} | {error, [term()]}.
|
||||
sequence_multicall_results(Nodes, Results) ->
|
||||
case lists:partition(fun is_ok/1, lists:zip(Nodes, Results)) of
|
||||
{OkResults, []} ->
|
||||
{ok, [{Node, Res} || {Node, {ok, Res}} <- OkResults]};
|
||||
{_OkResults, BadResults} ->
|
||||
{error, BadResults}
|
||||
end.
|
||||
|
||||
is_ok({_Node, {ok, _}}) -> true;
|
||||
is_ok(_) -> false.
|
||||
|
|
|
@ -409,7 +409,17 @@ t_status(Config) ->
|
|||
{200, [
|
||||
#{
|
||||
<<"status">> := <<"inconsistent">>,
|
||||
<<"node_status">> := []
|
||||
<<"node_status">> := [
|
||||
#{
|
||||
<<"node">> := _,
|
||||
<<"status">> := <<"connected">>
|
||||
},
|
||||
#{
|
||||
<<"node">> := _,
|
||||
<<"status">> := <<"inconsistent">>,
|
||||
<<"reason">> := _
|
||||
}
|
||||
]
|
||||
}
|
||||
]},
|
||||
list()
|
||||
|
@ -417,7 +427,17 @@ t_status(Config) ->
|
|||
?assertMatch(
|
||||
{200, #{
|
||||
<<"status">> := <<"inconsistent">>,
|
||||
<<"node_status">> := []
|
||||
<<"node_status">> := [
|
||||
#{
|
||||
<<"node">> := _,
|
||||
<<"status">> := <<"connected">>
|
||||
},
|
||||
#{
|
||||
<<"node">> := _,
|
||||
<<"status">> := <<"inconsistent">>,
|
||||
<<"reason">> := _
|
||||
}
|
||||
]
|
||||
}},
|
||||
get_link(Name)
|
||||
),
|
||||
|
|
Loading…
Reference in New Issue