fix(rule_engine_api): don't crash when formatting empty metrics

Fixes https://emqx.atlassian.net/browse/EMQX-10073
Fixes https://github.com/emqx/emqx/issues/10714#issuecomment-1567987664

Similar issue to https://github.com/emqx/emqx/pull/10743, but on the
rule engine API.
This commit is contained in:
Thales Macedo Garitezi 2023-05-30 11:51:18 -03:00
parent 04bd39861d
commit 57aacb471c
3 changed files with 84 additions and 41 deletions

View File

@ -529,47 +529,69 @@ printable_function_name(Mod, Func) ->
list_to_binary(lists:concat([Mod, ":", Func])).
get_rule_metrics(Id) ->
Format = fun(
Node,
#{
counters :=
#{
'matched' := Matched,
'passed' := Passed,
'failed' := Failed,
'failed.exception' := FailedEx,
'failed.no_result' := FailedNoRes,
'actions.total' := OTotal,
'actions.failed' := OFailed,
'actions.failed.out_of_service' := OFailedOOS,
'actions.failed.unknown' := OFailedUnknown,
'actions.success' := OFailedSucc
},
rate :=
#{
'matched' :=
#{current := Current, max := Max, last5m := Last5M}
}
}
) ->
#{
metrics => ?METRICS(
Matched,
Passed,
Failed,
FailedEx,
FailedNoRes,
OTotal,
OFailed,
OFailedOOS,
OFailedUnknown,
OFailedSucc,
Current,
Max,
Last5M
),
node => Node
}
Format = fun
(
Node,
#{
counters :=
#{
'matched' := Matched,
'passed' := Passed,
'failed' := Failed,
'failed.exception' := FailedEx,
'failed.no_result' := FailedNoRes,
'actions.total' := OTotal,
'actions.failed' := OFailed,
'actions.failed.out_of_service' := OFailedOOS,
'actions.failed.unknown' := OFailedUnknown,
'actions.success' := OFailedSucc
},
rate :=
#{
'matched' :=
#{current := Current, max := Max, last5m := Last5M}
}
}
) ->
#{
metrics => ?METRICS(
Matched,
Passed,
Failed,
FailedEx,
FailedNoRes,
OTotal,
OFailed,
OFailedOOS,
OFailedUnknown,
OFailedSucc,
Current,
Max,
Last5M
),
node => Node
};
(Node, _Metrics) ->
%% Empty metrics: can happen when a node joins a cluster and the rule is not yet
%% replicated to it, so the counters map is empty.
#{
metrics => ?METRICS(
_Matched = 0,
_Passed = 0,
_Failed = 0,
_FailedEx = 0,
_FailedNoRes = 0,
_OTotal = 0,
_OFailed = 0,
_OFailedOOS = 0,
_OFailedUnknown = 0,
_OFailedSucc = 0,
_Current = 0,
_Max = 0,
_Last5M = 0
),
node => Node
}
end,
[
Format(Node, emqx_plugin_libs_proto_v1:get_metrics(Node, rule_metrics, Id))

View File

@ -94,6 +94,26 @@ t_crud_rule_api(_Config) ->
ct:pal("RMetrics : ~p", [Metrics]),
?assertMatch(#{id := RuleId, metrics := _, node_metrics := _}, Metrics),
%% Simulate a node joining a cluster while its configuration replication lags behind;
%% in such cases, when fetching metrics, a rule may exist in the cluster but not on
%% the new node. We just check that it doesn't provoke a crash.
emqx_common_test_helpers:with_mock(
emqx_metrics_worker,
get_metrics,
fun(HandlerName, MetricId) ->
%% change the metric id to some unknown id.
meck:passthrough([HandlerName, <<"unknown-", MetricId/binary>>])
end,
fun() ->
{200, Metrics1} = emqx_rule_engine_api:'/rules/:id/metrics'(get, #{
bindings => #{id => RuleId}
}),
ct:pal("RMetrics : ~p", [Metrics1]),
?assertMatch(#{id := RuleId, metrics := _, node_metrics := _}, Metrics1),
ok
end
),
{200, Rule2} = emqx_rule_engine_api:'/rules/:id'(put, #{
bindings => #{id => RuleId},
body => ?SIMPLE_RULE(RuleId)#{<<"sql">> => <<"select * from \"t/b\"">>}

View File

@ -0,0 +1 @@
Fixes an issue where trying to get rule info or metrics could result in a crash when a node is joining a cluster.