From 042a84c30f9c6f968b9cd40dcd3115aa91636da3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 28 Feb 2024 11:33:27 +0800 Subject: [PATCH 1/5] fix(prom): api crash when tls cert file non existed --- .../src/emqx_prometheus.app.src | 2 +- apps/emqx_prometheus/src/emqx_prometheus.erl | 20 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.app.src b/apps/emqx_prometheus/src/emqx_prometheus.app.src index 9e9952d6c..b8c3d8790 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.app.src +++ b/apps/emqx_prometheus/src/emqx_prometheus.app.src @@ -2,7 +2,7 @@ {application, emqx_prometheus, [ {description, "Prometheus for EMQX"}, % strict semver, bump manually! - {vsn, "5.0.19"}, + {vsn, "5.0.20"}, {modules, []}, {registered, [emqx_prometheus_sup]}, {applications, [kernel, stdlib, prometheus, emqx, emqx_auth, emqx_resource, emqx_management]}, diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 59241bd02..309a794fb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -883,13 +883,17 @@ gen_point(Type, Name, Path) -> %% TODO: cert manager for more generic utils functions cert_expiry_at_from_path(Path0) -> Path = emqx_schema:naive_env_interpolation(Path0), - {ok, PemBin} = file:read_file(Path), - [CertEntry | _] = public_key:pem_decode(PemBin), - Cert = public_key:pem_entry_decode(CertEntry), - %% TODO: Not fully tested for all certs type - {'utcTime', NotAfterUtc} = - Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', - utc_time_to_epoch(NotAfterUtc). + case file:read_file(Path) of + {ok, PemBin} -> + [CertEntry | _] = public_key:pem_decode(PemBin), + Cert = public_key:pem_entry_decode(CertEntry), + %% TODO: Not fully tested for all certs type + {'utcTime', NotAfterUtc} = + Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', + utc_time_to_epoch(NotAfterUtc); + {error, _} -> + 0 + end. utc_time_to_epoch(UtcTime) -> date_to_expiry_epoch(utc_time_to_datetime(UtcTime)). @@ -898,7 +902,7 @@ utc_time_to_datetime(Str) -> {ok, [Year, Month, Day, Hour, Minute, Second], _} = io_lib:fread( "~2d~2d~2d~2d~2d~2dZ", Str ), - %% Alwoys Assuming YY is in 2000 + %% Always Assuming YY is in 2000 {{2000 + Year, Month, Day}, {Hour, Minute, Second}}. %% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). From 812e8ef314b3c3e78691ba9ea8d56bae4459676d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 28 Feb 2024 14:29:46 +0800 Subject: [PATCH 2/5] fix: try-catch for unknow reason to inhibit api crash --- apps/emqx_prometheus/src/emqx_prometheus.erl | 36 +++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 309a794fb..8b34cc9eb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -883,15 +883,33 @@ gen_point(Type, Name, Path) -> %% TODO: cert manager for more generic utils functions cert_expiry_at_from_path(Path0) -> Path = emqx_schema:naive_env_interpolation(Path0), - case file:read_file(Path) of - {ok, PemBin} -> - [CertEntry | _] = public_key:pem_decode(PemBin), - Cert = public_key:pem_entry_decode(CertEntry), - %% TODO: Not fully tested for all certs type - {'utcTime', NotAfterUtc} = - Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', - utc_time_to_epoch(NotAfterUtc); - {error, _} -> + try + case file:read_file(Path) of + {ok, PemBin} -> + [CertEntry | _] = public_key:pem_decode(PemBin), + Cert = public_key:pem_entry_decode(CertEntry), + %% TODO: Not fully tested for all certs type + {'utcTime', NotAfterUtc} = + Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', + utc_time_to_epoch(NotAfterUtc); + {error, Reason} -> + ?SLOG(error, #{ + msg => "read_cert_file_failed", + path => Path0, + resolved_path => Path, + reason => Reason + }), + 0 + end + catch + E:R -> + ?SLOG(error, #{ + msg => "obtain_cert_expiry_time_failed", + error => E, + reason => R, + path => Path0, + resolved_path => Path + }), 0 end. From 5f125047f583c9a9db69328677f840de385406b1 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 28 Feb 2024 14:50:09 +0800 Subject: [PATCH 3/5] refactor: remove unnecessary lists fold for `cerfile` --- apps/emqx_prometheus/src/emqx_prometheus.erl | 38 ++++++++------------ 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 8b34cc9eb..399232c40 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -830,8 +830,7 @@ cert_data(undefined) -> cert_data(AllListeners) -> Points = lists:foldl( fun(ListenerType, PointsAcc) -> - PointsAcc ++ - points_of_listeners(ListenerType, AllListeners) + lists:append(PointsAcc, points_of_listeners(ListenerType, AllListeners)) end, _PointsInitAcc = [], ?LISTENER_TYPES @@ -843,41 +842,34 @@ cert_data(AllListeners) -> points_of_listeners(Type, AllListeners) -> do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). --define(CERT_TYPES, [certfile]). - --spec do_points_of_listeners(Type, TypeOfListeners) -> +-spec do_points_of_listeners(Type, Listeners) -> [_Point :: {[{LabelKey, LabelValue}], Epoch}] when Type :: ssl | wss | quic, - TypeOfListeners :: #{ListenerName :: atom() => ListenerConf :: map()} | undefined, + Listeners :: #{ListenerName :: atom() => ListenerConf :: map()} | undefined, LabelKey :: atom(), LabelValue :: atom(), Epoch :: non_neg_integer(). do_points_of_listeners(_, undefined) -> []; -do_points_of_listeners(ListenerType, TypeOfListeners) -> +do_points_of_listeners(Type, Listeners) -> lists:foldl( fun(Name, PointsAcc) -> - lists:foldl( - fun(CertType, AccIn) -> - case - emqx_utils_maps:deep_get( - [Name, ssl_options, CertType], TypeOfListeners, undefined - ) - of - undefined -> AccIn; - Path -> [gen_point(ListenerType, Name, Path) | AccIn] - end - end, - [], - ?CERT_TYPES - ) ++ PointsAcc + case + emqx_utils_maps:deep_get( + [Name, ssl_options, certfile], Listeners, undefined + ) + of + undefined -> PointsAcc; + Path -> [gen_point_cert_expiry_at(Type, Name, Path) | PointsAcc] + end end, [], - maps:keys(TypeOfListeners) + %% listener names + maps:keys(Listeners) ). -gen_point(Type, Name, Path) -> +gen_point_cert_expiry_at(Type, Name, Path) -> {[{listener_type, Type}, {listener_name, Name}], cert_expiry_at_from_path(Path)}. %% TODO: cert manager for more generic utils functions From 0e12503a8d69cc1c2a58ab4d182c82bd3cfdc44e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 28 Feb 2024 15:25:54 +0800 Subject: [PATCH 4/5] chore: add fix change log --- changes/ce/fix-12606.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/fix-12606.en.md diff --git a/changes/ce/fix-12606.en.md b/changes/ce/fix-12606.en.md new file mode 100644 index 000000000..ab928848d --- /dev/null +++ b/changes/ce/fix-12606.en.md @@ -0,0 +1 @@ +Fix the issue of the endpoint `/prometheus/stats` crashing when the listener's cert file is unreadable. From 6fd04e33f556b94aa10e104d46869bf02f65d2a0 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 28 Feb 2024 15:37:48 +0800 Subject: [PATCH 5/5] fix(prom): skip cert info for disabled ssl/wss/quic listeners --- apps/emqx_prometheus/src/emqx_prometheus.erl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 399232c40..3297d12e3 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -856,10 +856,12 @@ do_points_of_listeners(Type, Listeners) -> lists:foldl( fun(Name, PointsAcc) -> case - emqx_utils_maps:deep_get( - [Name, ssl_options, certfile], Listeners, undefined - ) + emqx_utils_maps:deep_get([Name, enable], Listeners, false) andalso + emqx_utils_maps:deep_get( + [Name, ssl_options, certfile], Listeners, undefined + ) of + false -> PointsAcc; undefined -> PointsAcc; Path -> [gen_point_cert_expiry_at(Type, Name, Path) | PointsAcc] end @@ -894,11 +896,12 @@ cert_expiry_at_from_path(Path0) -> 0 end catch - E:R -> + E:R:S -> ?SLOG(error, #{ msg => "obtain_cert_expiry_time_failed", error => E, reason => R, + stacktrace => S, path => Path0, resolved_path => Path }),