fix(emqx_opentelemetry): avoid using `application:ensure_all_started/1` for better deadlock safety
This commit is contained in:
parent
8d3f98eff2
commit
2a3f6b749c
|
@ -25,12 +25,7 @@
|
|||
redbug,
|
||||
xmerl,
|
||||
{hocon, load},
|
||||
telemetry,
|
||||
{opentelemetry, load},
|
||||
{opentelemetry_api, load},
|
||||
{opentelemetry_experimental, load},
|
||||
{opentelemetry_api_experimental, load},
|
||||
{opentelemetry_exporter, load}
|
||||
telemetry
|
||||
],
|
||||
%% must always be of type `load'
|
||||
common_business_apps =>
|
||||
|
|
|
@ -50,6 +50,7 @@ start() ->
|
|||
start_sysmon(),
|
||||
configure_shard_transports(),
|
||||
set_mnesia_extra_diagnostic_checks(),
|
||||
emqx_otel_app:configure_otel_deps(),
|
||||
ekka:start(),
|
||||
ok.
|
||||
|
||||
|
|
|
@ -69,9 +69,7 @@ stop_apps() ->
|
|||
?SLOG(notice, #{msg => "stopping_emqx_apps"}),
|
||||
_ = emqx_alarm_handler:unload(),
|
||||
ok = emqx_conf_app:unset_config_loaded(),
|
||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())),
|
||||
%% Mute otel deps application.
|
||||
ok = emqx_otel_app:stop_deps().
|
||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
|
||||
|
||||
%% Those port apps are terminated after the main apps
|
||||
%% Don't need to stop when reboot.
|
||||
|
|
|
@ -3,13 +3,13 @@
|
|||
{deps,
|
||||
[{emqx, {path, "../emqx"}}
|
||||
%% trace
|
||||
, {opentelemetry_api, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.4-emqx"}, "apps/opentelemetry_api"}}
|
||||
, {opentelemetry, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.4-emqx"}, "apps/opentelemetry"}}
|
||||
%% log metrics
|
||||
, {opentelemetry_experimental, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.4-emqx"}, "apps/opentelemetry_experimental"}}
|
||||
, {opentelemetry_api_experimental, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.4-emqx"}, "apps/opentelemetry_api_experimental"}}
|
||||
, {opentelemetry_api, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.5-emqx"}, "apps/opentelemetry_api"}}
|
||||
, {opentelemetry, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.5-emqx"}, "apps/opentelemetry"}}
|
||||
%% logs, metrics
|
||||
, {opentelemetry_experimental, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.5-emqx"}, "apps/opentelemetry_experimental"}}
|
||||
, {opentelemetry_api_experimental, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.5-emqx"}, "apps/opentelemetry_api_experimental"}}
|
||||
%% export
|
||||
, {opentelemetry_exporter, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.4-emqx"}, "apps/opentelemetry_exporter"}}
|
||||
, {opentelemetry_exporter, {git_subdir, "https://github.com/emqx/opentelemetry-erlang", {tag, "v1.4.5-emqx"}, "apps/opentelemetry_exporter"}}
|
||||
]}.
|
||||
|
||||
{edoc_opts, [{preprocess, true}]}.
|
||||
|
|
|
@ -8,14 +8,12 @@
|
|||
stdlib,
|
||||
emqx,
|
||||
%% otel metrics depend on emqx_mgmt_cache
|
||||
emqx_management
|
||||
]},
|
||||
{included_applications, [
|
||||
emqx_management,
|
||||
opentelemetry_exporter,
|
||||
opentelemetry,
|
||||
opentelemetry_api,
|
||||
opentelemetry_api_experimental,
|
||||
opentelemetry_experimental,
|
||||
opentelemetry_exporter
|
||||
opentelemetry_api,
|
||||
opentelemetry_api_experimental
|
||||
]},
|
||||
{env, []},
|
||||
{modules, []},
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
-behaviour(application).
|
||||
|
||||
-export([start/2, stop/1]).
|
||||
-export([stop_deps/0]).
|
||||
-export([configure_otel_deps/0]).
|
||||
|
||||
start(_StartType, _StartArgs) ->
|
||||
emqx_otel_config:add_handler(),
|
||||
|
@ -33,5 +33,12 @@ stop(_State) ->
|
|||
_ = emqx_otel_config:remove_otel_log_handler(),
|
||||
ok.
|
||||
|
||||
stop_deps() ->
|
||||
emqx_otel_config:stop_all_otel_apps().
|
||||
configure_otel_deps() ->
|
||||
%% default tracer and metrics are started only on demand
|
||||
ok = application:set_env(
|
||||
[
|
||||
{opentelemetry, [{start_default_tracer, false}]},
|
||||
{opentelemetry_experimental, [{start_default_metrics, false}]}
|
||||
],
|
||||
[{persistent, true}]
|
||||
).
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
-export([post_config_update/5]).
|
||||
-export([update/1]).
|
||||
-export([add_otel_log_handler/0, remove_otel_log_handler/0]).
|
||||
-export([stop_all_otel_apps/0]).
|
||||
-export([otel_exporter/1]).
|
||||
|
||||
update(Config) ->
|
||||
|
@ -59,7 +58,6 @@ post_config_update(?OPTL, _Req, New, Old, AppEnvs) ->
|
|||
MetricsRes = ensure_otel_metrics(New, Old),
|
||||
LogsRes = ensure_otel_logs(New, Old),
|
||||
TracesRes = ensure_otel_traces(New, Old),
|
||||
_ = maybe_stop_all_otel_apps(New),
|
||||
case {MetricsRes, LogsRes, TracesRes} of
|
||||
{ok, ok, ok} -> ok;
|
||||
Other -> {error, Other}
|
||||
|
@ -67,9 +65,6 @@ post_config_update(?OPTL, _Req, New, Old, AppEnvs) ->
|
|||
post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) ->
|
||||
ok.
|
||||
|
||||
stop_all_otel_apps() ->
|
||||
stop_all_otel_apps(true).
|
||||
|
||||
add_otel_log_handler() ->
|
||||
ensure_otel_logs(emqx:get_config(?OPTL), #{}).
|
||||
|
||||
|
@ -104,7 +99,6 @@ ensure_otel_logs(#{logs := LogsConf}, #{logs := LogsConf}) ->
|
|||
ok;
|
||||
ensure_otel_logs(#{logs := #{enable := true} = LogsConf}, _OldConf) ->
|
||||
ok = remove_handler_if_present(?OTEL_LOG_HANDLER_ID),
|
||||
ok = ensure_log_apps(),
|
||||
HandlerConf = tr_handler_conf(LogsConf),
|
||||
%% NOTE: should primary logger level be updated if it's higher than otel log level?
|
||||
logger:add_handler(?OTEL_LOG_HANDLER_ID, ?OTEL_LOG_HANDLER, HandlerConf);
|
||||
|
@ -126,21 +120,6 @@ remove_handler_if_present(HandlerId) ->
|
|||
ok
|
||||
end.
|
||||
|
||||
ensure_log_apps() ->
|
||||
{ok, _} = application:ensure_all_started(opentelemetry_exporter),
|
||||
{ok, _} = application:ensure_all_started(opentelemetry_experimental),
|
||||
ok.
|
||||
|
||||
maybe_stop_all_otel_apps(#{
|
||||
metrics := #{enable := false},
|
||||
logs := #{enable := false},
|
||||
traces := #{enable := false}
|
||||
}) ->
|
||||
IsShutdown = false,
|
||||
stop_all_otel_apps(IsShutdown);
|
||||
maybe_stop_all_otel_apps(_) ->
|
||||
ok.
|
||||
|
||||
tr_handler_conf(Conf) ->
|
||||
#{
|
||||
level := Level,
|
||||
|
@ -171,18 +150,3 @@ is_ssl(<<"https://", _/binary>> = _Endpoint) ->
|
|||
true;
|
||||
is_ssl(_Endpoint) ->
|
||||
false.
|
||||
|
||||
stop_all_otel_apps(IsShutdown) ->
|
||||
%% if traces were enabled, it's not safe to stop opentelemetry app,
|
||||
%% as there could be not finsihed traces that would crash if spans ETS tables are deleted
|
||||
_ =
|
||||
case IsShutdown of
|
||||
true ->
|
||||
_ = application:stop(opentelemetry);
|
||||
false ->
|
||||
ok
|
||||
end,
|
||||
_ = application:stop(opentelemetry_experimental),
|
||||
_ = application:stop(opentelemetry_experimental_api),
|
||||
_ = application:stop(opentelemetry_exporter),
|
||||
ok.
|
||||
|
|
|
@ -74,10 +74,6 @@ setup(_Conf) ->
|
|||
|
||||
ensure_apps(Conf) ->
|
||||
#{exporter := #{interval := ExporterInterval} = Exporter} = Conf,
|
||||
{ok, _} = application:ensure_all_started(opentelemetry_exporter),
|
||||
{ok, _} = application:ensure_all_started(opentelemetry),
|
||||
{ok, _} = application:ensure_all_started(opentelemetry_experimental),
|
||||
{ok, _} = application:ensure_all_started(opentelemetry_api_experimental),
|
||||
|
||||
_ = opentelemetry_experimental:stop_default_metrics(),
|
||||
ok = application:set_env(
|
||||
|
|
|
@ -78,7 +78,6 @@ start(Conf) ->
|
|||
],
|
||||
set_trace_all(TraceAll),
|
||||
ok = application:set_env([{opentelemetry, OtelEnv}]),
|
||||
_ = application:ensure_all_started(opentelemetry),
|
||||
Res = assert_started(opentelemetry:start_default_tracer_provider()),
|
||||
case Res of
|
||||
ok ->
|
||||
|
|
Loading…
Reference in New Issue