Merge pull request #7766 from thalesmg/bugfix-telemetry-bridge

fix: avoid crashing telemetry if bridge app is not ready
This commit is contained in:
Thales Macedo Garitezi 2022-04-26 09:03:29 -03:00 committed by GitHub
commit f8f97d39d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 21 deletions

View File

@@ -431,23 +431,30 @@ if_only_to_toggle_enable(OldConf, Conf) ->
         }
     } when BridgeType :: atom().
 get_basic_usage_info() ->
-    lists:foldl(
-      fun(#{resource_data := #{config := #{enable := false}}}, Acc) ->
-              Acc;
-         (#{type := BridgeType}, Acc) ->
-              NumBridges = maps:get(num_bridges, Acc),
-              CountByType0 = maps:get(count_by_type, Acc),
-              CountByType = maps:update_with(
-                              binary_to_atom(BridgeType, utf8),
-                              fun(X) -> X + 1 end,
-                              1,
-                              CountByType0),
-              Acc#{ num_bridges => NumBridges + 1
-                  , count_by_type => CountByType
-                  }
-      end,
-      #{num_bridges => 0, count_by_type => #{}},
-      list()).
+    InitialAcc = #{num_bridges => 0, count_by_type => #{}},
+    try
+        lists:foldl(
+          fun(#{resource_data := #{config := #{enable := false}}}, Acc) ->
+                  Acc;
+             (#{type := BridgeType}, Acc) ->
+                  NumBridges = maps:get(num_bridges, Acc),
+                  CountByType0 = maps:get(count_by_type, Acc),
+                  CountByType = maps:update_with(
+                                  binary_to_atom(BridgeType, utf8),
+                                  fun(X) -> X + 1 end,
+                                  1,
+                                  CountByType0),
+                  Acc#{ num_bridges => NumBridges + 1
+                      , count_by_type => CountByType
+                      }
+          end,
+          InitialAcc,
+          list())
+    catch
+        %% for instance, when the bridge app is not ready yet.
+        _:_ ->
+            InitialAcc
+    end.
 bin(Bin) when is_binary(Bin) -> Bin;
 bin(Str) when is_list(Str) -> list_to_binary(Str);

View File

@@ -145,9 +145,13 @@ init(_Opts) ->
     {NodeUUID, ClusterUUID} = ensure_uuids(),
     {ok, State0#state{node_uuid = NodeUUID, cluster_uuid = ClusterUUID}}.
-handle_call(enable, _From, State0) ->
-    State = report_telemetry(State0),
-    {reply, ok, ensure_report_timer(State)};
+handle_call(enable, _From, State) ->
+    %% Wait a few moments before reporting the first telemetry, as the
+    %% apps might still be starting up.  Also, this avoids hanging
+    %% `emqx_modules_app' initialization in case the POST request
+    %% takes a lot of time.
+    FirstReportTimeoutMS = timer:seconds(10),
+    {reply, ok, ensure_report_timer(FirstReportTimeoutMS, State)};
 handle_call(disable, _From, State = #state{timer = Timer}) ->
     emqx_misc:cancel_timer(Timer),
     {reply, ok, State#state{timer = undefined}};
@@ -194,6 +198,9 @@ official_version(Version) ->
     match =:= re:run(Version, Pt, [{capture, none}]).
 ensure_report_timer(State = #state{report_interval = ReportInterval}) ->
+    ensure_report_timer(ReportInterval, State).
+ensure_report_timer(ReportInterval, State) ->
     State#state{timer = emqx_misc:start_timer(ReportInterval, time_to_report_telemetry_data)}.
 os_info() ->

View File

@@ -367,7 +367,15 @@ t_send_after_enable(_) ->
     ok = snabbkaffe:start_trace(),
     try
         ok = emqx_telemetry:enable(),
-        ?assertMatch({ok, _}, ?block_until(#{?snk_kind := telemetry_data_reported}, 2000, 100)),
+        Timeout = 12_000,
+        ?assertMatch(
+            {ok, _},
+            ?wait_async_action(
+                ok = emqx_telemetry:enable(),
+                #{?snk_kind := telemetry_data_reported},
+                Timeout
+            )
+        ),
         receive
             {request, post, _URL, _Headers, Body} ->
                 {ok, Decoded} = emqx_json:safe_decode(Body, [return_maps]),