Merge pull request #7766 from thalesmg/bugfix-telemetry-bridge

fix: avoid crashing telemetry if bridge app is not ready
This commit is contained in:
Thales Macedo Garitezi 2022-04-26 09:03:29 -03:00 committed by GitHub
commit f8f97d39d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 21 deletions

View File

@ -431,23 +431,30 @@ if_only_to_toggle_enable(OldConf, Conf) ->
}
} when BridgeType :: atom().
%% Builds the bridge usage summary: a map holding `num_bridges' (a count)
%% and `count_by_type' (per-type counters keyed by the bridge type as an atom).
%% Entries whose resource config has `enable := false' are not counted.
get_basic_usage_info() ->
%% NOTE(review): this first fold is token-for-token the same as the
%% try-wrapped fold further down and looks like the pre-change body kept by
%% the diff rendering -- confirm before treating this text as compilable.
lists:foldl(
fun(#{resource_data := #{config := #{enable := false}}}, Acc) ->
Acc;
(#{type := BridgeType}, Acc) ->
NumBridges = maps:get(num_bridges, Acc),
CountByType0 = maps:get(count_by_type, Acc),
CountByType = maps:update_with(
binary_to_atom(BridgeType, utf8),
fun(X) -> X + 1 end,
1,
CountByType0),
Acc#{ num_bridges => NumBridges + 1
, count_by_type => CountByType
}
end,
#{num_bridges => 0, count_by_type => #{}},
list()).
%% Empty summary: used both as the fold seed and as the fallback result when
%% the fold raises.
InitialAcc = #{num_bridges => 0, count_by_type => #{}},
try
lists:foldl(
%% Disabled bridges leave the accumulator untouched.
fun(#{resource_data := #{config := #{enable := false}}}, Acc) ->
Acc;
(#{type := BridgeType}, Acc) ->
NumBridges = maps:get(num_bridges, Acc),
CountByType0 = maps:get(count_by_type, Acc),
CountByType = maps:update_with(
%% NOTE(review): binary_to_atom/2 can create new atoms; presumably
%% BridgeType comes from a bounded set of bridge types -- confirm.
binary_to_atom(BridgeType, utf8),
%% Bump the existing counter for this type, or start it at 1.
fun(X) -> X + 1 end,
1,
CountByType0),
Acc#{ num_bridges => NumBridges + 1
, count_by_type => CountByType
}
end,
InitialAcc,
%% list/0 is defined outside this view; presumably it returns the known
%% bridges as maps -- verify against its definition.
list())
catch
%% for instance, when the bridge app is not ready yet.
%% NOTE(review): `_:_' matches every exception class and reason, so any
%% failure raised inside the fold also produces the empty summary.
_:_ ->
InitialAcc
end.
bin(Bin) when is_binary(Bin) -> Bin;
bin(Str) when is_list(Str) -> list_to_binary(Str);

View File

@ -145,9 +145,13 @@ init(_Opts) ->
{NodeUUID, ClusterUUID} = ensure_uuids(),
{ok, State0#state{node_uuid = NodeUUID, cluster_uuid = ClusterUUID}}.
handle_call(enable, _From, State0) ->
State = report_telemetry(State0),
{reply, ok, ensure_report_timer(State)};
handle_call(enable, _From, State) ->
%% Wait a few moments before reporting the first telemetry, as the
%% apps might still be starting up. Also, this avoids hanging
%% `emqx_modules_app' initialization in case the POST request
%% takes a lot of time.
FirstReportTimeoutMS = timer:seconds(10),
{reply, ok, ensure_report_timer(FirstReportTimeoutMS, State)};
handle_call(disable, _From, State = #state{timer = Timer}) ->
emqx_misc:cancel_timer(Timer),
{reply, ok, State#state{timer = undefined}};
@ -194,6 +198,9 @@ official_version(Version) ->
match =:= re:run(Version, Pt, [{capture, none}]).
%% Arms the report timer using the interval stored in the state's
%% `report_interval' field; delegates to ensure_report_timer/2.
ensure_report_timer(#state{report_interval = Interval} = State) ->
    ensure_report_timer(Interval, State).
%% Starts a timer via emqx_misc:start_timer/2 with the given time value and
%% the `time_to_report_telemetry_data' message, and returns the state with
%% the call's result stored in the `timer' field.
ensure_report_timer(Delay, State) ->
    State#state{
        timer = emqx_misc:start_timer(Delay, time_to_report_telemetry_data)
    }.
os_info() ->

View File

@ -367,7 +367,15 @@ t_send_after_enable(_) ->
ok = snabbkaffe:start_trace(),
try
ok = emqx_telemetry:enable(),
?assertMatch({ok, _}, ?block_until(#{?snk_kind := telemetry_data_reported}, 2000, 100)),
Timeout = 12_000,
?assertMatch(
{ok, _},
?wait_async_action(
ok = emqx_telemetry:enable(),
#{?snk_kind := telemetry_data_reported},
Timeout
)
),
receive
{request, post, _URL, _Headers, Body} ->
{ok, Decoded} = emqx_json:safe_decode(Body, [return_maps]),