Merge pull request #12047 from thalesmg/test-more-flaky-tests-r53-20231128

test(flaky): more adjustments
This commit is contained in:
zhongwencool 2023-11-30 09:33:55 +08:00 committed by GitHub
commit 773bd8ecb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 35 deletions

View File

@ -237,7 +237,10 @@ handle_continue(?patch_subscription, State0) ->
), ),
{noreply, State0}; {noreply, State0};
error -> error ->
%% retry %% retry; add a random delay for the case where multiple workers step on each
%% other's toes before retrying.
RandomMS = rand:uniform(500),
timer:sleep(RandomMS),
{noreply, State0, {continue, ?patch_subscription}} {noreply, State0, {continue, ?patch_subscription}}
end. end.
@ -478,7 +481,6 @@ do_pull_async(State0) ->
Body = body(State0, pull), Body = body(State0, pull),
PreparedRequest = {prepared_request, {Method, Path, Body}}, PreparedRequest = {prepared_request, {Method, Path, Body}},
ReplyFunAndArgs = {fun ?MODULE:reply_delegator/4, [self(), pull_async, InstanceId]}, ReplyFunAndArgs = {fun ?MODULE:reply_delegator/4, [self(), pull_async, InstanceId]},
%% `ehttpc_pool'/`gproc_pool' might return `false' if there are no workers...
Res = emqx_bridge_gcp_pubsub_client:query_async( Res = emqx_bridge_gcp_pubsub_client:query_async(
PreparedRequest, PreparedRequest,
ReplyFunAndArgs, ReplyFunAndArgs,

View File

@ -196,7 +196,7 @@ consumer_config(TestCase, Config) ->
" connect_timeout = \"5s\"\n" " connect_timeout = \"5s\"\n"
" service_account_json = ~s\n" " service_account_json = ~s\n"
" consumer {\n" " consumer {\n"
" ack_deadline = \"60s\"\n" " ack_deadline = \"10s\"\n"
" ack_retry_interval = \"1s\"\n" " ack_retry_interval = \"1s\"\n"
" pull_max_messages = 10\n" " pull_max_messages = 10\n"
" consumer_workers_per_topic = 1\n" " consumer_workers_per_topic = 1\n"
@ -512,10 +512,23 @@ wait_acked(Opts) ->
%% no need to check return value; we check the property in %% no need to check return value; we check the property in
%% the check phase. this is just to give it a chance to do %% the check phase. this is just to give it a chance to do
%% so and avoid flakiness. should be fast. %% so and avoid flakiness. should be fast.
snabbkaffe:block_until( Res = snabbkaffe:block_until(
?match_n_events(N, #{?snk_kind := gcp_pubsub_consumer_worker_acknowledged}), ?match_n_events(N, #{?snk_kind := gcp_pubsub_consumer_worker_acknowledged}),
Timeout Timeout
), ),
case Res of
{ok, _} ->
ok;
{timeout, Evts} ->
%% Fixme: apparently, snabbkaffe may timeout but still return the expected
%% events here.
case length(Evts) >= N of
true ->
ok;
false ->
ct:pal("timed out waiting for acks;\n expected: ~b\n received:\n ~p", [N, Evts])
end
end,
ok. ok.
wait_forgotten() -> wait_forgotten() ->
@ -652,25 +665,24 @@ setup_and_start_listeners(Node, NodeOpts) ->
end end
). ).
%% Collapses runs of equal consecutive elements into a single element,
%% preserving order; e.g. [a, a, b, a] -> [a, b, a].  Non-adjacent
%% duplicates are kept (this is NOT a set/usort operation).
dedup([]) ->
    [];
dedup([Head | Tail]) ->
    [Head | dedup(Head, Tail)].

%% Walks the tail, skipping elements equal to the most recently emitted
%% one (`Prev`), and restarting the comparison at each new element.
dedup(Prev, [Prev | Tail]) ->
    dedup(Prev, Tail);
dedup(_Prev, [Next | Tail]) ->
    [Next | dedup(Next, Tail)];
dedup(_Prev, []) ->
    [].
%%------------------------------------------------------------------------------ %%------------------------------------------------------------------------------
%% Trace properties %% Trace properties
%%------------------------------------------------------------------------------ %%------------------------------------------------------------------------------
%% Trace property: asserts that no Pub/Sub message id appears more than
%% once among the decoded pull responses recorded in the snabbkaffe trace.
prop_pulled_only_once() ->
    {"all pulled message ids are unique", fun ?MODULE:prop_pulled_only_once/1}.

prop_pulled_only_once(Trace) ->
    %% Collect every messageId seen across all decoded-message events.
    DecodedEvents = ?of_kind(gcp_pubsub_consumer_worker_decoded_messages, Trace),
    PulledIds =
        [
            MsgId
         || #{messages := Msgs} <- DecodedEvents,
            #{<<"message">> := #{<<"messageId">> := MsgId}} <- Msgs
        ],
    %% If any id was pulled twice, deduplicating via a set shrinks the count.
    UniquePulledIds = sets:from_list(PulledIds, [{version, 2}]),
    UniqueNumPulled = sets:size(UniquePulledIds),
    NumPulled = length(PulledIds),
    ?assertEqual(UniqueNumPulled, NumPulled, #{pulled_ids => PulledIds}),
    ok.
prop_handled_only_once() -> prop_handled_only_once() ->
{"all pulled message are processed only once", fun ?MODULE:prop_handled_only_once/1}. {"all pulled message are processed only once", fun ?MODULE:prop_handled_only_once/1}.
prop_handled_only_once(Trace) -> prop_handled_only_once(Trace) ->
@ -1046,7 +1058,6 @@ t_consume_ok(Config) ->
end, end,
[ [
prop_all_pulled_are_acked(), prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(), prop_handled_only_once(),
prop_acked_ids_eventually_forgotten() prop_acked_ids_eventually_forgotten()
] ]
@ -1119,7 +1130,6 @@ t_bridge_rule_action_source(Config) ->
#{payload => Payload0} #{payload => Payload0}
end, end,
[ [
prop_pulled_only_once(),
prop_handled_only_once() prop_handled_only_once()
] ]
), ),
@ -1237,7 +1247,6 @@ t_multiple_topic_mappings(Config) ->
end, end,
[ [
prop_all_pulled_are_acked(), prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once() prop_handled_only_once()
] ]
), ),
@ -1270,7 +1279,7 @@ t_multiple_pull_workers(Config) ->
}, },
<<"resource_opts">> => #{ <<"resource_opts">> => #{
%% reduce flakiness %% reduce flakiness
<<"request_ttl">> => <<"4s">> <<"request_ttl">> => <<"20s">>
} }
} }
), ),
@ -1298,7 +1307,6 @@ t_multiple_pull_workers(Config) ->
end, end,
[ [
prop_all_pulled_are_acked(), prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(), prop_handled_only_once(),
{"message is processed only once", fun(Trace) -> {"message is processed only once", fun(Trace) ->
?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})), ?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})),
@ -1532,11 +1540,12 @@ t_async_worker_death_mid_pull(Config) ->
ct:pal("published message"), ct:pal("published message"),
AsyncWorkerPids = get_async_worker_pids(Config), AsyncWorkerPids = get_async_worker_pids(Config),
Timeout = 20_000,
emqx_utils:pmap( emqx_utils:pmap(
fun(AsyncWorkerPid) -> fun(AsyncWorkerPid) ->
Ref = monitor(process, AsyncWorkerPid), Ref = monitor(process, AsyncWorkerPid),
ct:pal("killing pid ~p", [AsyncWorkerPid]), ct:pal("killing pid ~p", [AsyncWorkerPid]),
sys:terminate(AsyncWorkerPid, die, 20_000), exit(AsyncWorkerPid, kill),
receive receive
{'DOWN', Ref, process, AsyncWorkerPid, _} -> {'DOWN', Ref, process, AsyncWorkerPid, _} ->
ct:pal("killed pid ~p", [AsyncWorkerPid]), ct:pal("killed pid ~p", [AsyncWorkerPid]),
@ -1545,7 +1554,8 @@ t_async_worker_death_mid_pull(Config) ->
end, end,
ok ok
end, end,
AsyncWorkerPids AsyncWorkerPids,
Timeout + 2_000
), ),
ok ok
@ -1559,7 +1569,13 @@ t_async_worker_death_mid_pull(Config) ->
?wait_async_action( ?wait_async_action(
create_bridge( create_bridge(
Config, Config,
#{<<"pool_size">> => 1} #{
<<"pool_size">> => 1,
<<"consumer">> => #{
<<"ack_deadline">> => <<"10s">>,
<<"ack_retry_interval">> => <<"1s">>
}
}
), ),
#{?snk_kind := gcp_pubsub_consumer_worker_init}, #{?snk_kind := gcp_pubsub_consumer_worker_init},
10_000 10_000
@ -1591,18 +1607,19 @@ t_async_worker_death_mid_pull(Config) ->
], ],
Trace Trace
), ),
SubTraceEvts = ?projection(?snk_kind, SubTrace),
?assertMatch( ?assertMatch(
[ [
#{?snk_kind := gcp_pubsub_consumer_worker_handled_async_worker_down}, gcp_pubsub_consumer_worker_handled_async_worker_down,
#{?snk_kind := gcp_pubsub_consumer_worker_reply_delegator} gcp_pubsub_consumer_worker_reply_delegator
| _ | _
], ],
SubTrace, dedup(SubTraceEvts),
#{sub_trace => projection_optional_span(SubTrace)} #{sub_trace => projection_optional_span(SubTrace)}
), ),
?assertMatch( ?assertMatch(
#{?snk_kind := gcp_pubsub_consumer_worker_pull_response_received}, gcp_pubsub_consumer_worker_pull_response_received,
lists:last(SubTrace) lists:last(SubTraceEvts)
), ),
ok ok
end end
@ -1934,7 +1951,6 @@ t_connection_down_during_ack(Config) ->
end, end,
[ [
prop_all_pulled_are_acked(), prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(), prop_handled_only_once(),
{"message is processed only once", fun(Trace) -> {"message is processed only once", fun(Trace) ->
?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})), ?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})),
@ -1959,7 +1975,15 @@ t_connection_down_during_ack_redeliver(Config) ->
?wait_async_action( ?wait_async_action(
create_bridge( create_bridge(
Config, Config,
#{<<"consumer">> => #{<<"ack_deadline">> => <<"10s">>}} #{
<<"consumer">> => #{
<<"ack_deadline">> => <<"12s">>,
<<"ack_retry_interval">> => <<"1s">>
},
<<"resource_opts">> => #{
<<"request_ttl">> => <<"11s">>
}
}
), ),
#{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"}, #{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"},
10_000 10_000
@ -2032,7 +2056,10 @@ t_connection_down_during_pull(Config) ->
?wait_async_action( ?wait_async_action(
create_bridge( create_bridge(
Config, Config,
#{<<"consumer">> => #{<<"ack_retry_interval">> => <<"1s">>}} #{
<<"consumer">> => #{<<"ack_retry_interval">> => <<"1s">>},
<<"resource_opts">> => #{<<"request_ttl">> => <<"11s">>}
}
), ),
#{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"}, #{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"},
10_000 10_000