Merge pull request #12047 from thalesmg/test-more-flaky-tests-r53-20231128

test(flaky): more adjustments
This commit is contained in:
zhongwencool 2023-11-30 09:33:55 +08:00 committed by GitHub
commit 773bd8ecb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 35 deletions

View File

@ -237,7 +237,10 @@ handle_continue(?patch_subscription, State0) ->
),
{noreply, State0};
error ->
%% retry
%% retry; add a random delay for the case where multiple workers step on each
%% other's toes before retrying.
RandomMS = rand:uniform(500),
timer:sleep(RandomMS),
{noreply, State0, {continue, ?patch_subscription}}
end.
@ -478,7 +481,6 @@ do_pull_async(State0) ->
Body = body(State0, pull),
PreparedRequest = {prepared_request, {Method, Path, Body}},
ReplyFunAndArgs = {fun ?MODULE:reply_delegator/4, [self(), pull_async, InstanceId]},
%% `ehttpc_pool'/`gproc_pool' might return `false' if there are no workers...
Res = emqx_bridge_gcp_pubsub_client:query_async(
PreparedRequest,
ReplyFunAndArgs,

View File

@ -196,7 +196,7 @@ consumer_config(TestCase, Config) ->
" connect_timeout = \"5s\"\n"
" service_account_json = ~s\n"
" consumer {\n"
" ack_deadline = \"60s\"\n"
" ack_deadline = \"10s\"\n"
" ack_retry_interval = \"1s\"\n"
" pull_max_messages = 10\n"
" consumer_workers_per_topic = 1\n"
@ -512,10 +512,23 @@ wait_acked(Opts) ->
%% no need to check return value; we check the property in
%% the check phase. this is just to give it a chance to do
%% so and avoid flakiness. should be fast.
snabbkaffe:block_until(
Res = snabbkaffe:block_until(
?match_n_events(N, #{?snk_kind := gcp_pubsub_consumer_worker_acknowledged}),
Timeout
),
case Res of
{ok, _} ->
ok;
{timeout, Evts} ->
%% Fixme: apparently, snabbkaffe may timeout but still return the expected
%% events here.
case length(Evts) >= N of
true ->
ok;
false ->
ct:pal("timed out waiting for acks;\n expected: ~b\n received:\n ~p", [N, Evts])
end
end,
ok.
wait_forgotten() ->
@ -652,25 +665,24 @@ setup_and_start_listeners(Node, NodeOpts) ->
end
).
dedup([]) ->
[];
dedup([X]) ->
[X];
dedup([X | Rest]) ->
[X | dedup(X, Rest)].
dedup(X, [X | Rest]) ->
dedup(X, Rest);
dedup(_X, [Y | Rest]) ->
[Y | dedup(Y, Rest)];
dedup(_X, []) ->
[].
%%------------------------------------------------------------------------------
%% Trace properties
%%------------------------------------------------------------------------------
prop_pulled_only_once() ->
{"all pulled message ids are unique", fun ?MODULE:prop_pulled_only_once/1}.
prop_pulled_only_once(Trace) ->
PulledIds =
[
MsgId
|| #{messages := Msgs} <- ?of_kind(gcp_pubsub_consumer_worker_decoded_messages, Trace),
#{<<"message">> := #{<<"messageId">> := MsgId}} <- Msgs
],
NumPulled = length(PulledIds),
UniquePulledIds = sets:from_list(PulledIds, [{version, 2}]),
UniqueNumPulled = sets:size(UniquePulledIds),
?assertEqual(UniqueNumPulled, NumPulled, #{pulled_ids => PulledIds}),
ok.
prop_handled_only_once() ->
{"all pulled message are processed only once", fun ?MODULE:prop_handled_only_once/1}.
prop_handled_only_once(Trace) ->
@ -1046,7 +1058,6 @@ t_consume_ok(Config) ->
end,
[
prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(),
prop_acked_ids_eventually_forgotten()
]
@ -1119,7 +1130,6 @@ t_bridge_rule_action_source(Config) ->
#{payload => Payload0}
end,
[
prop_pulled_only_once(),
prop_handled_only_once()
]
),
@ -1237,7 +1247,6 @@ t_multiple_topic_mappings(Config) ->
end,
[
prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once()
]
),
@ -1270,7 +1279,7 @@ t_multiple_pull_workers(Config) ->
},
<<"resource_opts">> => #{
%% reduce flakiness
<<"request_ttl">> => <<"4s">>
<<"request_ttl">> => <<"20s">>
}
}
),
@ -1298,7 +1307,6 @@ t_multiple_pull_workers(Config) ->
end,
[
prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(),
{"message is processed only once", fun(Trace) ->
?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})),
@ -1532,11 +1540,12 @@ t_async_worker_death_mid_pull(Config) ->
ct:pal("published message"),
AsyncWorkerPids = get_async_worker_pids(Config),
Timeout = 20_000,
emqx_utils:pmap(
fun(AsyncWorkerPid) ->
Ref = monitor(process, AsyncWorkerPid),
ct:pal("killing pid ~p", [AsyncWorkerPid]),
sys:terminate(AsyncWorkerPid, die, 20_000),
exit(AsyncWorkerPid, kill),
receive
{'DOWN', Ref, process, AsyncWorkerPid, _} ->
ct:pal("killed pid ~p", [AsyncWorkerPid]),
@ -1545,7 +1554,8 @@ t_async_worker_death_mid_pull(Config) ->
end,
ok
end,
AsyncWorkerPids
AsyncWorkerPids,
Timeout + 2_000
),
ok
@ -1559,7 +1569,13 @@ t_async_worker_death_mid_pull(Config) ->
?wait_async_action(
create_bridge(
Config,
#{<<"pool_size">> => 1}
#{
<<"pool_size">> => 1,
<<"consumer">> => #{
<<"ack_deadline">> => <<"10s">>,
<<"ack_retry_interval">> => <<"1s">>
}
}
),
#{?snk_kind := gcp_pubsub_consumer_worker_init},
10_000
@ -1591,18 +1607,19 @@ t_async_worker_death_mid_pull(Config) ->
],
Trace
),
SubTraceEvts = ?projection(?snk_kind, SubTrace),
?assertMatch(
[
#{?snk_kind := gcp_pubsub_consumer_worker_handled_async_worker_down},
#{?snk_kind := gcp_pubsub_consumer_worker_reply_delegator}
gcp_pubsub_consumer_worker_handled_async_worker_down,
gcp_pubsub_consumer_worker_reply_delegator
| _
],
SubTrace,
dedup(SubTraceEvts),
#{sub_trace => projection_optional_span(SubTrace)}
),
?assertMatch(
#{?snk_kind := gcp_pubsub_consumer_worker_pull_response_received},
lists:last(SubTrace)
gcp_pubsub_consumer_worker_pull_response_received,
lists:last(SubTraceEvts)
),
ok
end
@ -1934,7 +1951,6 @@ t_connection_down_during_ack(Config) ->
end,
[
prop_all_pulled_are_acked(),
prop_pulled_only_once(),
prop_handled_only_once(),
{"message is processed only once", fun(Trace) ->
?assertMatch({timeout, _}, receive_published(#{timeout => 5_000})),
@ -1959,7 +1975,15 @@ t_connection_down_during_ack_redeliver(Config) ->
?wait_async_action(
create_bridge(
Config,
#{<<"consumer">> => #{<<"ack_deadline">> => <<"10s">>}}
#{
<<"consumer">> => #{
<<"ack_deadline">> => <<"12s">>,
<<"ack_retry_interval">> => <<"1s">>
},
<<"resource_opts">> => #{
<<"request_ttl">> => <<"11s">>
}
}
),
#{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"},
10_000
@ -2032,7 +2056,10 @@ t_connection_down_during_pull(Config) ->
?wait_async_action(
create_bridge(
Config,
#{<<"consumer">> => #{<<"ack_retry_interval">> => <<"1s">>}}
#{
<<"consumer">> => #{<<"ack_retry_interval">> => <<"1s">>},
<<"resource_opts">> => #{<<"request_ttl">> => <<"11s">>}
}
),
#{?snk_kind := "gcp_pubsub_consumer_worker_subscription_ready"},
10_000