From db83457d13a50c62f1b2465caa8c57ff4dff75a4 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 22 Nov 2023 16:02:23 -0300 Subject: [PATCH] test: fix flaky test The cause was that the call `sys:terminate/2` was timing out... `exit/2` doens't always work: ``` 2023-11-22 19:14:40.974 killed async workers Error: -22T19:14:40.974563+00:00 [error] crasher: initial call: gun:proc_lib_hack/5, pid: <0.15908.7>, registered_name: [], exit: {{{owner_gone,killed},[{gun,owner_gone,1,[{file,"gun.erl"},{line,970}]},{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,649}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]},[{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,654}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]}, ancestors: [gun_sup,<0.15387.7>], message_queue_len: 0, messages: [], links: [<0.15388.7>], dictionary: [], trap_exit: false, status: running, heap_size: 987, stack_size: 28, reductions: 1822; neighbours: Error: -22T19:14:40.998051+00:00 [error] Supervisor: {local,gun_sup}. Context: child_terminated. Reason: {{owner_gone,killed},[{gun,owner_gone,1,[{file,"gun.erl"},{line,970}]},{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,649}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]}. Offender: id=gun,pid=<0.15908.7>. 2023-11-22T19:15:41.088752+00:00 [critical] Run stage failed: error:{badmatch,{timeout,#{expected_remaining => 1,mailbox => {messages,[]},msgs_so_far => []}}}, Stacktrace: [{emqx_bridge_gcp_pubsub_consumer_SUITE,'-t_async_worker_death_mid_pull/1-fun-17-',3,[{file,"/emqx/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl"},{line,1576}]},{emqx_bridge_gcp_pubsub_consumer_SUITE,t_async_worker_death_mid_pull,1,[{file,"/emqx/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl"},{line,1505}]}], Trace dump: "/emqx/_build/test/logs/ct_run.test@127.0.0.1.2023-11-22_19.14.27/snabbkaffe/1700680540975786370.log", mfa: undefined Error: -22T19:15:46.095702+00:00 [error] crasher: initial call: gun:proc_lib_hack/5, pid: <0.15934.7>, registered_name: [], exit: {{{owner_gone,killed},[{gun,owner_gone,1,[{file,"gun.erl"},{line,970}]},{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,649}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]},[{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,654}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]}, ancestors: [gun_sup,<0.15387.7>], message_queue_len: 0, messages: [], links: [<0.15388.7>], dictionary: [], trap_exit: false, status: running, heap_size: 610, stack_size: 28, reductions: 1471; neighbours: Error: -22T19:15:46.095192+00:00 [error] Supervisor: {local,ehttpc_sup}. Context: shutdown_error. Reason: killed. Offender: id={ehttpc_pool_sup,<<98,114,105,100,103,101,58,103,99,112,95,112,117,98,115,117,98,95,99,111,110,115,117,109,101,114,58,116,95,97,115,121,110,99,95,119,111,114,107,101,114,95,100,101,97,116,104,95,109,105,100,95,112,117,108,108,45,53,55,54,52,54,48,55,53,50,51,48,51,52,50,50,55,53,49>>},pid=<0.15903.7>. Error: -22T19:15:46.095470+00:00 [error] Supervisor: {<0.15906.7>,ehttpc_worker_sup}. Context: shutdown_error. Reason: killed. Offender: id={worker,1},pid=<0.15924.7>. Error: -22T19:15:46.096762+00:00 [error] Supervisor: {local,gun_sup}. Context: child_terminated. Reason: {{owner_gone,killed},[{gun,owner_gone,1,[{file,"gun.erl"},{line,970}]},{gun,proc_lib_hack,5,[{file,"gun.erl"},{line,649}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]}. Offender: id=gun,pid=<0.15934.7>. Warning: 2T19:15:46.098278+00:00 [warning] msg: remove_local_resource_failed, mfa: emqx_resource:remove_local/1(362), error: {error,timeout}, resource_id: <<"bridge:gcp_pubsub_consumer:t_async_worker_death_mid_pull-576460752303422751">> Error: -22T19:15:46.149090+00:00 [error] Generic server <0.15904.7> terminating. Reason: killed. Last message: {'EXIT',<0.15903.7>,killed}. State: {state,<<"bridge:gcp_pubsub_consumer:t_async_worker_death_mid_pull-576460752303422751">>,1,random}. Error: -22T19:15:46.149525+00:00 [error] crasher: initial call: ehttpc_pool:init/1, pid: <0.15904.7>, registered_name: [], exit: {killed,[{gen_server,decode_msg,9,[{file,"gen_server.erl"},{line,909}]},{proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,240}]}]}, ancestors: [<0.15903.7>,ehttpc_sup,<0.15731.7>], message_queue_len: 0, messages: [], links: [], dictionary: [], trap_exit: true, status: running, heap_size: 376, stack_size: 28, reductions: 3428; neighbours: ``` --- .../emqx_bridge_gcp_pubsub_consumer_SUITE.erl | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl index 5e1e885db..b0e4e4ac8 100644 --- a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl +++ b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl @@ -34,16 +34,22 @@ init_per_suite(Config) -> emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), case emqx_common_test_helpers:is_tcp_server_available(GCPEmulatorHost, GCPEmulatorPort) of true -> - ok = emqx_common_test_helpers:start_apps([emqx_conf]), - ok = emqx_connector_test_helpers:start_apps([ - emqx_resource, emqx_bridge, emqx_rule_engine - ]), - {ok, _} = application:ensure_all_started(emqx_connector), + Apps = emqx_cth_suite:start( + [ + emqx, + emqx_conf, + emqx_bridge_gcp_pubsub, + emqx_bridge, + emqx_rule_engine + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), emqx_mgmt_api_test_util:init_suite(), HostPort = GCPEmulatorHost ++ ":" ++ GCPEmulatorPortStr, true = os:putenv("PUBSUB_EMULATOR_HOST", HostPort), Client = start_control_client(), [ + {apps, Apps}, {proxy_name, ProxyName}, {proxy_host, ProxyHost}, {proxy_port, ProxyPort}, @@ -62,12 +68,11 @@ init_per_suite(Config) -> end. end_per_suite(Config) -> + Apps = ?config(apps, Config), Client = ?config(client, Config), stop_control_client(Client), emqx_mgmt_api_test_util:end_suite(), - ok = emqx_common_test_helpers:stop_apps([emqx_conf]), - ok = emqx_connector_test_helpers:stop_apps([emqx_bridge, emqx_resource, emqx_rule_engine]), - _ = application:stop(emqx_connector), + emqx_cth_suite:stop(Apps), os:unsetenv("PUBSUB_EMULATOR_HOST"), ok. @@ -1514,7 +1519,7 @@ t_async_worker_death_mid_pull(Config) -> #{?snk_kind := gcp_pubsub_consumer_worker_reply_delegator} ), spawn_link(fun() -> - ct:pal("will kill async worker"), + ct:pal("will kill async workers"), ?tp_span( kill_async_worker, #{}, @@ -1530,11 +1535,12 @@ t_async_worker_death_mid_pull(Config) -> fun(AsyncWorkerPid) -> Ref = monitor(process, AsyncWorkerPid), ct:pal("killing pid ~p", [AsyncWorkerPid]), - sys:terminate(AsyncWorkerPid, die), + sys:terminate(AsyncWorkerPid, die, 20_000), receive {'DOWN', Ref, process, AsyncWorkerPid, _} -> + ct:pal("killed pid ~p", [AsyncWorkerPid]), ok - after 500 -> ct:fail("async worker didn't die") + after 500 -> ct:fail("async worker ~p didn't die", [AsyncWorkerPid]) end, ok end, @@ -1544,7 +1550,7 @@ t_async_worker_death_mid_pull(Config) -> ok end ), - ct:pal("killed async worker") + ct:pal("killed async workers") end), ?assertMatch(