From 9de9631d6b63095ade736036b4b94e2d689d9a40 Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 5 Sep 2023 19:29:05 +0300 Subject: [PATCH] fix(nodetool): increase graceful stop timeout, handle and report `{badrpc, timeout}` error --- bin/nodetool | 9 ++++++++- changes/ce/fix-11567.en.md | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 changes/ce/fix-11567.en.md diff --git a/bin/nodetool b/bin/nodetool index a96f5f9fd..511ef8e28 100755 --- a/bin/nodetool +++ b/bin/nodetool @@ -8,6 +8,8 @@ %% ------------------------------------------------------------------- -mode(compile). +-define(SHUTDOWN_TIMEOUT_MS, 120_000). + main(Args) -> case os:type() of {win32, nt} -> ok; @@ -85,9 +87,14 @@ do(Args) -> %% a "pong" io:format("pong\n"); ["stop"] -> - case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of + case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], ?SHUTDOWN_TIMEOUT_MS) of ok -> ok; + {badrpc, timeout} -> + io:format("EMQX is still shutting down, it failed to stop gracefully " + "within the configured timeout of: ~ps\n", + [erlang:convert_time_unit(?SHUTDOWN_TIMEOUT_MS, millisecond, second)]), + halt(1); {badrpc, nodedown} -> %% nodetool commands are always executed after a ping %% which if the code gets here, it's because the target node diff --git a/changes/ce/fix-11567.en.md b/changes/ce/fix-11567.en.md new file mode 100644 index 000000000..026674f69 --- /dev/null +++ b/changes/ce/fix-11567.en.md @@ -0,0 +1,3 @@ +Improve EMQX graceful shutdown (`emqx stop` command): +- increase timeout from 1 to 2 minutes +- print an error message if EMQX can't stop gracefully within the configured timeout