From 9ff53f4293615a1c868758e7c88692b4bd6ee707 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 11 Mar 2024 13:03:13 +0100 Subject: [PATCH 1/4] test(conf): fix flaky config sync testcases Before the recent changes these testcases relied on the fact that peer nodes that are part of the test cluster are started one after the other. This is now not the case in general with `emqx_cth_cluster`, so now we need to additionally find the "longest running node" in the cluster before running test logic. --- apps/emqx_conf/test/emqx_conf_app_SUITE.erl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/apps/emqx_conf/test/emqx_conf_app_SUITE.erl b/apps/emqx_conf/test/emqx_conf_app_SUITE.erl index 4760cbed3..9687e337d 100644 --- a/apps/emqx_conf/test/emqx_conf_app_SUITE.erl +++ b/apps/emqx_conf/test/emqx_conf_app_SUITE.erl @@ -64,7 +64,8 @@ t_copy_new_data_dir(Config) -> ), %% 1. Start all nodes - [First | Rest] = Nodes = start_cluster(Cluster), + Nodes = start_cluster(Cluster), + [First | Rest] = sort_highest_uptime(Nodes), try NodeDataDir = erpc:call(First, emqx, data_dir, []), File = NodeDataDir ++ "/configs/cluster.hocon", @@ -88,7 +89,8 @@ t_copy_deprecated_data_dir(Config) -> ), %% 1. Start all nodes - [First | Rest] = Nodes = start_cluster(Cluster), + Nodes = start_cluster(Cluster), + [First | Rest] = sort_highest_uptime(Nodes), try NodeDataDir = erpc:call(First, emqx, data_dir, []), File = NodeDataDir ++ "/configs/cluster-override.conf", @@ -246,3 +248,11 @@ cluster(TC, Specs, Config) -> cluster_spec({Type, Num}) -> {Type, list_to_atom(atom_to_list(?MODULE) ++ integer_to_list(Num))}. + +sort_highest_uptime(Nodes) -> + Ranking = lists:sort([{-get_node_uptime(N), N} || N <- Nodes]), + element(2, lists:unzip(Ranking)). + +get_node_uptime(Node) -> + {Milliseconds, _} = erpc:call(Node, erlang, statistics, [wall_clock]), + Milliseconds. From d2f12e03c026e1cd0da32b386d483b3411cbc8e8 Mon Sep 17 00:00:00 2001 From: zmstone Date: Fri, 8 Mar 2024 16:58:17 +0100 Subject: [PATCH 2/4] refactor(nodetool): delete stale code lists:join is now available in all supported OTP versions --- bin/nodetool | 72 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 70 deletions(-) diff --git a/bin/nodetool b/bin/nodetool index eedd1c3c1..e1d45c88d 100755 --- a/bin/nodetool +++ b/bin/nodetool @@ -44,8 +44,6 @@ cleanup_key(Str0) -> do(Args) -> ok = do_with_halt(Args, "mnesia_dir", fun create_mnesia_dir/2), - ok = do_with_halt(Args, "chkconfig", fun("-config", X) -> chkconfig(X) end), - ok = do_with_halt(Args, "chkconfig", fun chkconfig/1), Args1 = do_with_ret( Args, "-name", @@ -185,7 +183,7 @@ do(Args) -> Other -> io:format("Other: ~p~n", [Other]), io:format( - "Usage: nodetool chkconfig|getpid|ping|stop|rpc|rpc_infinity|rpcterms|eval|cold_eval [Terms] [RPC]\n" + "Usage: nodetool getpid|ping|stop|rpc|rpc_infinity|rpcterms|eval|cold_eval [Terms] [RPC]\n" ) end, net_kernel:stop(). @@ -205,11 +203,7 @@ shutdown_status_loop() -> parse_eval_args(Args) -> % shells may process args into more than one, and end up stripping % spaces, so this converts all of that to a single string to parse - String = binary_to_list( - list_to_binary( - join(Args, " ") - ) - ), + String = lists:flatten(lists:join(" ", Args)), % then just as a convenience to users, if they forgot a trailing % '.' add it for them. @@ -309,36 +303,6 @@ create_mnesia_dir(DataDir, NodeName) -> io:format("~s", [MnesiaDir]), halt(0). -chkconfig(File) -> - case file:consult(File) of - {ok, Terms} -> - case validate(Terms) of - ok -> - halt(0); - {error, Problems} -> - lists:foreach(fun print_issue/1, Problems), - %% halt(1) if any problems were errors - halt( - case [x || {error, _} <- Problems] of - [] -> 0; - _ -> 1 - end - ) - end; - {error, {Line, Mod, Term}} -> - io:format( - standard_error, ["Error on line ", file:format_error({Line, Mod, Term}), "\n"], [] - ), - halt(1); - {error, Error} -> - io:format( - standard_error, - ["Error reading config file: ", File, " ", file:format_error(Error), "\n"], - [] - ), - halt(1) - end. - check_license(Config) -> ok = ensure_application_load(emqx_license), %% This checks formal license validity to ensure @@ -379,38 +343,6 @@ consult(Cont, Str, Acc) -> consult(Cont1, eof, Acc) end. -%% -%% Validation functions for checking the app.config -%% -validate([Terms]) -> - Results = [ValidateFun(Terms) || ValidateFun <- get_validation_funs()], - Failures = [Res || Res <- Results, Res /= true], - case Failures of - [] -> - ok; - _ -> - {error, Failures} - end. - -%% Some initial and basic checks for the app.config file -get_validation_funs() -> - []. - -print_issue({warning, Warning}) -> - io:format(standard_error, "Warning in app.config: ~s~n", [Warning]); -print_issue({error, Error}) -> - io:format(standard_error, "Error in app.config: ~s~n", [Error]). - -%% string:join/2 copy; string:join/2 is getting obsoleted -%% and replaced by lists:join/2, but lists:join/2 is too new -%% for version support (only appeared in 19.0) so it cannot be -%% used. Instead we just adopt join/2 locally and hope it works -%% for most unicode use cases anyway. -join([], Sep) when is_list(Sep) -> - []; -join([H | T], Sep) -> - H ++ lists:append([Sep ++ X || X <- T]). - add_libs_dir() -> [_ | _] = RootDir = os:getenv("RUNNER_ROOT_DIR"), CurrentVsn = os:getenv("REL_VSN"), From 1c60eb3d75431fb10e340fd413f9e31abcd68082 Mon Sep 17 00:00:00 2001 From: zmstone Date: Fri, 8 Mar 2024 17:19:40 +0100 Subject: [PATCH 3/4] chore: delete an obsolete comment --- bin/emqx | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/emqx b/bin/emqx index 60251ed5d..1db57d7e8 100755 --- a/bin/emqx +++ b/bin/emqx @@ -529,7 +529,6 @@ else tmp_proto_dist=$(echo -e "$PS_LINE" | $GREP -oE '\s-ekka_proto_dist.*' | awk '{print $2}' || echo 'inet_tcp') SSL_DIST_OPTFILE="$(echo -e "$PS_LINE" | $GREP -oE '\-ssl_dist_optfile\s.+\s' | awk '{print $2}' || true)" tmp_ticktime="$(echo -e "$PS_LINE" | $GREP -oE '\s-kernel\snet_ticktime\s.+\s' | awk '{print $3}' || true)" - # data_dir is actually not needed, but kept anyway tmp_datadir="$(echo -e "$PS_LINE" | $GREP -oE "\-emqx_data_dir.*" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g' || true)" ## Make the format like what call_hocon multi_get prints out, but only need 4 args EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto_dist}\nnode.dist_net_ticktime=$tmp_ticktime\nnode.data_dir=${tmp_datadir}" From 9357e6610a5c8b5f62c7d4d088e6e866c324d27e Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 13 Mar 2024 09:43:50 +0100 Subject: [PATCH 4/4] docs: add changelog for PR 12672 --- changes/ce/fix-12672.en.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 changes/ce/fix-12672.en.md diff --git a/changes/ce/fix-12672.en.md b/changes/ce/fix-12672.en.md new file mode 100644 index 000000000..9ed3a3922 --- /dev/null +++ b/changes/ce/fix-12672.en.md @@ -0,0 +1,10 @@ +Load `{data_dir}/configs/cluster.hocon` when generating node boot config. + +Logging related config changes made from the dashboard are persisted in `{data_dir}/configs/cluster.hocon`. +Prior to this change, it only takes `etc/emqx.conf` to generate the boot config (including the logger part), +then `{data_dir}/configs/cluster.hocon` is loaded to reconfigure the logger after boot is complete. + +This late reconfigure may cause some log segment files to be lost. + +Now `{data_dir}/configs/cluster.hocon` and `etc/emqx.conf` are both loaded (`emqx.conf` overlaying on top) +to generate boot config.