diff --git a/.github/workflows/apps_version_check.yaml b/.github/workflows/apps_version_check.yaml index b787a2afe..7bfabd340 100644 --- a/.github/workflows/apps_version_check.yaml +++ b/.github/workflows/apps_version_check.yaml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v2 with: fetch-depth: 0 # need full history + - name: fix-git-unsafe-repository + run: git config --global --add safe.directory /__w/emqx/emqx - name: Check relup (ce) if: endsWith(github.repository, 'emqx') run: ./scripts/update-appup.sh emqx --check diff --git a/.github/workflows/build_slim_packages.yaml b/.github/workflows/build_slim_packages.yaml index dab02d1ec..af67d3060 100644 --- a/.github/workflows/build_slim_packages.yaml +++ b/.github/workflows/build_slim_packages.yaml @@ -36,6 +36,8 @@ jobs: else echo "EMQX_NAME=emqx" >> $GITHUB_ENV fi + - name: fix-git-unsafe-repository + run: git config --global --add safe.directory /__w/emqx/emqx - name: build zip packages run: make ${EMQX_NAME}-zip - name: build deb/rpm packages diff --git a/.github/workflows/run_cts_tests.yaml b/.github/workflows/run_cts_tests.yaml index 84aa301c4..6b05a014e 100644 --- a/.github/workflows/run_cts_tests.yaml +++ b/.github/workflows/run_cts_tests.yaml @@ -45,6 +45,8 @@ jobs: if make emqx-ee --dry-run > /dev/null 2>&1; then docker exec -i erlang bash -c "echo \"https://ci%40emqx.io:${{ secrets.CI_GIT_TOKEN }}@github.com\" > /root/.git-credentials && git config --global credential.helper store" fi + - name: fix-git-unsafe-repository + run: docker exec -i erlang sh -c "git config --global --add safe.directory /emqx" - name: run test cases run: | docker exec -i erlang sh -c "make ensure-rebar3" @@ -115,6 +117,8 @@ jobs: if make emqx-ee --dry-run > /dev/null 2>&1; then docker exec -i erlang bash -c "echo \"https://ci%40emqx.io:${{ secrets.CI_GIT_TOKEN }}@github.com\" > /root/.git-credentials && git config --global credential.helper store" fi + - name: fix-git-unsafe-repository + run: docker exec -i erlang sh -c "git config --global --add safe.directory /emqx" - name: run test cases run: | printenv | grep "^EMQX_" > .env @@ -197,6 +201,8 @@ jobs: if make emqx-ee --dry-run > /dev/null 2>&1; then docker exec -i erlang bash -c "echo \"https://ci%40emqx.io:${{ secrets.CI_GIT_TOKEN }}@github.com\" > /root/.git-credentials && git config --global credential.helper store" fi + - name: fix-git-unsafe-repository + run: docker exec -i erlang sh -c "git config --global --add safe.directory /emqx" - name: run test cases run: | printenv | grep "^EMQX_" > .env @@ -268,6 +274,8 @@ jobs: if make emqx-ee --dry-run > /dev/null 2>&1; then docker exec -i erlang bash -c "echo \"https://ci%40emqx.io:${{ secrets.CI_GIT_TOKEN }}@github.com\" > /root/.git-credentials && git config --global credential.helper store" fi + - name: fix-git-unsafe-repository + run: docker exec -i erlang sh -c "git config --global --add safe.directory /emqx" - name: run test cases run: | export EMQX_AUTH__PGSQL__USERNAME=root \ @@ -391,6 +399,8 @@ jobs: if make emqx-ee --dry-run > /dev/null 2>&1; then docker exec -i erlang bash -c "echo \"https://ci%40emqx.io:${{ secrets.CI_GIT_TOKEN }}@github.com\" > /root/.git-credentials && git config --global credential.helper store" fi + - name: fix-git-unsafe-repository + run: docker exec -i erlang sh -c "git config --global --add safe.directory /emqx" - name: run test cases run: | export EMQX_AUTH__REIDS__PASSWORD=public diff --git a/CHANGES-4.3.md b/CHANGES-4.3.md index 0a61ca487..3086e7934 100644 --- a/CHANGES-4.3.md +++ b/CHANGES-4.3.md @@ -38,6 +38,7 @@ File format: startup. [#7876] [ekka-158](https://github.com/emqx/ekka/pull/158) * Add regular expression check ^[0-9A-Za-z_\-]+$ for node name [#7979] +* Fix `node_dump` variable sourcing. [#8026] ## v4.3.14 diff --git a/CHANGES-4.4.md b/CHANGES-4.4.md index c62e82032..5b2e134b1 100644 --- a/CHANGES-4.4.md +++ b/CHANGES-4.4.md @@ -1,6 +1,5 @@ # EMQX 4.4 Changes - ## v4.4.4 ### Enhancements (synced from v4.3.15) @@ -17,6 +16,13 @@ * Mnesia auth/acl http api support multiple condition queries. * Inflight QoS1 Messages for shared topics are now redispatched to another alive subscribers upon chosen subscriber session termination. * Make auth metrics name more understandable. +* Allow emqx_management http listener binding to specific interface [#8005] +* Add rule-engine function float2str/2, user can specify the float output precision [#7991] + +### Bug fixes + +- Allow uploading or referencing a backup file outside the + `data/backup` directory when restoring a backup. [#7996] ### Bug fixes (synced from v4.3.15) @@ -24,14 +30,11 @@ * SSL closed error bug fixed for redis client. * Fix mqtt-sn client disconnected due to re-send a duplicated qos2 message * Rule-engine function hexstr2bin/1 support half byte [#7977] -* Add rule-engine function float2str/2, user can specify the float output precision [#7991] - * Improved resilience against autocluster partitioning during cluster startup. [#7876] [ekka-158](https://github.com/emqx/ekka/pull/158) * Add regular expression check ^[0-9A-Za-z_\-]+$ for node name [#7979] -* Allow uploading or referencing a backup file outside the - `data/backup` directory when restoring a backup. [#7996] +* Fix `node_dump` variable sourcing. [#8026] ## v4.4.3 diff --git a/apps/emqx_auth_pgsql/rebar.config b/apps/emqx_auth_pgsql/rebar.config index 7a6aaf411..c351ed13e 100644 --- a/apps/emqx_auth_pgsql/rebar.config +++ b/apps/emqx_auth_pgsql/rebar.config @@ -1,5 +1,5 @@ {deps, - [{epgsql, {git, "https://github.com/emqx/epgsql.git", {tag, "4.6.0"}}} + [ ]}. {erl_opts, [warn_unused_vars, diff --git a/apps/emqx_management/src/emqx_mgmt_data_backup.erl b/apps/emqx_management/src/emqx_mgmt_data_backup.erl index 47b590530..7b31dd8be 100644 --- a/apps/emqx_management/src/emqx_mgmt_data_backup.erl +++ b/apps/emqx_management/src/emqx_mgmt_data_backup.erl @@ -869,26 +869,38 @@ check_json(MaybeJson) -> end. do_import_data(Data, Version) -> - do_import_extra_data(Data, Version), import_resources_and_rules(maps:get(<<"resources">>, Data, []), maps:get(<<"rules">>, Data, []), Version), import_blacklist(maps:get(<<"blacklist">>, Data, [])), import_applications(maps:get(<<"apps">>, Data, [])), import_users(maps:get(<<"users">>, Data, [])), + %% Import modules first to ensure the data of auth_mnesia module can be imported. + %% XXX: In opensource version, can't import if the emqx_auth_mnesia plug-in is not started?? + do_import_enterprise_modules(Data, Version), import_auth_clientid(maps:get(<<"auth_clientid">>, Data, [])), import_auth_username(maps:get(<<"auth_username">>, Data, [])), import_auth_mnesia(maps:get(<<"auth_mnesia">>, Data, [])), - import_acl_mnesia(maps:get(<<"acl_mnesia">>, Data, [])). + import_acl_mnesia(maps:get(<<"acl_mnesia">>, Data, [])), + %% always do extra import at last, to make sure resources are initiated before + %% creating the schemas + do_import_extra_data(Data, Version). -ifdef(EMQX_ENTERPRISE). do_import_extra_data(Data, _Version) -> _ = import_confs(maps:get(<<"configs">>, Data, []), maps:get(<<"listeners_state">>, Data, [])), - _ = import_modules(maps:get(<<"modules">>, Data, [])), _ = import_schemas(maps:get(<<"schemas">>, Data, [])), ok. -else. do_import_extra_data(_Data, _Version) -> ok. -endif. +-ifdef(EMQX_ENTERPRISE). +do_import_enterprise_modules(Data, _Version) -> + _ = import_modules(maps:get(<<"modules">>, Data, [])), + ok. +-else. +do_import_enterprise_modules(_Data, _Version) -> ok. +-endif. + covert_empty_headers([]) -> #{}; covert_empty_headers(Other) -> Other. diff --git a/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE.erl b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE.erl new file mode 100644 index 000000000..021cf735a --- /dev/null +++ b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE.erl @@ -0,0 +1,73 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_auth_mnesia_data_export_import_SUITE). + +-compile([export_all, nowarn_export_all]). + +-ifdef(EMQX_ENTERPRISE). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("emqx_modules/include/emqx_modules.hrl"). + +%%-------------------------------------------------------------------- +%% Setups +%%-------------------------------------------------------------------- +all() -> + emqx_ct:all(?MODULE). + +init_per_suite(Cfg) -> + _ = application:load(emqx_modules_spec), + emqx_ct_helpers:start_apps([emqx_rule_engine, emqx_modules, + emqx_management, emqx_dashboard]), + Cfg. + +end_per_suite(Cfg) -> + emqx_ct_helpers:stop_apps([emqx_dashboard, emqx_management, + emqx_modules, emqx_rule_engine]), + Cfg. + +get_data_path() -> + emqx_ct_helpers:deps_path(emqx_management, "test/emqx_auth_mnesia_data_export_import_SUITE_data/"). + +import(FilePath, _Version) -> + ok = emqx_mgmt_data_backup:import(get_data_path() ++ "/" ++ FilePath, <<"{}">>), + [_] = lists:filter( + fun(#module{type = mnesia_authentication}) -> true; + (_) -> false + end, emqx_modules_registry:get_modules()), + ?assertNotEqual(0, ets:info(emqx_user, size)), + ?assertNotEqual(0, ets:info(emqx_acl, size)). + +%%-------------------------------------------------------------------- +%% Cases +%%-------------------------------------------------------------------- + +t_importee427(_) -> + import("ee427.json", ee427), + {ok, _} = emqx_mgmt_data_backup:export(), + remove_all_users_and_acl(). + +t_importee430(_) -> + import("ee435.json", ee435), + {ok, _} = emqx_mgmt_data_backup:export(), + remove_all_users_and_acl(). + +remove_all_users_and_acl() -> + mnesia:delete_table(emqx_user), + mnesia:delete_table(emqx_acl). + +-endif. diff --git a/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee427.json b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee427.json new file mode 100644 index 000000000..43d58538a --- /dev/null +++ b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee427.json @@ -0,0 +1,88 @@ +{ + "version": "4.2", + "date": "2022-05-24 12:09:56", + "modules": [ + { + "id": "module:842a5c57", + "type": "mnesia_authentication", + "config": { + "password_hash": "sha256" + }, + "enabled": true, + "created_at": 1653365372585, + "description": "" + }, + { + "id": "module:e3d38e5a", + "type": "retainer", + "config": { + "storage_type": "ram", + "max_retained_messages": 0, + "max_payload_size": "1MB", + "expiry_interval": 0 + }, + "enabled": true, + "created_at": 1652436434273, + "description": "" + }, + { + "id": "module:4f911150", + "type": "presence", + "config": { + "qos": 0 + }, + "enabled": true, + "created_at": 1652436434273, + "description": "" + }, + { + "id": "module:db11d08f", + "type": "recon", + "config": {}, + "enabled": true, + "created_at": 1652436434273, + "description": "" + } + ], + "rules": [], + "resources": [], + "blacklist": [], + "apps": [ + { + "id": "admin", + "secret": "public", + "name": "Default", + "desc": "Application user", + "status": true, + "expired": "undefined" + } + ], + "users": [ + { + "username": "admin", + "password": "oFu0ZiOAYJmB1DyOzoLwEervBK0=", + "tags": "administrator" + } + ], + "auth_mnesia": [ + { + "login": "usera", + "type": "clientid", + "password": "WVlGsjBiNGJkOWRkN2QyZmYyOWViYjI4MzRiZjQyMDgzNDhkYzJjZmZlOGVjMjUzOGU5NDkwYmYyYjY5N2Q3NjUyMDU=", + "created_at": 1653365382492 + } + ], + "acl_mnesia": [ + { + "type": "clientid", + "type_value": "clientida", + "topic": "t/a", + "action": "pubsub", + "access": "allow", + "created_at": 1653365390351 + } + ], + "schemas": [], + "configs": [], + "listeners_state": [] +} diff --git a/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee435.json b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee435.json new file mode 100644 index 000000000..e6d9cbb3b --- /dev/null +++ b/apps/emqx_management/test/emqx_auth_mnesia_data_export_import_SUITE_data/ee435.json @@ -0,0 +1,96 @@ +{ + "version": "4.3", + "rules": [], + "resources": [], + "blacklist": [], + "apps": [ + { + "id": "admin", + "secret": "public", + "name": "Default", + "desc": "Application user", + "status": true, + "expired": "undefined" + } + ], + "users": [ + { + "username": "admin", + "password": "02BzoSYaTxkscy2MDtU92EbX7b4=", + "tags": "administrator" + } + ], + "auth_mnesia": [ + { + "login": "usera", + "type": "clientid", + "password": "joYZ7GY2NzcxNTQwMzY4OTRjNWUyMTdmNDlkNmE5Yzc5MDJiNjA5OWRkMWRkZjc5N2E5OGI4YWFlYTdlOWNiMjU5OWE=", + "created_at": 1653360665243 + } + ], + "acl_mnesia": [ + { + "type": "clientid", + "type_value": "clientida", + "topic": "t/a", + "action": "pub", + "access": "allow", + "created_at": 1653360687955 + }, + { + "type": "clientid", + "type_value": "clientida", + "topic": "t/a", + "action": "sub", + "access": "allow", + "created_at": 1653360687955 + } + ], + "modules": [ + { + "id": "module:fcda7532", + "type": "mnesia_authentication", + "config": { + "password_hash": "sha256" + }, + "enabled": true, + "created_at": 1653360656060, + "description": "" + }, + { + "id": "module:db849123", + "type": "retainer", + "config": { + "storage_type": "ram", + "max_retained_messages": 0, + "max_payload_size": "1MB", + "expiry_interval": 0 + }, + "enabled": true, + "created_at": 1653360591111, + "description": "" + }, + { + "id": "module:55987aaa", + "type": "presence", + "config": { + "qos": 0 + }, + "enabled": true, + "created_at": 1653360591111, + "description": "" + }, + { + "id": "module:78cae4f9", + "type": "recon", + "config": {}, + "enabled": true, + "created_at": 1653360591111, + "description": "" + } + ], + "schemas": [], + "configs": [], + "listeners_state": [], + "date": "2022-05-24 10:51:39" +} diff --git a/bin/emqx b/bin/emqx index 4232d8162..2394484b1 100755 --- a/bin/emqx +++ b/bin/emqx @@ -343,14 +343,12 @@ generate_config() { ## changing the config 'log.rotation.size' rm -rf "${RUNNER_LOG_DIR}"/*.siz - EMQX_LICENSE_CONF_OPTION="" - if [ "${EMQX_LICENSE_CONF:-}" != "" ]; then - EMQX_LICENSE_CONF_OPTION="-i ${EMQX_LICENSE_CONF}" - fi - set +e - # shellcheck disable=SC2086 - CUTTLEFISH_OUTPUT="$("$ERTS_PATH"/escript "$RUNNER_ROOT_DIR"/bin/cuttlefish -v -i "$REL_DIR"/emqx.schema $EMQX_LICENSE_CONF_OPTION -c "$RUNNER_ETC_DIR"/emqx.conf -d "$RUNNER_DATA_DIR"/configs generate)" + if [ "${EMQX_LICENSE_CONF:-}" = "" ]; then + CUTTLEFISH_OUTPUT="$("$ERTS_PATH"/escript "$RUNNER_ROOT_DIR"/bin/cuttlefish -v -i "$REL_DIR"/emqx.schema -c "$RUNNER_ETC_DIR"/emqx.conf -d "$RUNNER_DATA_DIR"/configs generate)" + else + CUTTLEFISH_OUTPUT="$("$ERTS_PATH"/escript "$RUNNER_ROOT_DIR"/bin/cuttlefish -v -i "$REL_DIR"/emqx.schema -i "${EMQX_LICENSE_CONF}" -c "$RUNNER_ETC_DIR"/emqx.conf -d "$RUNNER_DATA_DIR"/configs generate)" + fi # shellcheck disable=SC2181 RESULT=$? set -e diff --git a/bin/node_dump b/bin/node_dump index 83b41332b..79b469112 100755 --- a/bin/node_dump +++ b/bin/node_dump @@ -1,13 +1,13 @@ #!/bin/sh set -eu -ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)" -echo "Running node dump in ${ROOT_DIR}" +RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)" +echo "Running node dump in ${RUNNER_ROOT_DIR}" # shellcheck disable=SC1090 -. "$ROOT_DIR"/releases/emqx_vars +. "$RUNNER_ROOT_DIR"/releases/emqx_vars -cd "${ROOT_DIR}" +cd "${RUNNER_ROOT_DIR}" DUMP="$RUNNER_LOG_DIR/node_dump_$(date +"%Y%m%d_%H%M%S").tar.gz" CONF_DUMP="$RUNNER_LOG_DIR/conf.dump" diff --git a/include/emqx_release.hrl b/include/emqx_release.hrl index 15cb64b79..b5765abf3 100644 --- a/include/emqx_release.hrl +++ b/include/emqx_release.hrl @@ -29,7 +29,7 @@ -ifndef(EMQX_ENTERPRISE). --define(EMQX_RELEASE, {opensource, "4.4.4-rc.1"}). +-define(EMQX_RELEASE, {opensource, "4.4.4-rc.2"}). -else. diff --git a/rebar.config b/rebar.config index 7835a0e2f..e408a6dc4 100644 --- a/rebar.config +++ b/rebar.config @@ -58,8 +58,9 @@ , {observer_cli, "1.6.1"} % NOTE: depends on recon 2.5.1 , {getopt, "1.0.1"} , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "0.15.0"}}} - , {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.2.1"}}} + , {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.1"}}} , {mongodb, {git,"https://github.com/emqx/mongodb-erlang", {tag, "v3.0.13"}}} + , {epgsql, {git, "https://github.com/emqx/epgsql.git", {tag, "4.6.0"}}} ]}. {xref_ignores, diff --git a/scripts/update_appup.escript b/scripts/update_appup.escript index 840f63509..dd8dd9ca2 100755 --- a/scripts/update_appup.escript +++ b/scripts/update_appup.escript @@ -168,8 +168,8 @@ find_appup_actions(App, OldDowngrade = ensure_all_patch_versions(App, CurrVersion, OldDowngrade0), UpDiff = diff_app(up, App, CurrAppIdx, PrevAppIdx), DownDiff = diff_app(down, App, PrevAppIdx, CurrAppIdx), - Upgrade = merge_update_actions(App, UpDiff, OldUpgrade), - Downgrade = merge_update_actions(App, DownDiff, OldDowngrade), + Upgrade = merge_update_actions(App, UpDiff, OldUpgrade, PrevVersion), + Downgrade = merge_update_actions(App, DownDiff, OldDowngrade, PrevVersion), case OldUpgrade =:= Upgrade andalso OldDowngrade =:= Downgrade of true -> []; false -> [{App, {Upgrade, Downgrade, OldUpgrade, OldDowngrade}}] @@ -258,14 +258,40 @@ find_base_appup_actions(App, PrevVersion) -> end, {ensure_version(PrevVersion, Upgrade), ensure_version(PrevVersion, Downgrade)}. -merge_update_actions(App, Changes, Vsns) -> +merge_update_actions(App, Changes, Vsns, PrevVersion) -> lists:map(fun(Ret = {<<".*">>, _}) -> Ret; ({Vsn, Actions}) -> - {Vsn, do_merge_update_actions(App, Changes, Actions)} + case is_skipped_version(App, Vsn, PrevVersion) of + true -> + log("WARN: ~p has version ~s skipped over?~n", [App, Vsn]), + {Vsn, Actions}; + false -> + {Vsn, do_merge_update_actions(App, Changes, Actions)} + end end, Vsns). +%% say current version is 1.1.3, and the compare base is version 1.1.1, +%% but there is a 1.1.2 in appup we may skip merging instructions for +%% 1.1.2 because it's not used and no way to know what has been changed +is_skipped_version(App, Vsn, PrevVersion) when is_list(Vsn) andalso is_list(PrevVersion) -> + case is_app_external(App) andalso parse_version_number(Vsn) of + {ok, VsnTuple} -> + case parse_version_number(PrevVersion) of + {ok, PrevVsnTuple} -> + VsnTuple > PrevVsnTuple; + _ -> + false + end; + _ -> + false + end; +is_skipped_version(_App, _Vsn, _PrevVersion) -> + %% if app version is a regexp, we don't know for sure + %% return 'false' to be on the safe side + false. + do_merge_update_actions(App, {New0, Changed0, Deleted0}, OldActions) -> AppSpecific = app_specific_actions(App) -- OldActions, AlreadyHandled = lists:flatten(lists:map(fun process_old_action/1, OldActions)), diff --git a/src/emqx.appup.src b/src/emqx.appup.src index 147b37903..6b15d8978 100644 --- a/src/emqx.appup.src +++ b/src/emqx.appup.src @@ -8,6 +8,9 @@ {load_module,emqx_frame,brutal_purge,soft_purge,[]}, {load_module,emqx_channel,brutal_purge,soft_purge,[]}, {load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {apply,{emqx_metrics,assign_auth_stats_from_ets_to_counter,[]}}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, @@ -17,6 +20,9 @@ [{load_module,emqx_session,brutal_purge,soft_purge,[]}, {load_module,emqx_misc,brutal_purge,soft_purge,[]}, {load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {apply,{emqx_metrics,assign_auth_stats_from_ets_to_counter,[]}}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, @@ -31,6 +37,9 @@ {load_module,emqx_relup}]}, {"4.4.1", [{load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {apply,{emqx_metrics,assign_auth_stats_from_ets_to_counter,[]}}, {load_module,emqx,brutal_purge,soft_purge,[]}, @@ -50,13 +59,15 @@ {load_module,emqx_app,brutal_purge,soft_purge,[]}, {load_module,emqx_banned,brutal_purge,soft_purge,[]}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, - {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}, {load_module,emqx_sys_mon,brutal_purge,soft_purge,[]}, {load_module,emqx_misc,brutal_purge,soft_purge,[]}, {load_module,emqx_connection,brutal_purge,soft_purge,[]}, {add_module,emqx_relup}]}, {"4.4.0", [{load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_hooks,brutal_purge,soft_purge,[]}, {load_module,emqx_listeners,brutal_purge,soft_purge,[]}, {load_module,emqx_cm,brutal_purge,soft_purge,[]}, @@ -79,9 +90,7 @@ {apply,{emqx_metrics,assign_auth_stats_from_ets_to_counter,[]}}, {apply,{emqx_metrics,assign_acl_stats_from_ets_to_counter,[]}}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, - {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, {load_module,emqx_session,brutal_purge,soft_purge,[]}, - {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}, {load_module,emqx,brutal_purge,soft_purge,[]}, {load_module,emqx_app,brutal_purge,soft_purge,[]}, {load_module,emqx_message,brutal_purge,soft_purge,[]}, @@ -94,6 +103,9 @@ {load_module,emqx_frame,brutal_purge,soft_purge,[]}, {load_module,emqx_channel,brutal_purge,soft_purge,[]}, {load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, {load_module,emqx_app,brutal_purge,soft_purge,[]}, @@ -104,6 +116,9 @@ {load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_plugins,brutal_purge,soft_purge,[]}, {load_module,emqx_frame,brutal_purge,soft_purge,[]}, {load_module,emqx_channel,brutal_purge,soft_purge,[]}, @@ -115,6 +130,9 @@ {load_module,emqx_relup}]}, {"4.4.1", [{load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {load_module,emqx,brutal_purge,soft_purge,[]}, {load_module,emqx_hooks,brutal_purge,soft_purge,[]}, @@ -133,13 +151,15 @@ {load_module,emqx_app,brutal_purge,soft_purge,[]}, {load_module,emqx_banned,brutal_purge,soft_purge,[]}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, - {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}, {load_module,emqx_sys_mon,brutal_purge,soft_purge,[]}, {load_module,emqx_misc,brutal_purge,soft_purge,[]}, {load_module,emqx_connection,brutal_purge,soft_purge,[]}, {delete_module,emqx_relup}]}, {"4.4.0", [{load_module,emqx_access_rule,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm_handler,brutal_purge,soft_purge,[]}, + {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, + {update,emqx_os_mon,{advanced,[]}}, {load_module,emqx_hooks,brutal_purge,soft_purge,[]}, {load_module,emqx_listeners,brutal_purge,soft_purge,[]}, {load_module,emqx_cm,brutal_purge,soft_purge,[]}, @@ -159,9 +179,7 @@ {load_module,emqx_ws_connection,brutal_purge,soft_purge,[]}, {load_module,emqx_metrics,brutal_purge,soft_purge,[]}, {load_module,emqx_access_control,brutal_purge,soft_purge,[]}, - {load_module,emqx_alarm,brutal_purge,soft_purge,[]}, {load_module,emqx_session,brutal_purge,soft_purge,[]}, - {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}, {load_module,emqx,brutal_purge,soft_purge,[]}, {load_module,emqx_app,brutal_purge,soft_purge,[]}, {load_module,emqx_message,brutal_purge,soft_purge,[]}, diff --git a/src/emqx_alarm.erl b/src/emqx_alarm.erl index 6573ec1b5..e3b45e295 100644 --- a/src/emqx_alarm.erl +++ b/src/emqx_alarm.erl @@ -39,6 +39,8 @@ , deactivate/1 , deactivate/2 , delete_all_deactivated_alarms/0 + , ensure_deactivated/1 + , ensure_deactivated/2 , get_alarms/0 , get_alarms/1 ]). @@ -132,6 +134,24 @@ activate(Name) -> activate(Name, Details) -> gen_server:call(?MODULE, {activate_alarm, Name, Details}). +-spec ensure_deactivated(binary() | atom()) -> ok. +ensure_deactivated(Name) -> + ensure_deactivated(Name, no_details). + +-spec ensure_deactivated(binary() | atom(), atom() | map()) -> ok. +ensure_deactivated(Name, Data) -> + %% this duplicates the dirty read in handle_call, + %% intention is to avoid making gen_server calls when there is no alarm + case mnesia:dirty_read(?ACTIVATED_ALARM, Name) of + [] -> + ok; + _ -> + case deactivate(Name, Data) of + {error, not_found} -> ok; + Other -> Other + end + end. + deactivate(Name) -> gen_server:call(?MODULE, {deactivate_alarm, Name, no_details}). diff --git a/src/emqx_alarm_handler.erl b/src/emqx_alarm_handler.erl index b389d4175..1520a231b 100644 --- a/src/emqx_alarm_handler.erl +++ b/src/emqx_alarm_handler.erl @@ -56,21 +56,12 @@ init({_Args, {alarm_handler, _ExistingAlarms}}) -> init(_) -> {ok, []}. -handle_event({set_alarm, {system_memory_high_watermark, []}}, State) -> - emqx_alarm:activate(high_system_memory_usage, #{high_watermark => emqx_os_mon:get_sysmem_high_watermark()}), - {ok, State}; - -handle_event({set_alarm, {process_memory_high_watermark, Pid}}, State) -> +handle_event({set_alarm, {process_memory_high_watermark, Pid}}, State) -> emqx_alarm:activate(high_process_memory_usage, #{pid => list_to_binary(pid_to_list(Pid)), high_watermark => emqx_os_mon:get_procmem_high_watermark()}), {ok, State}; - -handle_event({clear_alarm, system_memory_high_watermark}, State) -> - emqx_alarm:deactivate(high_system_memory_usage), - {ok, State}; - -handle_event({clear_alarm, process_memory_high_watermark}, State) -> - emqx_alarm:deactivate(high_process_memory_usage), +handle_event({clear_alarm, process_memory_high_watermark}, State) -> + emqx_alarm:ensure_deactivate(high_process_memory_usage), {ok, State}; handle_event(_, State) -> diff --git a/src/emqx_os_mon.erl b/src/emqx_os_mon.erl index b23f3e65d..bbf4fd80e 100644 --- a/src/emqx_os_mon.erl +++ b/src/emqx_os_mon.erl @@ -78,32 +78,29 @@ set_cpu_low_watermark(Float) -> call({set_cpu_low_watermark, Float}). get_mem_check_interval() -> - memsup:get_check_interval() div 1000. + call(?FUNCTION_NAME). -set_mem_check_interval(Seconds) when Seconds < 60 -> - memsup:set_check_interval(1); set_mem_check_interval(Seconds) -> - memsup:set_check_interval(Seconds div 60). + call({?FUNCTION_NAME, Seconds}). get_sysmem_high_watermark() -> - memsup:get_sysmem_high_watermark(). + call(?FUNCTION_NAME). -set_sysmem_high_watermark(Float) -> - V = Float/100, +set_sysmem_high_watermark(HW) -> case load_ctl:get_config() of #{ ?MEM_MON_F0 := true } = OldLC -> ok = load_ctl:put_config(OldLC#{ ?MEM_MON_F0 => true - , ?MEM_MON_F1 => V}); + , ?MEM_MON_F1 => HW / 100}); _ -> skip end, - memsup:set_sysmem_high_watermark(V). + gen_server:call(?OS_MON, {?FUNCTION_NAME, HW}, infinity). get_procmem_high_watermark() -> memsup:get_procmem_high_watermark(). -set_procmem_high_watermark(Float) -> - memsup:set_procmem_high_watermark(Float / 100). +set_procmem_high_watermark(HW) -> + memsup:set_procmem_high_watermark(HW / 100). call(Req) -> gen_server:call(?OS_MON, Req, infinity). @@ -113,16 +110,38 @@ call(Req) -> %%-------------------------------------------------------------------- init([Opts]) -> - set_mem_check_interval(proplists:get_value(mem_check_interval, Opts)), - SysHW = proplists:get_value(sysmem_high_watermark, Opts), - set_sysmem_high_watermark(SysHW), + process_flag(trap_exit, true), + %% make sure memsup will not emit system memory alarms + memsup:set_sysmem_high_watermark(1), set_procmem_high_watermark(proplists:get_value(procmem_high_watermark, Opts)), - ensure_system_memory_alarm(SysHW), - {ok, ensure_check_timer(#{cpu_high_watermark => proplists:get_value(cpu_high_watermark, Opts), + MemCheckInterval = do_resolve_mem_check_interval(proplists:get_value(mem_check_interval, Opts)), + SysHW = proplists:get_value(sysmem_high_watermark, Opts), + St = ensure_check_timer(#{cpu_high_watermark => proplists:get_value(cpu_high_watermark, Opts), cpu_low_watermark => proplists:get_value(cpu_low_watermark, Opts), cpu_check_interval => proplists:get_value(cpu_check_interval, Opts), - timer => undefined})}. + sysmem_high_watermark => SysHW, + mem_check_interval => MemCheckInterval, + timer => undefined}), + ok = do_set_mem_check_interval(MemCheckInterval), + %% update immediately after start/restart + ok = update_mem_alarm_status(SysHW), + {ok, ensure_mem_check_timer(St)}. +handle_call(get_sysmem_high_watermark, _From, State) -> + #{sysmem_high_watermark := SysHW} = State, + {reply, maybe_round(SysHW), State}; +handle_call(get_mem_check_interval, _From, State) -> + #{mem_check_interval := Interval} = State, + {reply, Interval, State}; +handle_call({set_sysmem_high_watermark, SysHW}, _From, State) -> + %% update immediately after start/restart + ok = update_mem_alarm_status(SysHW), + {reply, ok, State#{sysmem_high_watermark => SysHW}}; +handle_call({set_mem_check_interval, Seconds0}, _From, State) -> + Seconds = do_resolve_mem_check_interval(Seconds0), + ok = do_set_mem_check_interval(Seconds), + %% will start taking effect when the current timer expires + {reply, ok, State#{mem_check_interval => Seconds}}; handle_call(get_cpu_check_interval, _From, State) -> {reply, maps:get(cpu_check_interval, State, undefined), State}; @@ -168,16 +187,28 @@ handle_info({timeout, Timer, check}, State = #{timer := Timer, ensure_check_timer(State) end, {noreply, NState}; - +handle_info({timeout, Timer, check_mem}, #{mem_check_timer := Timer, + sysmem_high_watermark := SysHW + } = State) -> + ok = update_mem_alarm_status(SysHW), + NState = ensure_mem_check_timer(State#{mem_check_timer := undefined}), + {noreply, NState}; handle_info(Info, State) -> ?LOG(error, "unexpected info: ~p", [Info]), {noreply, State}. -terminate(_Reason, #{timer := Timer}) -> +terminate(_Reason, #{timer := Timer} = St) -> + emqx_misc:cancel_timer(maps:get(mem_check_timer, St, undefined)), emqx_misc:cancel_timer(Timer). code_change(_OldVsn, State, _Extra) -> - {ok, State}. + %% NOTE: downgrade is not handled as the extra fields added to State + %% does not affect old version code. + %% The only thing which may slip through is that a started timer + %% will result in a "unexpected info" error log for the old version code + NewState = ensure_mem_check_timer(State), + SysHW = resolve_sysmem_high_watermark(State), + {ok, NewState#{sysmem_high_watermark => SysHW}}. %%-------------------------------------------------------------------- %% Internal functions @@ -189,19 +220,99 @@ ensure_check_timer(State = #{cpu_check_interval := Interval}) -> _ -> State#{timer := emqx_misc:start_timer(timer:seconds(Interval), check)} end. -%% At startup, memsup starts first and checks for memory alarms, -%% but emqx_alarm_handler is not yet used instead of alarm_handler, -%% so alarm_handler is used directly for notification (normally emqx_alarm_handler should be used). -%%The internal memsup will no longer trigger events that have been alerted, -%% and there is no exported function to remove the alerted flag, -%% so it can only be checked again at startup. -ensure_system_memory_alarm(HW) -> - case erlang:whereis(memsup) of - undefined -> ok; - _Pid -> - {Total, Allocated, _Worst} = memsup:get_memory_data(), - case Total =/= 0 andalso Allocated/Total * 100 > HW of - true -> emqx_alarm:activate(high_system_memory_usage, #{high_watermark => HW}); - false -> ok - end +ensure_mem_check_timer(#{mem_check_timer := Ref} = State) when is_reference(Ref) -> + %% timer already started + State; +ensure_mem_check_timer(State) -> + Interval = resolve_mem_check_interval(State), + case is_sysmem_check_supported() of + true -> + State#{mem_check_timer => emqx_misc:start_timer(timer:seconds(Interval), check_mem), + mem_check_interval => Interval + }; + false -> + State#{mem_check_timer => undefined, + mem_check_interval => Interval + } + end. + +resolve_mem_check_interval(#{mem_check_interval := Seconds}) when is_integer(Seconds) -> + Seconds; +resolve_mem_check_interval(_) -> + %% this only happens when hot-upgrade from older version (< 4.3.14, or < 4.4.4) + try + %% memsup has interval set API using minutes, but returns in milliseconds from get API + IntervalMs = memsup:get_check_interval(), + true = (IntervalMs > 1000), + IntervalMs div 1000 + catch + _ : _ -> + %% this is the memsup default + 60 + end. + +is_sysmem_check_supported() -> + %% sorry Mac and Windows, for now + {unix, linux} =:= os:type(). + +%% we still need to set memsup interval for process (not system) memory check +do_set_mem_check_interval(Seconds) -> + Minutes = Seconds div 60, + _ = memsup:set_check_interval(Minutes), + ok. + +%% keep the time unit alignment with memsup, minmum interval is 60 seconds. +do_resolve_mem_check_interval(Seconds) -> + case is_integer(Seconds) andalso Seconds >= 60 of + true -> Seconds; + false -> 60 + end. + +resolve_sysmem_high_watermark(#{sysmem_high_watermark := SysHW}) -> SysHW; +resolve_sysmem_high_watermark(_) -> + %% sysmem_high_watermark is not found in state map + %% get it from memsup + memsup:get_sysmem_high_watermark(). + +update_mem_alarm_status(SysHW) -> + case is_sysmem_check_supported() of + true -> + do_update_mem_alarm_status(SysHW); + false -> + %% in case the old alarm is activated + ok = emqx_alarm:ensure_deactivated(high_system_memory_usage, #{reason => disabled}) + end. + +do_update_mem_alarm_status(SysHW) -> + Usage = current_sysmem_percent(), + case Usage > SysHW of + true -> + _ = emqx_alarm:activate( + high_system_memory_usage, + #{ + usage => Usage, + high_watermark => SysHW + } + ); + _ -> + ok = emqx_alarm:ensure_deactivated( + high_system_memory_usage, + #{ + usage => Usage, + high_watermark => SysHW + } + ) + end, + ok. + +current_sysmem_percent() -> + Ratio = load_ctl:get_memory_usage(), + erlang:floor(Ratio * 10000) / 100. + +maybe_round(X) when is_integer(X) -> X; +maybe_round(X) when is_float(X) -> + R = erlang:round(X), + case erlang:abs(X - R) > 1.0e-6 of + true -> X; + false -> R end. diff --git a/test/emqx_os_mon_SUITE.erl b/test/emqx_os_mon_SUITE.erl index 575c97225..d4b8a5d2a 100644 --- a/test/emqx_os_mon_SUITE.erl +++ b/test/emqx_os_mon_SUITE.erl @@ -43,14 +43,23 @@ end_per_suite(_Config) -> emqx_ct_helpers:stop_apps([]), application:stop(os_mon). -% t_set_mem_check_interval(_) -> -% error('TODO'). +t_set_mem_check_interval(_) -> + emqx_os_mon:set_mem_check_interval(0), + ?assertEqual(60, emqx_os_mon:get_mem_check_interval()), + emqx_os_mon:set_mem_check_interval(61), + ?assertEqual(61, emqx_os_mon:get_mem_check_interval()), + ok. -% t_set_sysmem_high_watermark(_) -> -% error('TODO'). - -% t_set_procmem_high_watermark(_) -> -% error('TODO'). +t_set_sysmem_high_watermark(_) -> + emqx_os_mon:set_sysmem_high_watermark(10), + ?assertEqual(10, emqx_os_mon:get_sysmem_high_watermark()), + emqx_os_mon:set_sysmem_high_watermark(100), + ?assertEqual(100, emqx_os_mon:get_sysmem_high_watermark()), + emqx_os_mon:set_sysmem_high_watermark(90), + ?assertEqual(90, emqx_os_mon:get_sysmem_high_watermark()), + emqx_os_mon:set_sysmem_high_watermark(93.2), + ?assertEqual(93.2, emqx_os_mon:get_sysmem_high_watermark()), + ok. t_api(_) -> ?assertEqual(1, emqx_os_mon:get_cpu_check_interval()),