From 1fcd06f4bc62506e0c93e1a0fad1f65f29da2e0c Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 22 Mar 2024 15:33:15 -0700 Subject: [PATCH 001/234] feat: update greptimedb client lib and ci version --- .ci/docker-compose-file/docker-compose-greptimedb.yaml | 2 +- apps/emqx_bridge_greptimedb/rebar.config | 2 +- mix.exs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.ci/docker-compose-file/docker-compose-greptimedb.yaml b/.ci/docker-compose-file/docker-compose-greptimedb.yaml index 6813b4983..b564abbf9 100644 --- a/.ci/docker-compose-file/docker-compose-greptimedb.yaml +++ b/.ci/docker-compose-file/docker-compose-greptimedb.yaml @@ -4,7 +4,7 @@ services: greptimedb: container_name: greptimedb hostname: greptimedb - image: greptime/greptimedb:v0.4.4 + image: greptime/greptimedb:v0.7.1 expose: - "4000" - "4001" diff --git a/apps/emqx_bridge_greptimedb/rebar.config b/apps/emqx_bridge_greptimedb/rebar.config index bb37de16e..c1039e6f4 100644 --- a/apps/emqx_bridge_greptimedb/rebar.config +++ b/apps/emqx_bridge_greptimedb/rebar.config @@ -6,7 +6,7 @@ {emqx_connector, {path, "../../apps/emqx_connector"}}, {emqx_resource, {path, "../../apps/emqx_resource"}}, {emqx_bridge, {path, "../../apps/emqx_bridge"}}, - {greptimedb, {git, "https://github.com/GreptimeTeam/greptimedb-client-erl", {tag, "v0.1.7"}}} + {greptimedb, {git, "https://github.com/GreptimeTeam/greptimedb-ingester-erl", {tag, "v0.1.8"}}} ]}. {plugins, [rebar3_path_deps]}. {project_plugins, [erlfmt]}. 
diff --git a/mix.exs b/mix.exs index 251a1c5f4..93f38d8c3 100644 --- a/mix.exs +++ b/mix.exs @@ -212,7 +212,8 @@ defmodule EMQXUmbrella.MixProject do {:crc32cer, "0.1.8", override: true}, {:supervisor3, "1.1.12", override: true}, {:opentsdb, github: "emqx/opentsdb-client-erl", tag: "v0.5.1", override: true}, - {:greptimedb, github: "GreptimeTeam/greptimedb-client-erl", tag: "v0.1.7", override: true}, + {:greptimedb, + github: "GreptimeTeam/greptimedb-ingester-erl", tag: "v0.1.8", override: true}, # The following two are dependencies of rabbit_common. They are needed here to # make mix not complain about conflicting versions {:thoas, github: "emqx/thoas", tag: "v1.0.0", override: true}, From ef0fee52f450c8c4d3bc55d3ef5f015b89acd5dc Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 22 Mar 2024 17:09:23 -0700 Subject: [PATCH 002/234] fix: query_by_clientid with v0.7 sql api --- .../test/emqx_bridge_greptimedb_SUITE.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl index 6e7a23637..cabea247b 100644 --- a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl +++ b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl @@ -324,7 +324,7 @@ query_by_clientid(Topic, ClientId, Config) -> {"Content-Type", "application/x-www-form-urlencoded"} ], Body = <<"sql=select * from \"", Topic/binary, "\" where clientid='", ClientId/binary, "'">>, - {ok, 200, _Headers, RawBody0} = + {ok, _, _Headers, RawBody0} = ehttpc:request( EHttpcPoolName, post, @@ -335,7 +335,6 @@ query_by_clientid(Topic, ClientId, Config) -> case emqx_utils_json:decode(RawBody0, [return_maps]) of #{ - <<"code">> := 0, <<"output">> := [ #{ <<"records">> := #{ From 64eeeb749483f6bc999a2104b04183bd8c56e491 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Mon, 25 Mar 2024 10:05:39 -0700 Subject: [PATCH 003/234] chore: verify status 
code --- .../test/emqx_bridge_greptimedb_SUITE.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl index cabea247b..0fd839b7c 100644 --- a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl +++ b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl @@ -324,7 +324,7 @@ query_by_clientid(Topic, ClientId, Config) -> {"Content-Type", "application/x-www-form-urlencoded"} ], Body = <<"sql=select * from \"", Topic/binary, "\" where clientid='", ClientId/binary, "'">>, - {ok, _, _Headers, RawBody0} = + {ok, StatusCode, _Headers, RawBody0} = ehttpc:request( EHttpcPoolName, post, @@ -343,12 +343,12 @@ query_by_clientid(Topic, ClientId, Config) -> } } ] - } -> + } when StatusCode >= 200 andalso StatusCode =< 300 -> make_row(Schema, Rows); #{ <<"code">> := Code, <<"error">> := Error - } -> + } when StatusCode > 300 -> GreptimedbName = ?config(greptimedb_name, Config), Type = greptimedb_type_bin(?config(greptimedb_type, Config)), BridgeId = emqx_bridge_resource:bridge_id(Type, GreptimedbName), @@ -366,7 +366,9 @@ query_by_clientid(Topic, ClientId, Config) -> _ -> %% Table not found #{} - end + end; + Error -> + {error, Error} end. 
make_row(null, _Rows) -> From 1d1f595e6f402450d5092d27e58572d30242692f Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Thu, 21 Mar 2024 17:30:45 +0200 Subject: [PATCH 004/234] fix(emqx_mgmt_data_backup): remove an uploaded backup file if it's not valid --- apps/emqx_management/src/emqx_management.app.src | 2 +- apps/emqx_management/src/emqx_mgmt_data_backup.erl | 2 ++ .../test/emqx_mgmt_api_data_backup_SUITE.erl | 6 +++++- changes/ce/fix-12759.en.md | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changes/ce/fix-12759.en.md diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index bd596ffd4..bc2425a55 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! - {vsn, "5.1.0"}, + {vsn, "5.1.1"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [ diff --git a/apps/emqx_management/src/emqx_mgmt_data_backup.erl b/apps/emqx_management/src/emqx_mgmt_data_backup.erl index d88a4d998..2aaa014a8 100644 --- a/apps/emqx_management/src/emqx_mgmt_data_backup.erl +++ b/apps/emqx_management/src/emqx_mgmt_data_backup.erl @@ -315,8 +315,10 @@ do_upload(BackupFileNameStr, BackupFileContent) -> catch error:{badmatch, {error, Reason}}:Stack -> ?SLOG(error, #{msg => "emqx_data_upload_failed", reason => Reason, stacktrace => Stack}), + _ = file:delete(FilePath), {error, Reason}; Class:Reason:Stack -> + _ = file:delete(FilePath), ?SLOG(error, #{ msg => "emqx_data_upload_failed", exception => Class, diff --git a/apps/emqx_management/test/emqx_mgmt_api_data_backup_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_data_backup_SUITE.erl index e94de971d..6a580fd57 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_data_backup_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_data_backup_SUITE.erl 
@@ -199,7 +199,11 @@ upload_backup_test(Config, BackupName) -> ?assertEqual(ok, upload_backup(?NODE3_PORT, Auth, UploadFile)), %% This file was specially forged to pass upload validation bat fail on import ?assertEqual(ok, upload_backup(?NODE2_PORT, Auth, BadImportFile)), - ?assertEqual({error, bad_request}, upload_backup(?NODE1_PORT, Auth, BadUploadFile)). + ?assertEqual({error, bad_request}, upload_backup(?NODE1_PORT, Auth, BadUploadFile)), + %% Invalid file must not be kept + ?assertMatch( + {error, {_, 404, _}}, backup_file_op(get, ?NODE1_PORT, Auth, ?BAD_UPLOAD_BACKUP, []) + ). import_backup_test(Config, BackupName) -> Auth = ?config(auth, Config), diff --git a/changes/ce/fix-12759.en.md b/changes/ce/fix-12759.en.md new file mode 100644 index 000000000..2906bd17e --- /dev/null +++ b/changes/ce/fix-12759.en.md @@ -0,0 +1 @@ +Do not save invalid uploaded backup files. From 906a77d167617509c9509542bf2300f511d628bd Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 22 Mar 2024 12:12:37 +0200 Subject: [PATCH 005/234] chore: rename `message_queue_too_long` error reason to `mailbox_overflow` `mailbox_overflow` is consistent with the corresponding config parameter: 'force_shutdown.max_mailbox_size' --- apps/emqx_utils/src/emqx_utils.app.src | 2 +- apps/emqx_utils/src/emqx_utils.erl | 2 +- apps/emqx_utils/test/emqx_utils_SUITE.erl | 2 +- changes/ce/fix-12766.en.md | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 changes/ce/fix-12766.en.md diff --git a/apps/emqx_utils/src/emqx_utils.app.src b/apps/emqx_utils/src/emqx_utils.app.src index 9e2f77d71..3dffe8cec 100644 --- a/apps/emqx_utils/src/emqx_utils.app.src +++ b/apps/emqx_utils/src/emqx_utils.app.src @@ -2,7 +2,7 @@ {application, emqx_utils, [ {description, "Miscellaneous utilities for EMQX apps"}, % strict semver, bump manually! 
- {vsn, "5.1.0"}, + {vsn, "5.1.1"}, {modules, [ emqx_utils, emqx_utils_api, diff --git a/apps/emqx_utils/src/emqx_utils.erl b/apps/emqx_utils/src/emqx_utils.erl index 0be489696..aa6b7d2cf 100644 --- a/apps/emqx_utils/src/emqx_utils.erl +++ b/apps/emqx_utils/src/emqx_utils.erl @@ -261,7 +261,7 @@ check_oom(Pid, #{ ok; [{message_queue_len, QLen}, {total_heap_size, HeapSize}] -> do_check_oom([ - {QLen, MaxQLen, message_queue_too_long}, + {QLen, MaxQLen, mailbox_overflow}, {HeapSize, MaxHeapSize, proc_heap_too_large} ]) end. diff --git a/apps/emqx_utils/test/emqx_utils_SUITE.erl b/apps/emqx_utils/test/emqx_utils_SUITE.erl index c3a8cc17f..acb2623de 100644 --- a/apps/emqx_utils/test/emqx_utils_SUITE.erl +++ b/apps/emqx_utils/test/emqx_utils_SUITE.erl @@ -150,7 +150,7 @@ t_check(_) -> ?assertEqual(ok, emqx_utils:check_oom(Policy)), [self() ! {msg, I} || I <- lists:seq(1, 6)], ?assertEqual( - {shutdown, #{reason => message_queue_too_long, value => 11, max => 10}}, + {shutdown, #{reason => mailbox_overflow, value => 11, max => 10}}, emqx_utils:check_oom(Policy) ). diff --git a/changes/ce/fix-12766.en.md b/changes/ce/fix-12766.en.md new file mode 100644 index 000000000..51ace3faf --- /dev/null +++ b/changes/ce/fix-12766.en.md @@ -0,0 +1,3 @@ +Rename `message_queue_too_long` error reason to `mailbox_overflow` + +`mailbox_overflow` is consistent with the corresponding config parameter: `force_shutdown.max_mailbox_size`. 
From 04bf763890bc0591fc340a13ef01ba1cda31ce12 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 21 Mar 2024 11:49:00 -0300 Subject: [PATCH 006/234] fix(kafka-based bridges): avoid trying to get raw config for replayq dir Fixes https://emqx.atlassian.net/browse/EMQX-12049 --- apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src | 2 +- .../src/emqx_bridge_kafka_impl_producer.erl | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src index 74ba58217..b3025113a 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_bridge_kafka, [ {description, "EMQX Enterprise Kafka Bridge"}, - {vsn, "0.2.2"}, + {vsn, "0.2.3"}, {registered, [emqx_bridge_kafka_consumer_sup]}, {applications, [ kernel, diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka_impl_producer.erl b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka_impl_producer.erl index 20241fdcd..6bb1690ff 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka_impl_producer.erl +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka_impl_producer.erl @@ -668,9 +668,8 @@ partitioner(random) -> random; partitioner(key_dispatch) -> first_key_dispatch. replayq_dir(BridgeType, BridgeName) -> - RawConf = emqx_conf:get_raw([actions, BridgeType, BridgeName]), DirName = iolist_to_binary([ - emqx_bridge_lib:downgrade_type(BridgeType, RawConf), + maybe_v1_type_name(BridgeType), ":", BridgeName, ":", @@ -678,6 +677,14 @@ replayq_dir(BridgeType, BridgeName) -> ]), filename:join([emqx:data_dir(), "kafka", DirName]). +%% To avoid losing queued data on disk, we must use the same directory as the old v1 +%% bridges, if any. Among the Kafka-based bridges that exist since v1, only Kafka changed +%% its type name. 
Other bridges are either unchanged, or v2-only, and should use their v2 +%% type names. +maybe_v1_type_name(Type) when is_atom(Type) -> maybe_v1_type_name(atom_to_binary(Type)); +maybe_v1_type_name(<<"kafka_producer">>) -> <<"kafka">>; +maybe_v1_type_name(Type) -> Type. + with_log_at_error(Fun, Log) -> try Fun() From 7982dd017b67eb9000206cedc0c40bc40c38af23 Mon Sep 17 00:00:00 2001 From: zmstone Date: Sat, 23 Mar 2024 10:29:05 +0100 Subject: [PATCH 007/234] chore: upgrade http client libs gun-1.3.11 and ehttpc-0.4.13 --- changes/ce/fix-12773.en.md | 8 ++++++++ mix.exs | 4 ++-- rebar.config | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 changes/ce/fix-12773.en.md diff --git a/changes/ce/fix-12773.en.md b/changes/ce/fix-12773.en.md new file mode 100644 index 000000000..c4a1f80aa --- /dev/null +++ b/changes/ce/fix-12773.en.md @@ -0,0 +1,8 @@ +Upgrade HTTP client libraries. + +The HTTP client library (`gun-1.3`) incorrectly appends a `:portnumber` suffix to the `Host` header for +standard ports (`http` on port 80, `https` on port 443). This could cause compatibility issues with servers or +gateways performing strict `Host` header checks (e.g., AWS Lambda, Alibaba Cloud HTTP gateways), leading to +errors such as `InvalidCustomDomain.NotFound` or "The specified CustomDomain does not exist." 
+ + diff --git a/mix.exs b/mix.exs index 09f5f0692..74c33f6b3 100644 --- a/mix.exs +++ b/mix.exs @@ -49,7 +49,7 @@ defmodule EMQXUmbrella.MixProject do {:redbug, github: "emqx/redbug", tag: "2.0.10"}, {:covertool, github: "zmstone/covertool", tag: "2.0.4.1", override: true}, {:typerefl, github: "ieQu1/typerefl", tag: "0.9.1", override: true}, - {:ehttpc, github: "emqx/ehttpc", tag: "0.4.12", override: true}, + {:ehttpc, github: "emqx/ehttpc", tag: "0.4.13", override: true}, {:gproc, github: "emqx/gproc", tag: "0.9.0.1", override: true}, {:jiffy, github: "emqx/jiffy", tag: "1.0.6", override: true}, {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, @@ -77,7 +77,7 @@ defmodule EMQXUmbrella.MixProject do {:esasl, github: "emqx/esasl", tag: "0.2.0"}, {:jose, github: "potatosalad/erlang-jose", tag: "1.11.2"}, # in conflict by ehttpc and emqtt - {:gun, github: "emqx/gun", tag: "1.3.10", override: true}, + {:gun, github: "emqx/gun", tag: "1.3.11", override: true}, # in conflict by emqx_connector and system_monitor {:epgsql, github: "emqx/epgsql", tag: "4.7.1.1", override: true}, # in conflict by emqx and observer_cli diff --git a/rebar.config b/rebar.config index 238dca515..b4d52e867 100644 --- a/rebar.config +++ b/rebar.config @@ -76,8 +76,8 @@ {covertool, {git, "https://github.com/zmstone/covertool", {tag, "2.0.4.1"}}}, {gpb, "4.19.9"}, {typerefl, {git, "https://github.com/ieQu1/typerefl", {tag, "0.9.1"}}}, - {gun, {git, "https://github.com/emqx/gun", {tag, "1.3.10"}}}, - {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.12"}}}, + {gun, {git, "https://github.com/emqx/gun", {tag, "1.3.11"}}}, + {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.13"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.6"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, From 46d6ad8adc0c9e252b10919fc16e565c9a43fb13 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov 
Date: Thu, 28 Mar 2024 12:19:36 +0300 Subject: [PATCH 008/234] chore(ui-tests): reduce selenium test flakyness --- scripts/ui-tests/dashboard_test.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index 91a7264ec..79eb0640f 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -48,20 +48,21 @@ def ensure_current_url(driver, url): count += 1 time.sleep(1) -def wait_title(driver): - return WebDriverWait(driver, 10).until(lambda x: x.find_element("xpath", "//div[@id='app']//h1[@class='header-title']")) +def title(driver): + return driver.find_element("xpath", "//div[@id='app']//h1[@class='header-title']") + +def wait_title_text(driver, text): + return WebDriverWait(driver, 10).until(lambda x: title(x).text == text) def test_basic(driver, login, dashboard_url): driver.get(dashboard_url) - title = wait_title(driver) - assert "Cluster Overview" == title.text + wait_title_text(driver, "Cluster Overview") def test_log(driver, login, dashboard_url): dest_url = urljoin(dashboard_url, "/#/log") driver.get(dest_url) ensure_current_url(driver, dest_url) - title = wait_title(driver) - assert "Logging" == title.text + wait_title_text(driver, "Logging") label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Enable Log Handler')]") assert driver.find_elements(By.ID, label.get_attribute("for")) From cb3be25ff48683c9ff50276e4788fbbe09641561 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 28 Mar 2024 10:06:55 -0300 Subject: [PATCH 009/234] ci(fix): skip flow when there are no changes ``` Error when evaluating 'strategy' for job 'run_emqx_app_tests'. 
emqx/emqx/.github/workflows/run_emqx_app_tests.yaml@689347f60e5e2e5fc612aa0282c74c0eb26e8281 (Line: 65, Col: 9): Matrix must define at least one vector ``` --- .github/workflows/run_emqx_app_tests.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_emqx_app_tests.yaml b/.github/workflows/run_emqx_app_tests.yaml index 67175a37c..cd0de1582 100644 --- a/.github/workflows/run_emqx_app_tests.yaml +++ b/.github/workflows/run_emqx_app_tests.yaml @@ -35,6 +35,7 @@ jobs: shell: bash outputs: matrix: ${{ steps.matrix.outputs.matrix }} + skip: ${{ steps.matrix.outputs.skip }} steps: - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 with: @@ -49,12 +50,16 @@ jobs: changed_files="$(git diff --name-only ${BEFORE_REF} ${AFTER_REF} apps/emqx)" if [ "$changed_files" = '' ]; then echo "nothing changed in apps/emqx, ignored." - echo "matrix=[]" | tee -a $GITHUB_OUTPUT + echo 'matrix=[]' | tee -a $GITHUB_OUTPUT + echo 'skip=true' | tee -a $GITHUB_OUTPUT exit 0 + else + echo 'skip=false' | tee -a $GITHUB_OUTPUT + echo 'matrix=[{"type": "eunit_proper_and_static"},{"type": "1_3"},{"type": "2_3"},{"type": "3_3"}]' | tee -a $GITHUB_OUTPUT fi - echo 'matrix=[{"type": "eunit_proper_and_static"},{"type": "1_3"},{"type": "2_3"},{"type": "3_3"}]' | tee -a $GITHUB_OUTPUT run_emqx_app_tests: + if: needs.prepare_matrix.outputs.skip != 'true' needs: - prepare_matrix runs-on: ${{ endsWith(github.repository, '/emqx') && 'ubuntu-22.04' || fromJSON('["self-hosted","ephemeral","linux","x64"]') }} From 8fb4ef9fe36cb5022ddcc00f7502529896ef1ffa Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 28 Mar 2024 10:53:44 -0300 Subject: [PATCH 010/234] test: fix flaky test --- apps/emqx/test/emqx_persistent_messages_SUITE.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 94bc58908..1a150d5b1 100644 --- 
a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -584,6 +584,8 @@ message(Topic, Payload, PublishedAt) -> id = emqx_guid:gen() }. +on_message_dropped(#message{flags = #{sys := true}}, _Context, _Res, _TestPid) -> + ok; on_message_dropped(Msg, Context, Res, TestPid) -> ErrCtx = #{msg => Msg, ctx => Context, res => Res}, ct:pal("this hook should not be called.\n ~p", [ErrCtx]), From fa66a640c32e35f0a173df491027486b648fb8f3 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 26 Mar 2024 17:13:38 +0100 Subject: [PATCH 011/234] fix(dsrepl): handle RPC errors gracefully when storage is down --- .../src/emqx_ds_replication_layer.erl | 152 ++++++++++++------ .../src/emqx_ds_storage_layer.erl | 9 ++ 2 files changed, 108 insertions(+), 53 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index 72f142b8f..f8c4980d0 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -181,12 +181,19 @@ list_generations_with_lifetimes(DB) -> Shards = list_shards(DB), lists:foldl( fun(Shard, GensAcc) -> + case ra_list_generations_with_lifetimes(DB, Shard) of + Gens = #{} -> + ok; + {error, _Class, _Reason} -> + %% TODO: log error + Gens = #{} + end, maps:fold( fun(GenId, Data, AccInner) -> AccInner#{{Shard, GenId} => Data} end, GensAcc, - ra_list_generations_with_lifetimes(DB, Shard) + Gens ) end, #{}, @@ -221,14 +228,13 @@ get_streams(DB, TopicFilter, StartTime) -> Shards = list_shards(DB), lists:flatmap( fun(Shard) -> - Streams = - try - ra_get_streams(DB, Shard, TopicFilter, StartTime) - catch - error:{erpc, _} -> - %% TODO: log? 
- [] - end, + case ra_get_streams(DB, Shard, TopicFilter, StartTime) of + Streams when is_list(Streams) -> + ok; + {error, _Class, _Reason} -> + %% TODO: log error + Streams = [] + end, lists:map( fun({RankY, StorageLayerStream}) -> RankX = Shard, @@ -262,14 +268,11 @@ get_delete_streams(DB, TopicFilter, StartTime) -> emqx_ds:make_iterator_result(iterator()). make_iterator(DB, Stream, TopicFilter, StartTime) -> ?stream_v2(Shard, StorageStream) = Stream, - try ra_make_iterator(DB, Shard, StorageStream, TopicFilter, StartTime) of + case ra_make_iterator(DB, Shard, StorageStream, TopicFilter, StartTime) of {ok, Iter} -> {ok, #{?tag => ?IT, ?shard => Shard, ?enc => Iter}}; Error = {error, _, _} -> Error - catch - error:RPCError = {erpc, _} -> - {error, recoverable, RPCError} end. -spec make_delete_iterator(emqx_ds:db(), delete_stream(), emqx_ds:topic_filter(), emqx_ds:time()) -> @@ -279,22 +282,19 @@ make_delete_iterator(DB, Stream, TopicFilter, StartTime) -> case ra_make_delete_iterator(DB, Shard, StorageStream, TopicFilter, StartTime) of {ok, Iter} -> {ok, #{?tag => ?DELETE_IT, ?shard => Shard, ?enc => Iter}}; - Err = {error, _} -> - Err + Error = {error, _, _} -> + Error end. -spec update_iterator(emqx_ds:db(), iterator(), emqx_ds:message_key()) -> emqx_ds:make_iterator_result(iterator()). update_iterator(DB, OldIter, DSKey) -> #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter} = OldIter, - try ra_update_iterator(DB, Shard, StorageIter, DSKey) of + case ra_update_iterator(DB, Shard, StorageIter, DSKey) of {ok, Iter} -> {ok, #{?tag => ?IT, ?shard => Shard, ?enc => Iter}}; Error = {error, _, _} -> Error - catch - error:RPCError = {erpc, _} -> - {error, recoverable, RPCError} end. -spec next(emqx_ds:db(), iterator(), pos_integer()) -> emqx_ds:next_result(iterator()). 
@@ -312,12 +312,8 @@ next(DB, Iter0, BatchSize) -> {ok, StorageIter, Batch} -> Iter = Iter0#{?enc := StorageIter}, {ok, Iter, Batch}; - Ok = {ok, _} -> - Ok; - Error = {error, _, _} -> - Error; - RPCError = {badrpc, _} -> - {error, recoverable, RPCError} + Other -> + Other end. -spec delete_next(emqx_ds:db(), delete_iterator(), emqx_ds:delete_selector(), pos_integer()) -> @@ -354,6 +350,19 @@ foreach_shard(DB, Fun) -> %% Internal exports (RPC targets) %%================================================================================ +%% NOTE +%% Target node may still be in the process of starting up when RPCs arrive, it's +%% good to have them handled gracefully. +%% TODO +%% There's a possibility of race condition: storage may shut down right after we +%% ask for its status. +-define(IF_STORAGE_RUNNING(SHARDID, EXPR), + case emqx_ds_storage_layer:shard_info(SHARDID, status) of + running -> EXPR; + down -> {error, recoverable, storage_down} + end +). + -spec do_drop_db_v1(emqx_ds:db()) -> ok | {error, _}. do_drop_db_v1(DB) -> MyShards = emqx_ds_replication_layer_meta:my_shards(DB), @@ -386,11 +395,18 @@ do_get_streams_v1(_DB, _Shard, _TopicFilter, _StartTime) -> error(obsolete_api). -spec do_get_streams_v2( - emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:topic_filter(), emqx_ds:time() + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds:topic_filter(), + emqx_ds:time() ) -> - [{integer(), emqx_ds_storage_layer:stream()}]. + [{integer(), emqx_ds_storage_layer:stream()}] | emqx_ds:error(storage_down). do_get_streams_v2(DB, Shard, TopicFilter, StartTime) -> - emqx_ds_storage_layer:get_streams({DB, Shard}, TopicFilter, StartTime). + ShardId = {DB, Shard}, + ?IF_STORAGE_RUNNING( + ShardId, + emqx_ds_storage_layer:get_streams(ShardId, TopicFilter, StartTime) + ). -dialyzer({nowarn_function, do_make_iterator_v1/5}). 
-spec do_make_iterator_v1( @@ -413,7 +429,11 @@ do_make_iterator_v1(_DB, _Shard, _Stream, _TopicFilter, _StartTime) -> ) -> emqx_ds:make_iterator_result(emqx_ds_storage_layer:iterator()). do_make_iterator_v2(DB, Shard, Stream, TopicFilter, StartTime) -> - emqx_ds_storage_layer:make_iterator({DB, Shard}, Stream, TopicFilter, StartTime). + ShardId = {DB, Shard}, + ?IF_STORAGE_RUNNING( + ShardId, + emqx_ds_storage_layer:make_iterator(ShardId, Stream, TopicFilter, StartTime) + ). -spec do_make_delete_iterator_v4( emqx_ds:db(), @@ -434,9 +454,7 @@ do_make_delete_iterator_v4(DB, Shard, Stream, TopicFilter, StartTime) -> ) -> emqx_ds:make_iterator_result(emqx_ds_storage_layer:iterator()). do_update_iterator_v2(DB, Shard, OldIter, DSKey) -> - emqx_ds_storage_layer:update_iterator( - {DB, Shard}, OldIter, DSKey - ). + emqx_ds_storage_layer:update_iterator({DB, Shard}, OldIter, DSKey). -spec do_next_v1( emqx_ds:db(), @@ -446,7 +464,11 @@ do_update_iterator_v2(DB, Shard, OldIter, DSKey) -> ) -> emqx_ds:next_result(emqx_ds_storage_layer:iterator()). do_next_v1(DB, Shard, Iter, BatchSize) -> - emqx_ds_storage_layer:next({DB, Shard}, Iter, BatchSize). + ShardId = {DB, Shard}, + ?IF_STORAGE_RUNNING( + ShardId, + emqx_ds_storage_layer:next(ShardId, Iter, BatchSize) + ). -spec do_delete_next_v4( emqx_ds:db(), @@ -464,9 +486,14 @@ do_add_generation_v2(_DB) -> error(obsolete_api). -spec do_list_generations_with_lifetimes_v3(emqx_ds:db(), shard_id()) -> - #{emqx_ds:ds_specific_generation_rank() => emqx_ds:generation_info()}. -do_list_generations_with_lifetimes_v3(DB, ShardId) -> - emqx_ds_storage_layer:list_generations_with_lifetimes({DB, ShardId}). + #{emqx_ds:ds_specific_generation_rank() => emqx_ds:generation_info()} + | emqx_ds:error(storage_down). +do_list_generations_with_lifetimes_v3(DB, Shard) -> + ShardId = {DB, Shard}, + ?IF_STORAGE_RUNNING( + ShardId, + emqx_ds_storage_layer:list_generations_with_lifetimes(ShardId) + ). 
-spec do_drop_generation_v3(emqx_ds:db(), shard_id(), emqx_ds_storage_layer:gen_id()) -> ok | {error, _}. @@ -491,6 +518,15 @@ list_nodes() -> %% Too large for normal operation, need better backpressure mechanism. -define(RA_TIMEOUT, 60 * 1000). +-define(SAFERPC(EXPR), + try + EXPR + catch + error:RPCError = {erpc, _} -> + {error, recoverable, RPCError} + end +). + ra_store_batch(DB, Shard, Messages) -> Command = #{ ?tag => ?BATCH, @@ -544,28 +580,34 @@ ra_drop_generation(DB, Shard, GenId) -> ra_get_streams(DB, Shard, TopicFilter, Time) -> {_, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), TimestampUs = timestamp_to_timeus(Time), - emqx_ds_proto_v4:get_streams(Node, DB, Shard, TopicFilter, TimestampUs). + ?SAFERPC(emqx_ds_proto_v4:get_streams(Node, DB, Shard, TopicFilter, TimestampUs)). ra_get_delete_streams(DB, Shard, TopicFilter, Time) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - emqx_ds_proto_v4:get_delete_streams(Node, DB, Shard, TopicFilter, Time). + ?SAFERPC(emqx_ds_proto_v4:get_delete_streams(Node, DB, Shard, TopicFilter, Time)). ra_make_iterator(DB, Shard, Stream, TopicFilter, StartTime) -> {_, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - TimestampUs = timestamp_to_timeus(StartTime), - emqx_ds_proto_v4:make_iterator(Node, DB, Shard, Stream, TopicFilter, TimestampUs). + TimeUs = timestamp_to_timeus(StartTime), + ?SAFERPC(emqx_ds_proto_v4:make_iterator(Node, DB, Shard, Stream, TopicFilter, TimeUs)). ra_make_delete_iterator(DB, Shard, Stream, TopicFilter, StartTime) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - emqx_ds_proto_v4:make_delete_iterator(Node, DB, Shard, Stream, TopicFilter, StartTime). + TimeUs = timestamp_to_timeus(StartTime), + ?SAFERPC(emqx_ds_proto_v4:make_delete_iterator(Node, DB, Shard, Stream, TopicFilter, TimeUs)). 
ra_update_iterator(DB, Shard, Iter, DSKey) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - emqx_ds_proto_v4:update_iterator(Node, DB, Shard, Iter, DSKey). + ?SAFERPC(emqx_ds_proto_v4:update_iterator(Node, DB, Shard, Iter, DSKey)). ra_next(DB, Shard, Iter, BatchSize) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - emqx_ds_proto_v4:next(Node, DB, Shard, Iter, BatchSize). + case emqx_ds_proto_v4:next(Node, DB, Shard, Iter, BatchSize) of + RPCError = {badrpc, _} -> + {error, recoverable, RPCError}; + Other -> + Other + end. ra_delete_next(DB, Shard, Iter, Selector, BatchSize) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), @@ -573,16 +615,20 @@ ra_delete_next(DB, Shard, Iter, Selector, BatchSize) -> ra_list_generations_with_lifetimes(DB, Shard) -> {_Name, Node} = emqx_ds_replication_layer_shard:server(DB, Shard, local_preferred), - Gens = emqx_ds_proto_v4:list_generations_with_lifetimes(Node, DB, Shard), - maps:map( - fun(_GenId, Data = #{since := Since, until := Until}) -> - Data#{ - since := timeus_to_timestamp(Since), - until := emqx_maybe:apply(fun timeus_to_timestamp/1, Until) - } - end, - Gens - ). + case ?SAFERPC(emqx_ds_proto_v4:list_generations_with_lifetimes(Node, DB, Shard)) of + Gens = #{} -> + maps:map( + fun(_GenId, Data = #{since := Since, until := Until}) -> + Data#{ + since := timeus_to_timestamp(Since), + until := emqx_maybe:apply(fun timeus_to_timestamp/1, Until) + } + end, + Gens + ); + Error -> + Error + end. ra_drop_shard(DB, Shard) -> ra:delete_cluster(emqx_ds_replication_layer_shard:shard_servers(DB, Shard), ?RA_TIMEOUT). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 69f5b8231..5319458e2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -21,6 +21,7 @@ -export([ open_shard/2, drop_shard/1, + shard_info/2, store_batch/3, get_streams/3, get_delete_streams/3, @@ -436,6 +437,14 @@ list_generations_with_lifetimes(ShardId) -> drop_generation(ShardId, GenId) -> gen_server:call(?REF(ShardId), #call_drop_generation{gen_id = GenId}, infinity). +-spec shard_info(shard_id(), status) -> running | down. +shard_info(ShardId, status) -> + try get_schema_runtime(ShardId) of + #{} -> running + catch + error:badarg -> down + end. + %%================================================================================ %% gen_server for the shard %%================================================================================ From 35c43eb8a07141313d34ef88cb7d84bb514790de Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 26 Mar 2024 18:23:08 +0100 Subject: [PATCH 012/234] feat(sessds): handle recoverable errors in stream scheduler --- apps/emqx/src/emqx_persistent_session_ds.erl | 6 ++-- ...persistent_session_ds_stream_scheduler.erl | 28 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index c1ed6aabd..83ed5d465 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -571,7 +571,7 @@ replay(ClientInfo, [], Session0 = #{s := S0}) -> Session = replay_streams(Session0#{replay => Streams}, ClientInfo), {ok, [], Session}. 
-replay_streams(Session0 = #{replay := [{_StreamKey, Srs0} | Rest]}, ClientInfo) -> +replay_streams(Session0 = #{replay := [{StreamKey, Srs0} | Rest]}, ClientInfo) -> case replay_batch(Srs0, Session0, ClientInfo) of Session = #{} -> replay_streams(Session#{replay := Rest}, ClientInfo); @@ -579,7 +579,7 @@ replay_streams(Session0 = #{replay := [{_StreamKey, Srs0} | Rest]}, ClientInfo) RetryTimeout = ?TIMEOUT_RETRY_REPLAY, ?SLOG(warning, #{ msg => "failed_to_fetch_replay_batch", - stream => Srs0, + stream => StreamKey, reason => Reason, class => recoverable, retry_in_ms => RetryTimeout @@ -867,7 +867,7 @@ new_batch({StreamKey, Srs0}, BatchSize, Session0 = #{s := S0}, ClientInfo) -> %% TODO: Handle unrecoverable error. ?SLOG(info, #{ msg => "failed_to_fetch_batch", - stream => Srs1, + stream => StreamKey, reason => Reason, class => Class }), diff --git a/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl b/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl index 286d32ef4..154f59b44 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl @@ -208,16 +208,24 @@ ensure_iterator(TopicFilter, StartTime, SubId, {{RankX, RankY}, Stream}, S) -> ?SLOG(debug, #{ msg => new_stream, key => Key, stream => Stream }), - {ok, Iterator} = emqx_ds:make_iterator( - ?PERSISTENT_MESSAGE_DB, Stream, TopicFilter, StartTime - ), - NewStreamState = #srs{ - rank_x = RankX, - rank_y = RankY, - it_begin = Iterator, - it_end = Iterator - }, - emqx_persistent_session_ds_state:put_stream(Key, NewStreamState, S); + case emqx_ds:make_iterator(?PERSISTENT_MESSAGE_DB, Stream, TopicFilter, StartTime) of + {ok, Iterator} -> + NewStreamState = #srs{ + rank_x = RankX, + rank_y = RankY, + it_begin = Iterator, + it_end = Iterator + }, + emqx_persistent_session_ds_state:put_stream(Key, NewStreamState, S); + {error, recoverable, Reason} -> + ?SLOG(warning, #{ + msg => 
"failed_to_initialize_stream_iterator", + stream => Stream, + class => recoverable, + reason => Reason + }), + S + end; #srs{} -> S end. From 8ccc18a0f3c72b6c419c061d9dcc250f6c6d3d2e Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 28 Mar 2024 15:46:35 +0100 Subject: [PATCH 013/234] ci(green_master): only trigger reruns for master branch --- .github/workflows/green_master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/green_master.yaml b/.github/workflows/green_master.yaml index 1984aa692..7053247e3 100644 --- a/.github/workflows/green_master.yaml +++ b/.github/workflows/green_master.yaml @@ -31,7 +31,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - gh api --method GET -f head_branch=master -f status=completed -f exclude_pull_requests=true /repos/emqx/emqx/actions/runs > runs.json + gh api --method GET -f head_sha=$(git rev-parse HEAD) -f status=completed -f exclude_pull_requests=true /repos/emqx/emqx/actions/runs > runs.json for id in $(jq -r '.workflow_runs[] | select((."conclusion" == "failure") and (."name" != "Keep master green") and .run_attempt < 3) | .id' runs.json); do echo "rerun https://github.com/emqx/emqx/actions/runs/$id" gh api --method POST /repos/emqx/emqx/actions/runs/$id/rerun-failed-jobs || true From 9bf65a415b17f635f2a86487920bc5a0ac8df580 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 27 Mar 2024 17:34:18 +0100 Subject: [PATCH 014/234] feat(variform): add a variable transformer --- .gitignore | 2 ++ .../src/variform/emqx_variform_parser.yrl | 16 ++++++++++++ apps/emqx/src/variform/emqx_variform_scan.xrl | 26 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 apps/emqx/src/variform/emqx_variform_parser.yrl create mode 100644 apps/emqx/src/variform/emqx_variform_scan.xrl diff --git a/.gitignore b/.gitignore index 5e91d4bc5..a2c8b7e65 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,5 @@ rebar-git-cache.tar .docker_image_tag .emqx_docker_image_tags 
.git/ +apps/emqx/src/emqx_variform_parser.erl +apps/emqx/src/variform/emqx_variform_scan.erl diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx/src/variform/emqx_variform_parser.yrl new file mode 100644 index 000000000..3f2b739ba --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_parser.yrl @@ -0,0 +1,16 @@ +Nonterminals expr call_or_var args. +Terminals identifier number string '(' ')' ','. + +Rootsymbol expr. + +%% Grammar Rules +expr -> call_or_var: '$1'. + +call_or_var -> identifier '(' args ')' : {call, element(3,'$1'), '$3'}. +call_or_var -> identifier : {var, element(3, '$1')}. +args -> expr : ['$1']. +args -> args ',' expr : '$1' ++ ['$3']. + +%% Handling direct values and variables within arguments +expr -> number : {num, element(3, '$1')}. +expr -> string : {str, element(3, '$1')}. diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl b/apps/emqx/src/variform/emqx_variform_scan.xrl new file mode 100644 index 000000000..53657bad4 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_scan.xrl @@ -0,0 +1,26 @@ +Definitions. +%% Define regular expressions for tokens +IDENTIFIER = [a-zA-Z][a-zA-Z0-9_.]* +SQ_STRING = \'[^\']*\' +DQ_STRING = \"[^\"]*\" +NUMBER = [+-]?(\\d+\\.\\d+|[0-9]+) +LPAREN = \( +RPAREN = \) +COMMA = , +WHITESPACE = [\s\t\n]+ + +Rules. +%% Match function names, variable names (with ${}), strings, numbers, and structural characters +{WHITESPACE} : skip_token. +{IDENTIFIER} : {token, {identifier, TokenLine, TokenChars}}. +{SQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $')}}. +{DQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $")}}. +{NUMBER} : {token, {number, TokenLine, TokenChars}}. +{LPAREN} : {token, {'(', TokenLine}}. +{RPAREN} : {token, {')', TokenLine}}. +{COMMA} : {token, {',', TokenLine}}. + +Erlang code. + +unquote(String, Char) -> + string:trim(String, both, [Char]). 
From 5367893427e5f431aefa882cde1aafe27b867ad4 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 28 Mar 2024 16:41:50 +0100 Subject: [PATCH 015/234] ci(build_packages): restore building tgz --- .github/workflows/build_packages.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_packages.yaml b/.github/workflows/build_packages.yaml index 4ea381acc..9be54e394 100644 --- a/.github/workflows/build_packages.yaml +++ b/.github/workflows/build_packages.yaml @@ -151,7 +151,23 @@ jobs: with: ref: ${{ github.event.inputs.ref }} fetch-depth: 0 - - name: build emqx packages + - name: build tgz + env: + PROFILE: ${{ matrix.profile }} + ARCH: ${{ matrix.arch }} + OS: ${{ matrix.os }} + IS_ELIXIR: ${{ matrix.with_elixir }} + BUILDER: "ghcr.io/emqx/emqx-builder/${{ matrix.builder }}:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" + BUILDER_SYSTEM: force_docker + run: | + ./scripts/buildx.sh \ + --profile $PROFILE \ + --arch $ARCH \ + --builder $BUILDER \ + --elixir $IS_ELIXIR \ + --pkgtype tgz + - name: build pkg + if: matrix.with_elixir == 'no' env: PROFILE: ${{ matrix.profile }} ARCH: ${{ matrix.arch }} From 3eda182e9a320c9c3afc726359f44b8f5a0c4d6b Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Wed, 27 Mar 2024 20:04:16 +0200 Subject: [PATCH 016/234] fix: prevent a node from discovering and re-joining the same cluster after it has (manually) left it. 
--- apps/emqx/rebar.config | 2 +- .../src/emqx_management.app.src | 2 +- apps/emqx_management/src/emqx_mgmt_cli.erl | 25 +++++- .../test/emqx_mgmt_cli_SUITE.erl | 83 +++++++++++++++++++ changes/ce/fix-12802.en.md | 3 + mix.exs | 2 +- rebar.config | 2 +- 7 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 changes/ce/fix-12802.en.md diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 60ac6343f..70cf636e7 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.1"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.42.1"}}}, {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index bd596ffd4..bc2425a55 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! 
- {vsn, "5.1.0"}, + {vsn, "5.1.1"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [ diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index ddbc60d5c..2af3a8397 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -108,6 +108,7 @@ cluster(["join", SNode]) -> emqx_ctl:print("Failed to join the cluster: ~0p~n", [Error]) end; cluster(["leave"]) -> + _ = maybe_disable_autocluster(), case mria:leave() of ok -> emqx_ctl:print("Leave the cluster successfully.~n"), @@ -139,12 +140,15 @@ cluster(["status"]) -> cluster(["status", "--json"]) -> Info = sort_map_list_fields(cluster_info()), emqx_ctl:print("~ts~n", [emqx_logger_jsonfmt:best_effort_json(Info)]); +cluster(["discovery", "enable"]) -> + enable_autocluster(); cluster(_) -> emqx_ctl:usage([ {"cluster join ", "Join the cluster"}, {"cluster leave", "Leave the cluster"}, {"cluster force-leave ", "Force the node leave from cluster"}, - {"cluster status [--json]", "Cluster status"} + {"cluster status [--json]", "Cluster status"}, + {"cluster discovery enable", "Enable and run automatic cluster discovery (if configured)"} ]). %% sort lists for deterministic output @@ -163,6 +167,25 @@ sort_map_list_field(Field, Map) -> _ -> Map end. +enable_autocluster() -> + ok = ekka:enable_autocluster(), + _ = ekka:autocluster(emqx), + emqx_ctl:print("Automatic cluster discovery enabled.~n"). + +maybe_disable_autocluster() -> + case ekka:autocluster_enabled() of + true -> + ok = ekka:disable_autocluster(), + emqx_ctl:print( + "Automatic cluster discovery is disabled on this node: ~p to avoid" + " re-joining the same cluster again, if the node is not stopped soon." + " To enable it run: 'emqx ctl cluster discovery enable' or restart the node.~n", + [node()] + ); + false -> + ok + end. 
+ %%-------------------------------------------------------------------- %% @doc Query clients diff --git a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index c81881c95..c6f00bff0 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -19,6 +19,7 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). all() -> emqx_common_test_helpers:all(?MODULE). @@ -31,6 +32,47 @@ init_per_suite(Config) -> end_per_suite(_) -> emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). +init_per_testcase(t_autocluster_leave = TC, Config) -> + [Core1, Core2, Core3, Repl] = + Nodes = [ + t_autocluster_leave_core1, + t_autocluster_leave_core2, + t_autocluster_leave_core3, + t_autocluster_leave_replicant + ], + + NodeNames = [emqx_cth_cluster:node_name(N) || N <- Nodes], + AppSpec = [ + emqx, + {emqx_conf, #{ + config => #{ + cluster => #{ + discovery_strategy => static, + static => #{seeds => NodeNames} + } + } + }}, + emqx_management + ], + Cluster = emqx_cth_cluster:start( + [ + {Core1, #{role => core, apps => AppSpec}}, + {Core2, #{role => core, apps => AppSpec}}, + {Core3, #{role => core, apps => AppSpec}}, + {Repl, #{role => replicant, apps => AppSpec}} + ], + #{work_dir => emqx_cth_suite:work_dir(TC, Config)} + ), + [{cluster, Cluster} | Config]; +init_per_testcase(_TC, Config) -> + Config. + +end_per_testcase(_TC, Config) -> + case ?config(cluster, Config) of + undefined -> ok; + Cluster -> emqx_cth_cluster:stop(Cluster) + end. + t_status(_Config) -> emqx_ctl:run_command([]), emqx_ctl:run_command(["status"]), @@ -263,3 +305,44 @@ t_admin(_Config) -> %% admins passwd # Reset dashboard user password %% admins del # Delete dashboard user ok. 
+ +t_autocluster_leave(Config) -> + [Core1, Core2, Core3, Repl] = Cluster = ?config(cluster, Config), + %% Mria membership updates are async, makes sense to wait a little + timer:sleep(300), + ClusterView = [lists:sort(rpc:call(N, emqx, running_nodes, [])) || N <- Cluster], + [View1, View2, View3, View4] = ClusterView, + ?assertEqual(lists:sort(Cluster), View1), + ?assertEqual(View1, View2), + ?assertEqual(View1, View3), + ?assertEqual(View1, View4), + + rpc:call(Core3, emqx_mgmt_cli, cluster, [["leave"]]), + timer:sleep(1000), + %% Replicant node may still discover and join Core3 which is now split from [Core1, Core2], + %% but it's expected to choose a bigger cluster of [Core1, Core2].. + ?assertMatch([Core3], rpc:call(Core3, emqx, running_nodes, [])), + ?assertEqual(undefined, rpc:call(Core1, erlang, whereis, [ekka_autocluster])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core2, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Repl, emqx, running_nodes, [])), + + rpc:call(Repl, emqx_mgmt_cli, cluster, [["leave"]]), + timer:sleep(1000), + ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core2, emqx, running_nodes, [])), + + rpc:call(Core3, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + rpc:call(Repl, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + %% core nodes will join and restart asyncly, may need more time to re-cluster + ?assertEqual( + ok, + emqx_common_test_helpers:wait_for( + ?FUNCTION_NAME, + ?LINE, + fun() -> + [lists:sort(rpc:call(N, emqx, running_nodes, [])) || N <- Cluster] =:= ClusterView + end, + 10_000 + ) + ). 
diff --git a/changes/ce/fix-12802.en.md b/changes/ce/fix-12802.en.md new file mode 100644 index 000000000..f63603a97 --- /dev/null +++ b/changes/ce/fix-12802.en.md @@ -0,0 +1,3 @@ +Improve cluster discovery behaviour when a node is manually removed from a cluster using 'emqx ctl cluster leave' command. +Previously, if the configured cluster 'discovery_strategy' was not 'manual', the left node might re-discover and re-join the same cluster shortly after it left (unless it was stopped). +After this change, 'cluster leave' command disables automatic cluster_discovery, so that the left node won't re-join the same cluster again. Cluster discovery can be re-enabled by running 'emqx ctl discovery enable` or by restarting the left node. diff --git a/mix.exs b/mix.exs index 638bbb284..20df56e90 100644 --- a/mix.exs +++ b/mix.exs @@ -55,7 +55,7 @@ defmodule EMQXUmbrella.MixProject do {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, {:esockd, github: "emqx/esockd", tag: "5.11.1", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-2", override: true}, - {:ekka, github: "emqx/ekka", tag: "0.19.1", override: true}, + {:ekka, github: "emqx/ekka", tag: "0.19.2", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "3.3.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.12", override: true}, {:minirest, github: "emqx/minirest", tag: "1.4.0", override: true}, diff --git a/rebar.config b/rebar.config index 238dca515..1dd17c4e7 100644 --- a/rebar.config +++ b/rebar.config @@ -83,7 +83,7 @@ {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-2"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.1"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {grpc, {git, 
"https://github.com/emqx/grpc-erl", {tag, "0.6.12"}}}, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.4.0"}}}, From ad95473aaef0148d350996f2a2bfccc1b812f90f Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 13:59:31 +0100 Subject: [PATCH 017/234] refactor: move string functions to emqx_variform --- apps/emqx/src/variform/emqx_variform.erl | 39 ++ apps/emqx/src/variform/emqx_variform_str.erl | 353 ++++++++++++++++++ apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 271 +++----------- 3 files changed, 434 insertions(+), 229 deletions(-) create mode 100644 apps/emqx/src/variform/emqx_variform.erl create mode 100644 apps/emqx/src/variform/emqx_variform_str.erl diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl new file mode 100644 index 000000000..6f4fd6f47 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -0,0 +1,39 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Predefined functions for templating +-module(emqx_variform). + +-export([render/2]). 
+ +render(Expression, Context) -> + case emqx_variform_scan:string(Expression) of + {ok, Tokens, _Line} -> + case emqx_variform_parser:parse(Tokens) of + {ok, Expr} -> + eval(Expr, Context); + {error, {_, emqx_variform_parser, Msg}} -> + %% syntax error + {error, lists:flatten(Msg)}; + {error, Reason} -> + {error, Reason} + end; + {error, Reason, _Line} -> + {error, Reason} + end. + +eval(Expr, _Context) -> + io:format(user, "~p~n", [Expr]). diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx/src/variform/emqx_variform_str.erl new file mode 100644 index 000000000..d94519f76 --- /dev/null +++ b/apps/emqx/src/variform/emqx_variform_str.erl @@ -0,0 +1,353 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Predefined functions string templating +-module(emqx_variform_str). + +%% String Funcs +-export([ + coalesce/1, + coalesce/2, + lower/1, + ltrim/1, + ltrim/2, + reverse/1, + rtrim/1, + rtrim/2, + strlen/1, + substr/2, + substr/3, + trim/1, + trim/2, + upper/1, + split/2, + split/3, + concat/1, + concat/2, + tokens/2, + tokens/3, + sprintf_s/2, + pad/2, + pad/3, + pad/4, + replace/3, + replace/4, + regex_match/2, + regex_replace/3, + ascii/1, + find/2, + find/3, + join_to_string/1, + join_to_string/2, + unescape/1 +]). 
+ +-define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). + +%%------------------------------------------------------------------------------ +%% String Funcs +%%------------------------------------------------------------------------------ + +%% @doc Return the first non-empty string +coalesce(A, B) when ?IS_EMPTY(A) andalso ?IS_EMPTY(B) -> + <<>>; +coalesce(A, _) when is_binary(A) -> + A; +coalesce(_, B) -> + B. + +%% @doc Return the first non-empty string +coalesce([]) -> + <<>>; +coalesce([H | T]) -> + coalesce(H, coalesce(T)). + +lower(S) when is_binary(S) -> + string:lowercase(S). + +ltrim(S) when is_binary(S) -> + string:trim(S, leading). + +ltrim(S, Chars) -> + string:trim(S, leading, Chars). + +reverse(S) when is_binary(S) -> + iolist_to_binary(string:reverse(S)). + +rtrim(S) when is_binary(S) -> + string:trim(S, trailing). + +rtrim(S, Chars) when is_binary(S) -> + string:trim(S, trailing, Chars). + +strlen(S) when is_binary(S) -> + string:length(S). + +substr(S, Start) when is_binary(S), is_integer(Start) -> + string:slice(S, Start). + +substr(S, Start, Length) when + is_binary(S), + is_integer(Start), + is_integer(Length) +-> + string:slice(S, Start, Length). + +trim(S) when is_binary(S) -> + string:trim(S). + +trim(S, Chars) when is_binary(S) -> + string:trim(S, both, Chars). + +upper(S) when is_binary(S) -> + string:uppercase(S). + +split(S, P) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, all), R =/= <<>> andalso R =/= ""]. + +split(S, P, <<"notrim">>) -> + string:split(S, P, all); +split(S, P, <<"leading_notrim">>) -> + string:split(S, P, leading); +split(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, leading), R =/= <<>> andalso R =/= ""]; +split(S, P, <<"trailing_notrim">>) -> + string:split(S, P, trailing); +split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> + [R || R <- string:split(S, P, trailing), R =/= <<>> andalso R =/= ""]. 
+ +tokens(S, Separators) -> + [list_to_binary(R) || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators))]. + +tokens(S, Separators, <<"nocrlf">>) -> + [ + list_to_binary(R) + || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]]) + ]. + +%% implicit convert args to strings, and then do concatenation +concat(S1, S2) -> + concat([S1, S2], unicode). + +concat(List) -> + unicode:characters_to_binary(lists:map(fun str/1, List), unicode). + +sprintf_s(Format, Args) when is_list(Args) -> + erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). + +pad(S, Len) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, trailing)). + +pad(S, Len, <<"trailing">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, trailing)); +pad(S, Len, <<"both">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, both)); +pad(S, Len, <<"leading">>) when is_binary(S), is_integer(Len) -> + iolist_to_binary(string:pad(S, Len, leading)). + +pad(S, Len, <<"trailing">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, trailing, Chars)); +pad(S, Len, <<"both">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, both, Chars)); +pad(S, Len, <<"leading">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> + Chars = unicode:characters_to_list(Char, utf8), + iolist_to_binary(string:pad(S, Len, leading, Chars)). + +replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, all)). 
+ +replace(SrcStr, P, RepStr, <<"all">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, all)); +replace(SrcStr, P, RepStr, <<"trailing">>) when + is_binary(SrcStr), is_binary(P), is_binary(RepStr) +-> + iolist_to_binary(string:replace(SrcStr, P, RepStr, trailing)); +replace(SrcStr, P, RepStr, <<"leading">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> + iolist_to_binary(string:replace(SrcStr, P, RepStr, leading)). + +regex_match(Str, RE) -> + case re:run(Str, RE, [global, {capture, none}]) of + match -> true; + nomatch -> false + end. + +regex_replace(SrcStr, RE, RepStr) -> + re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). + +ascii(Char) when is_binary(Char) -> + [FirstC | _] = binary_to_list(Char), + FirstC. + +find(S, P) when is_binary(S), is_binary(P) -> + find_s(S, P, leading). + +find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> + find_s(S, P, trailing); +find(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> + find_s(S, P, leading). + +find_s(S, P, Dir) -> + case string:find(S, P, Dir) of + nomatch -> <<"">>; + SubStr -> SubStr + end. + +join_to_string(List) when is_list(List) -> + join_to_string(<<", ">>, List). + +join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> + iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). + +unescape(Bin) when is_binary(Bin) -> + UnicodeList = unicode:characters_to_list(Bin, utf8), + UnescapedUnicodeList = unescape_string(UnicodeList), + UnescapedUTF8Bin = unicode:characters_to_binary(UnescapedUnicodeList, utf32, utf8), + case UnescapedUTF8Bin of + Out when is_binary(Out) -> + Out; + Error -> + throw({invalid_unicode_character, Error}) + end. + +unescape_string(Input) -> unescape_string(Input, []). 
+ +unescape_string([], Acc) -> + lists:reverse(Acc); +unescape_string([$\\, $\\ | Rest], Acc) -> + unescape_string(Rest, [$\\ | Acc]); +unescape_string([$\\, $n | Rest], Acc) -> + unescape_string(Rest, [$\n | Acc]); +unescape_string([$\\, $t | Rest], Acc) -> + unescape_string(Rest, [$\t | Acc]); +unescape_string([$\\, $r | Rest], Acc) -> + unescape_string(Rest, [$\r | Acc]); +unescape_string([$\\, $b | Rest], Acc) -> + unescape_string(Rest, [$\b | Acc]); +unescape_string([$\\, $f | Rest], Acc) -> + unescape_string(Rest, [$\f | Acc]); +unescape_string([$\\, $v | Rest], Acc) -> + unescape_string(Rest, [$\v | Acc]); +unescape_string([$\\, $' | Rest], Acc) -> + unescape_string(Rest, [$\' | Acc]); +unescape_string([$\\, $" | Rest], Acc) -> + unescape_string(Rest, [$\" | Acc]); +unescape_string([$\\, $? | Rest], Acc) -> + unescape_string(Rest, [$\? | Acc]); +unescape_string([$\\, $a | Rest], Acc) -> + unescape_string(Rest, [$\a | Acc]); +%% Start of HEX escape code +unescape_string([$\\, $x | [$0 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$1 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$2 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$3 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$4 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$5 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$6 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$7 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$8 | _] = HexStringStart], Acc) -> + 
unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$9 | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$A | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$B | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$C | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$D | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$E | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$F | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$a | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$b | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$c | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$d | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$e | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +unescape_string([$\\, $x | [$f | _] = HexStringStart], Acc) -> + unescape_handle_hex_string(HexStringStart, Acc); +%% We treat all other escape sequences as not valid input to leave room for +%% extending the function to support more escape codes +unescape_string([$\\, X | _Rest], _Acc) -> + erlang:throw({unrecognized_escape_sequence, list_to_binary([$\\, X])}); +unescape_string([First | Rest], Acc) -> + unescape_string(Rest, [First | Acc]). 
+ +unescape_handle_hex_string(HexStringStart, Acc) -> + {RemainingString, Num} = parse_hex_string(HexStringStart), + unescape_string(RemainingString, [Num | Acc]). + +parse_hex_string(SeqStartingWithHexDigit) -> + parse_hex_string(SeqStartingWithHexDigit, []). + +parse_hex_string([], Acc) -> + ReversedAcc = lists:reverse(Acc), + {[], list_to_integer(ReversedAcc, 16)}; +parse_hex_string([First | Rest] = String, Acc) -> + case is_hex_digit(First) of + true -> + parse_hex_string(Rest, [First | Acc]); + false -> + ReversedAcc = lists:reverse(Acc), + {String, list_to_integer(ReversedAcc, 16)} + end. + +is_hex_digit($0) -> true; +is_hex_digit($1) -> true; +is_hex_digit($2) -> true; +is_hex_digit($3) -> true; +is_hex_digit($4) -> true; +is_hex_digit($5) -> true; +is_hex_digit($6) -> true; +is_hex_digit($7) -> true; +is_hex_digit($8) -> true; +is_hex_digit($9) -> true; +is_hex_digit($A) -> true; +is_hex_digit($B) -> true; +is_hex_digit($C) -> true; +is_hex_digit($D) -> true; +is_hex_digit($E) -> true; +is_hex_digit($F) -> true; +is_hex_digit($a) -> true; +is_hex_digit($b) -> true; +is_hex_digit($c) -> true; +is_hex_digit($d) -> true; +is_hex_digit($e) -> true; +is_hex_digit($f) -> true; +is_hex_digit(_) -> false. + +%%------------------------------------------------------------------------------ +%% Data Type Conversion Funcs +%%------------------------------------------------------------------------------ + +str(Data) -> + emqx_utils_conv:bin(Data). diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index ac7f66597..ea8e192d4 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -132,6 +132,8 @@ %% String Funcs -export([ + coalesce/1, + coalesce/2, lower/1, ltrim/1, reverse/1, @@ -768,130 +770,67 @@ is_array(_) -> false. 
%% String Funcs %%------------------------------------------------------------------------------ -lower(S) when is_binary(S) -> - string:lowercase(S). +coalesce(List) -> emqx_variform_str:coalesce(List). -ltrim(S) when is_binary(S) -> - string:trim(S, leading). +coalesce(A, B) -> emqx_variform_str:coalesce(A, B). -reverse(S) when is_binary(S) -> - iolist_to_binary(string:reverse(S)). +lower(S) -> emqx_variform_str:lower(S). -rtrim(S) when is_binary(S) -> - string:trim(S, trailing). +ltrim(S) -> emqx_variform_str:ltrim(S). -strlen(S) when is_binary(S) -> - string:length(S). +reverse(S) -> emqx_variform_str:reverse(S). -substr(S, Start) when is_binary(S), is_integer(Start) -> - string:slice(S, Start). +rtrim(S) -> emqx_variform_str:rtrim(S). -substr(S, Start, Length) when - is_binary(S), - is_integer(Start), - is_integer(Length) --> - string:slice(S, Start, Length). +strlen(S) -> emqx_variform_str:strlen(S). -trim(S) when is_binary(S) -> - string:trim(S). +substr(S, Start) -> emqx_variform_str:substr(S, Start). -upper(S) when is_binary(S) -> - string:uppercase(S). +substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length). -split(S, P) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, all), R =/= <<>> andalso R =/= ""]. +trim(S) -> emqx_variform_str:trim(S). -split(S, P, <<"notrim">>) -> - string:split(S, P, all); -split(S, P, <<"leading_notrim">>) -> - string:split(S, P, leading); -split(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, leading), R =/= <<>> andalso R =/= ""]; -split(S, P, <<"trailing_notrim">>) -> - string:split(S, P, trailing); -split(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> - [R || R <- string:split(S, P, trailing), R =/= <<>> andalso R =/= ""]. +upper(S) -> emqx_variform_str:upper(S). -tokens(S, Separators) -> - [list_to_binary(R) || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators))]. +split(S, P) -> emqx_variform_str:split(S, P). 
-tokens(S, Separators, <<"nocrlf">>) -> - [ - list_to_binary(R) - || R <- string:lexemes(binary_to_list(S), binary_to_list(Separators) ++ [$\r, $\n, [$\r, $\n]]) - ]. +split(S, P, Position) -> emqx_variform_str:split(S, P, Position). -%% implicit convert args to strings, and then do concatenation -concat(S1, S2) -> - unicode:characters_to_binary([str(S1), str(S2)], unicode). +tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators). -sprintf_s(Format, Args) when is_list(Args) -> - erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). +tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF). -pad(S, Len) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, trailing)). +concat(S1, S2) -> emqx_variform_str:concat(S1, S2). -pad(S, Len, <<"trailing">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, trailing)); -pad(S, Len, <<"both">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, both)); -pad(S, Len, <<"leading">>) when is_binary(S), is_integer(Len) -> - iolist_to_binary(string:pad(S, Len, leading)). +concat(List) -> emqx_variform_str:concat(List). -pad(S, Len, <<"trailing">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, trailing, Chars)); -pad(S, Len, <<"both">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, both, Chars)); -pad(S, Len, <<"leading">>, Char) when is_binary(S), is_integer(Len), is_binary(Char) -> - Chars = unicode:characters_to_list(Char, utf8), - iolist_to_binary(string:pad(S, Len, leading, Chars)). +sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args). -replace(SrcStr, P, RepStr) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, all)). 
+pad(S, Len) -> emqx_variform_str:pad(S, Len). -replace(SrcStr, P, RepStr, <<"all">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, all)); -replace(SrcStr, P, RepStr, <<"trailing">>) when - is_binary(SrcStr), is_binary(P), is_binary(RepStr) --> - iolist_to_binary(string:replace(SrcStr, P, RepStr, trailing)); -replace(SrcStr, P, RepStr, <<"leading">>) when is_binary(SrcStr), is_binary(P), is_binary(RepStr) -> - iolist_to_binary(string:replace(SrcStr, P, RepStr, leading)). +pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position). -regex_match(Str, RE) -> - case re:run(Str, RE, [global, {capture, none}]) of - match -> true; - nomatch -> false - end. +pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char). -regex_replace(SrcStr, RE, RepStr) -> - re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). +replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr). -ascii(Char) when is_binary(Char) -> - [FirstC | _] = binary_to_list(Char), - FirstC. +replace(SrcStr, Pattern, RepStr, Position) -> + emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position). -find(S, P) when is_binary(S), is_binary(P) -> - find_s(S, P, leading). +regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE). -find(S, P, <<"trailing">>) when is_binary(S), is_binary(P) -> - find_s(S, P, trailing); -find(S, P, <<"leading">>) when is_binary(S), is_binary(P) -> - find_s(S, P, leading). +regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr). -find_s(S, P, Dir) -> - case string:find(S, P, Dir) of - nomatch -> <<"">>; - SubStr -> SubStr - end. +ascii(Char) -> emqx_variform_str:ascii(Char). + +find(S, P) -> emqx_variform_str:find(S, P). + +find(S, P, Position) -> emqx_variform_str:find(S, P, Position). + +join_to_string(Str) -> emqx_variform_str:join_to_string(Str). 
+ +join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List). -join_to_string(List) when is_list(List) -> - join_to_string(<<", ">>, List). -join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> - iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). join_to_sql_values_string(List) -> QuotedList = [ @@ -938,137 +877,7 @@ jq(FilterProgram, JSONBin) -> ]) ). -unescape(Bin) when is_binary(Bin) -> - UnicodeList = unicode:characters_to_list(Bin, utf8), - UnescapedUnicodeList = unescape_string(UnicodeList), - UnescapedUTF8Bin = unicode:characters_to_binary(UnescapedUnicodeList, utf32, utf8), - case UnescapedUTF8Bin of - Out when is_binary(Out) -> - Out; - Error -> - throw({invalid_unicode_character, Error}) - end. - -unescape_string(Input) -> unescape_string(Input, []). - -unescape_string([], Acc) -> - lists:reverse(Acc); -unescape_string([$\\, $\\ | Rest], Acc) -> - unescape_string(Rest, [$\\ | Acc]); -unescape_string([$\\, $n | Rest], Acc) -> - unescape_string(Rest, [$\n | Acc]); -unescape_string([$\\, $t | Rest], Acc) -> - unescape_string(Rest, [$\t | Acc]); -unescape_string([$\\, $r | Rest], Acc) -> - unescape_string(Rest, [$\r | Acc]); -unescape_string([$\\, $b | Rest], Acc) -> - unescape_string(Rest, [$\b | Acc]); -unescape_string([$\\, $f | Rest], Acc) -> - unescape_string(Rest, [$\f | Acc]); -unescape_string([$\\, $v | Rest], Acc) -> - unescape_string(Rest, [$\v | Acc]); -unescape_string([$\\, $' | Rest], Acc) -> - unescape_string(Rest, [$\' | Acc]); -unescape_string([$\\, $" | Rest], Acc) -> - unescape_string(Rest, [$\" | Acc]); -unescape_string([$\\, $? | Rest], Acc) -> - unescape_string(Rest, [$\? 
| Acc]); -unescape_string([$\\, $a | Rest], Acc) -> - unescape_string(Rest, [$\a | Acc]); -%% Start of HEX escape code -unescape_string([$\\, $x | [$0 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$1 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$2 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$3 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$4 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$5 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$6 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$7 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$8 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$9 | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$A | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$B | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$C | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$D | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$E | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$F | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$a | _] = 
HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$b | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$c | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$d | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$e | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -unescape_string([$\\, $x | [$f | _] = HexStringStart], Acc) -> - unescape_handle_hex_string(HexStringStart, Acc); -%% We treat all other escape sequences as not valid input to leave room for -%% extending the function to support more escape codes -unescape_string([$\\, X | _Rest], _Acc) -> - erlang:throw({unrecognized_escape_sequence, list_to_binary([$\\, X])}); -unescape_string([First | Rest], Acc) -> - unescape_string(Rest, [First | Acc]). - -unescape_handle_hex_string(HexStringStart, Acc) -> - {RemainingString, Num} = parse_hex_string(HexStringStart), - unescape_string(RemainingString, [Num | Acc]). - -parse_hex_string(SeqStartingWithHexDigit) -> - parse_hex_string(SeqStartingWithHexDigit, []). - -parse_hex_string([], Acc) -> - ReversedAcc = lists:reverse(Acc), - {[], list_to_integer(ReversedAcc, 16)}; -parse_hex_string([First | Rest] = String, Acc) -> - case is_hex_digit(First) of - true -> - parse_hex_string(Rest, [First | Acc]); - false -> - ReversedAcc = lists:reverse(Acc), - {String, list_to_integer(ReversedAcc, 16)} - end. 
- -is_hex_digit($0) -> true; -is_hex_digit($1) -> true; -is_hex_digit($2) -> true; -is_hex_digit($3) -> true; -is_hex_digit($4) -> true; -is_hex_digit($5) -> true; -is_hex_digit($6) -> true; -is_hex_digit($7) -> true; -is_hex_digit($8) -> true; -is_hex_digit($9) -> true; -is_hex_digit($A) -> true; -is_hex_digit($B) -> true; -is_hex_digit($C) -> true; -is_hex_digit($D) -> true; -is_hex_digit($E) -> true; -is_hex_digit($F) -> true; -is_hex_digit($a) -> true; -is_hex_digit($b) -> true; -is_hex_digit($c) -> true; -is_hex_digit($d) -> true; -is_hex_digit($e) -> true; -is_hex_digit($f) -> true; -is_hex_digit(_) -> false. +unescape(Str) -> emqx_variform_str:unescape(Str). %%------------------------------------------------------------------------------ %% Array Funcs @@ -1095,6 +904,10 @@ last(List) when is_list(List) -> contains(Elm, List) when is_list(List) -> lists:member(Elm, List). +%%------------------------------------------------------------------------------ +%% Map Funcs +%%------------------------------------------------------------------------------ + map_new() -> #{}. From 5f26e4ed5e66de9d2193329e5738bf230201bf5b Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 18:03:03 +0100 Subject: [PATCH 018/234] feat(variform): implement variform engine --- apps/emqx/src/variform/emqx_variform.erl | 158 +++++++++++++++++- apps/emqx/src/variform/emqx_variform_str.erl | 10 +- apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 1 + 3 files changed, 158 insertions(+), 11 deletions(-) diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl index 6f4fd6f47..51108885c 100644 --- a/apps/emqx/src/variform/emqx_variform.erl +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -14,17 +14,46 @@ %% limitations under the License. %%-------------------------------------------------------------------- -%% Predefined functions for templating +%% @doc This module provides a single-line expression string rendering engine. 
+%% A predefined set of functions are allowed to be called in the expressions. +%% Only simple string expressions are supported, and no control flow is allowed. +%% However, with the help from the functions, some control flow can be achieved. +%% For example, the `coalesce` function can be used to provide a default value, +%% or used to choose the first non-empty value from a list of variables. -module(emqx_variform). --export([render/2]). +-export([inject_allowed_modules/1]). +-export([render/2, render/3]). -render(Expression, Context) -> +%% @doc Render a variform expression with bindings. +%% A variform expression is a template string which supports variable substitution +%% and function calls. +%% +%% The function calls are in the form of `module.function(arg1, arg2, ...)` where `module` +%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str` module. +%% Both module and function must be existing atoms, and only whitelisted functions are allowed. +%% +%% A function arg can be a constant string or a number. +%% Strings can be quoted with single quotes or double quotes, without support of escape characters. +%% If some special characters are needed, the function `unescape' can be used convert a escaped string +%% to raw bytes. +%% For example, to get the first line of a multi-line string, the expression can be +%% `coalesce(tokens(variable_name, unescape("\n")))'. +%% +%% The bindings is a map of variables to their values. +%% +%% For unresolved variables, empty string (but not "undefined") is used. +%% In case of runtime exeption, an error is returned. +-spec render(string(), map()) -> {ok, binary()} | {error, term()}. +render(Expression, Bindings) -> + render(Expression, Bindings, #{}). 
+ +render(Expression, Bindings, Opts) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> case emqx_variform_parser:parse(Tokens) of {ok, Expr} -> - eval(Expr, Context); + eval_as_string(Expr, Bindings, Opts); {error, {_, emqx_variform_parser, Msg}} -> %% syntax error {error, lists:flatten(Msg)}; @@ -35,5 +64,122 @@ render(Expression, Context) -> {error, Reason} end. -eval(Expr, _Context) -> - io:format(user, "~p~n", [Expr]). +eval_as_string(Expr, Bindings, _Opts) -> + try + {ok, iolist_to_binary(eval(Expr, Bindings))} + catch + throw:Reason -> + {error, Reason}; + C:E:S -> + {error, #{exception => C, reason => E, stack_trace => S}} + end. + +eval({str, Str}, _Bindings) -> + str(Str); +eval({num, Num}, _Bindings) -> + str(Num); +eval({call, FuncNameStr, Args}, Bindings) -> + {Mod, Fun} = resolve_func_name(FuncNameStr), + ok = assert_func_exported(Mod, Fun, length(Args)), + call(Mod, Fun, eval(Args, Bindings)); +eval({var, VarName}, Bindings) -> + resolve_var_value(VarName, Bindings); +eval([Arg | Args], Bindings) -> + [eval(Arg, Bindings) | eval(Args, Bindings)]; +eval([], _Bindings) -> + []. + +%% Some functions accept arbitrary number of arguments but implemented as /1. +call(emqx_variform_str, concat, Args) -> + str(emqx_variform_str:concat(Args)); +call(emqx_variform_str, coalesce, Args) -> + str(emqx_variform_str:coalesce(Args)); +call(Mod, Fun, Args) -> + str(erlang:apply(Mod, Fun, Args)). 
+ +resolve_func_name(FuncNameStr) -> + case string:tokens(FuncNameStr, ".") of + [Mod0, Fun0] -> + Mod = + try + list_to_existing_atom(Mod0) + catch + error:badarg -> + throw(#{unknown_module => Mod0}) + end, + ok = assert_module_allowed(Mod), + Fun = + try + list_to_existing_atom(Fun0) + catch + error:badarg -> + throw(#{unknown_function => Fun0}) + end, + {Mod, Fun}; + [Fun] -> + FuncName = + try + list_to_existing_atom(Fun) + catch + error:badarg -> + throw(#{ + reason => "unknown_variform_function", + function => Fun + }) + end, + {emqx_variform_str, FuncName} + end. + +resolve_var_value(VarName, Bindings) -> + case emqx_template:lookup_var(split(VarName), Bindings) of + {ok, Value} -> + str(Value); + {error, _Reason} -> + <<>> + end. + +assert_func_exported(emqx_variform_str, concat, _Arity) -> + ok; +assert_func_exported(emqx_variform_str, coalesce, _Arity) -> + ok; +assert_func_exported(Mod, Fun, Arity) -> + _ = Mod:module_info(md5), + case erlang:function_exported(Mod, Fun, Arity) of + true -> + ok; + false -> + throw(#{ + reason => "unknown_variform_function", + module => Mod, + function => Fun, + arity => Arity + }) + end. + +assert_module_allowed(emqx_variform_str) -> + ok; +assert_module_allowed(Mod) -> + Allowed = get_allowed_modules(), + case lists:member(Mod, Allowed) of + true -> + ok; + false -> + throw(#{ + reason => "unallowed_veriform_module", + module => Mod + }) + end. + +inject_allowed_modules(Modules) -> + Allowed0 = get_allowed_modules(), + Allowed = lists:usort(Allowed0 ++ Modules), + persistent_term:put({emqx_variform, allowed_modules}, Allowed). + +get_allowed_modules() -> + persistent_term:get({emqx_variform, allowed_modules}, []). + +str(Value) -> + emqx_utils_conv:bin(Value). + +split(VarName) -> + lists:map(fun erlang:iolist_to_binary/1, string:tokens(VarName, ".")). 
diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx/src/variform/emqx_variform_str.erl index d94519f76..7b8e2e742 100644 --- a/apps/emqx/src/variform/emqx_variform_str.erl +++ b/apps/emqx/src/variform/emqx_variform_str.erl @@ -64,10 +64,10 @@ %% @doc Return the first non-empty string coalesce(A, B) when ?IS_EMPTY(A) andalso ?IS_EMPTY(B) -> <<>>; -coalesce(A, _) when is_binary(A) -> - A; -coalesce(_, B) -> - B. +coalesce(A, B) when ?IS_EMPTY(A) -> + B; +coalesce(A, _B) -> + A. %% @doc Return the first non-empty string coalesce([]) -> @@ -140,7 +140,7 @@ tokens(S, Separators, <<"nocrlf">>) -> %% implicit convert args to strings, and then do concatenation concat(S1, S2) -> - concat([S1, S2], unicode). + concat([S1, S2]). concat(List) -> unicode:characters_to_binary(lists:map(fun str/1, List), unicode). diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index ea8e192d4..6a719c3f1 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -145,6 +145,7 @@ upper/1, split/2, split/3, + concat/1, concat/2, tokens/2, tokens/3, From bfca3ebc71a81583cc1cf7df795e284ff66dfc79 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 28 Mar 2024 19:30:23 +0100 Subject: [PATCH 019/234] feat(variform): support array syntax '[' and ']' --- apps/emqx/src/variform/emqx_variform.erl | 2 + .../src/variform/emqx_variform_parser.yrl | 42 ++++++++++++++----- apps/emqx/src/variform/emqx_variform_scan.xrl | 4 ++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx/src/variform/emqx_variform.erl index 51108885c..95ea1e1ce 100644 --- a/apps/emqx/src/variform/emqx_variform.erl +++ b/apps/emqx/src/variform/emqx_variform.erl @@ -78,6 +78,8 @@ eval({str, Str}, _Bindings) -> str(Str); eval({num, Num}, _Bindings) -> str(Num); +eval({array, Args}, Bindings) -> + eval(Args, Bindings); eval({call, FuncNameStr, Args}, 
Bindings) -> {Mod, Fun} = resolve_func_name(FuncNameStr), ok = assert_func_exported(Mod, Fun, length(Args)), diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx/src/variform/emqx_variform_parser.yrl index 3f2b739ba..8a8a03a4d 100644 --- a/apps/emqx/src/variform/emqx_variform_parser.yrl +++ b/apps/emqx/src/variform/emqx_variform_parser.yrl @@ -1,16 +1,38 @@ -Nonterminals expr call_or_var args. -Terminals identifier number string '(' ')' ','. +Nonterminals + expr + call_or_var + array + args + arg. -Rootsymbol expr. +Terminals + identifier + number + string + '(' ')' + ',' '[' ']'. + +Rootsymbol + expr. %% Grammar Rules -expr -> call_or_var: '$1'. -call_or_var -> identifier '(' args ')' : {call, element(3,'$1'), '$3'}. +%% Root expression: function call or variable +expr -> call_or_var : '$1'. + +%% Function call or variable +call_or_var -> identifier '(' args ')' : {call, element(3, '$1'), '$3'}. call_or_var -> identifier : {var, element(3, '$1')}. -args -> expr : ['$1']. -args -> args ',' expr : '$1' ++ ['$3']. -%% Handling direct values and variables within arguments -expr -> number : {num, element(3, '$1')}. -expr -> string : {str, element(3, '$1')}. +%% Array is like a arg list, but with square brackets +array -> '[' args ']' : {array, '$2'}. + +%% Argument handling +args -> arg : ['$1']. +args -> args ',' arg : '$1' ++ ['$3']. + +%% Arguments can be expressions, arrays, numbers, or strings +arg -> expr : '$1'. +arg -> array : '$1'. +arg -> number : {num, element(3, '$1')}. +arg -> string : {str, element(3, '$1')}. diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl b/apps/emqx/src/variform/emqx_variform_scan.xrl index 53657bad4..29a45ef92 100644 --- a/apps/emqx/src/variform/emqx_variform_scan.xrl +++ b/apps/emqx/src/variform/emqx_variform_scan.xrl @@ -6,6 +6,8 @@ DQ_STRING = \"[^\"]*\" NUMBER = [+-]?(\\d+\\.\\d+|[0-9]+) LPAREN = \( RPAREN = \) +LBRACKET = \[ +RBRACKET = \] COMMA = , WHITESPACE = [\s\t\n]+ @@ -18,6 +20,8 @@ Rules. 
{NUMBER} : {token, {number, TokenLine, TokenChars}}. {LPAREN} : {token, {'(', TokenLine}}. {RPAREN} : {token, {')', TokenLine}}. +{LBRACKET} : {token, {'[', TokenLine}}. +{RBRACKET} : {token, {']', TokenLine}}. {COMMA} : {token, {',', TokenLine}}. Erlang code. From 6e0be5ad35b915f03db48c2cbc0c7e19e03e9801 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 28 Mar 2024 18:19:47 +0100 Subject: [PATCH 020/234] ci(release): bump emqx/upload-assets to 0.5.2 this version adds an option to skip uploading existing assets --- .github/workflows/release.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 1bed80376..0380b630b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -67,12 +67,13 @@ jobs: BUCKET=${{ secrets.AWS_S3_BUCKET }} OUTPUT_DIR=${{ steps.profile.outputs.s3dir }} aws s3 cp --recursive s3://$BUCKET/$OUTPUT_DIR/${{ env.ref_name }} packages - - uses: emqx/upload-assets@8d2083b4dbe3151b0b735572eaa153b6acb647fe # 0.5.0 + - uses: emqx/upload-assets@974befcf0e72a1811360a81c798855efb66b0551 # 0.5.2 env: GITHUB_TOKEN: ${{ github.token }} with: asset_paths: '["packages/*"]' tag_name: "${{ env.ref_name }}" + skip_existing: true - name: update to emqx.io if: startsWith(env.ref_name, 'v') && ((github.event_name == 'release' && !github.event.release.prerelease) || inputs.publish_release_artefacts) run: | From f5a820cb102c9d9ae60a3d340960a053f0522cca Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:09:08 +0200 Subject: [PATCH 021/234] fix(emqx_mgmt): catch OOM shutdown exits properly when calling a client conn process The exit reason is expected to include gen_server `Location`: `{{shutdown, OOMInfo}, Location}`. 
--- apps/emqx_management/src/emqx_mgmt.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 35908d3bd..df0450395 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -711,5 +711,7 @@ call_conn(ConnMod, Pid, Req) -> exit:R when R =:= shutdown; R =:= normal -> {error, shutdown}; exit:{R, _} when R =:= shutdown; R =:= noproc -> + {error, shutdown}; + exit:{{shutdown, _OOMInfo}, _Location} -> {error, shutdown} end. From 42af1f9d634b1408f537c233a7d1fe9801e7950b Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:29:19 +0200 Subject: [PATCH 022/234] fix: handle internal timeout errors in client Mqueue/Inflight APIs --- apps/emqx_management/src/emqx_mgmt.erl | 14 +++++++++++++- apps/emqx_management/src/emqx_mgmt_api_clients.erl | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index df0450395..bc194f03e 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -713,5 +713,17 @@ call_conn(ConnMod, Pid, Req) -> exit:{R, _} when R =:= shutdown; R =:= noproc -> {error, shutdown}; exit:{{shutdown, _OOMInfo}, _Location} -> - {error, shutdown} + {error, shutdown}; + exit:timeout -> + ?SLOG( + warning, + #{ + msg => "call_client_connection_process_timeout", + request => Req, + pid => Pid, + module => ConnMod, + stacktrace => erlang:process_info(Pid, current_stacktrace) + } + ), + {error, timeout} end. 
diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index dd65c1245..262faf87f 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1232,6 +1232,8 @@ list_client_msgs(MsgType, ClientID, QString) -> code => 'NOT_IMPLEMENTED', message => <<"API not implemented for persistent sessions">> }}; + {error, Reason} -> + ?INTERNAL_ERROR(Reason); {Msgs, Meta = #{}} when is_list(Msgs) -> format_msgs_resp(MsgType, Msgs, Meta, QString) end From 6cdf876684932d81f8bd063c85a175b5d9cc455c Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 29 Mar 2024 13:39:36 +0200 Subject: [PATCH 023/234] chore: add changelog --- changes/ce/fix-12814.en.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/ce/fix-12814.en.md diff --git a/changes/ce/fix-12814.en.md b/changes/ce/fix-12814.en.md new file mode 100644 index 000000000..f84025561 --- /dev/null +++ b/changes/ce/fix-12814.en.md @@ -0,0 +1,4 @@ +Handle several errors in `/clients/{clientid}/mqueue_messages` and `/clients/{clientid}/inflight_messages` APIs: + +- Internal timeout, which means that EMQX failed to get the list of Inflight/Mqueue messages within the default timeout of 5 s. This error may occur when the system is under a heavy load. The API will return 500 `{"code":"INTERNAL_ERROR","message":"timeout"}` response and log additional details. +- Client shutdown. The error may occur if the client connection is shutdown during the API call. The API will return 404 `{"code": "CLIENT_SHUTDOWN", "message": "Client connection has been shutdown"}` response in this case. 
From ceb04ba06da74061f10fce697027884549a3925b Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Mon, 1 Apr 2024 16:42:12 +0300 Subject: [PATCH 024/234] fix(emqx_mgmt): do not attempt to get a stacktrace of a remote client connection process --- apps/emqx_management/src/emqx_mgmt.erl | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index bc194f03e..0cde965f9 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -715,15 +715,20 @@ call_conn(ConnMod, Pid, Req) -> exit:{{shutdown, _OOMInfo}, _Location} -> {error, shutdown}; exit:timeout -> - ?SLOG( - warning, - #{ - msg => "call_client_connection_process_timeout", - request => Req, - pid => Pid, - module => ConnMod, - stacktrace => erlang:process_info(Pid, current_stacktrace) - } - ), + LogData = #{ + msg => "call_client_connection_process_timeout", + request => Req, + pid => Pid, + module => ConnMod + }, + LogData1 = + case node(Pid) =:= node() of + true -> + LogData#{stacktrace => erlang:process_info(Pid, current_stacktrace)}; + false -> + LogData + end, + + ?SLOG(warning, LogData1), {error, timeout} end. 
From c62410ff75285c667bd45053a0a8a7f05c36164a Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Mon, 1 Apr 2024 17:03:50 +0300 Subject: [PATCH 025/234] refactor: remove already bound variable --- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 0b8f71a7f..900f6143f 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -450,7 +450,6 @@ connect(Options) -> options => emqx_utils:redact(Options) }), Name = proplists:get_value(name, Options), - WorkerId = proplists:get_value(ecpool_worker_id, Options), ClientOpts = proplists:get_value(client_opts, Options), case emqtt:start_link(mk_client_opts(Name, WorkerId, ClientOpts)) of {ok, Pid} -> From bade09b56e35c80509ef04d605ac5cf8f0947566 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 28 Mar 2024 18:28:54 -0300 Subject: [PATCH 026/234] feat(resource manager): perform non-blocking resource health checks Fixes https://emqx.atlassian.net/browse/EMQX-12015 This introduces only _resource_ non-blocking health checks. _Channel_ non-blocking health checks may be introduced later. 
--- .../test/emqx_bridge_api_SUITE.erl | 43 +++- apps/emqx_resource/src/emqx_resource.app.src | 2 +- .../src/emqx_resource_manager.erl | 235 ++++++++++++------ .../test/emqx_connector_demo.erl | 16 +- .../test/emqx_resource_SUITE.erl | 108 +++++--- apps/emqx_utils/src/emqx_utils_redact.erl | 15 +- changes/ce/fix-12812.en.md | 1 + 7 files changed, 291 insertions(+), 129 deletions(-) create mode 100644 changes/ce/fix-12812.en.md diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 30b6c8b34..1971ad697 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -825,22 +825,47 @@ do_start_stop_bridges(Type, Config) -> %% Connecting to this endpoint should always timeout BadServer = iolist_to_binary(io_lib:format("localhost:~B", [ListenPort])), BadName = <<"bad_", (atom_to_binary(Type))/binary>>, + CreateRes0 = request_json( + post, + uri(["bridges"]), + ?MQTT_BRIDGE(BadServer, BadName), + Config + ), ?assertMatch( {ok, 201, #{ <<"type">> := ?BRIDGE_TYPE_MQTT, <<"name">> := BadName, <<"enable">> := true, - <<"server">> := BadServer, - <<"status">> := <<"connecting">>, - <<"node_status">> := [_ | _] + <<"server">> := BadServer }}, - request_json( - post, - uri(["bridges"]), - ?MQTT_BRIDGE(BadServer, BadName), - Config - ) + CreateRes0 ), + {ok, 201, CreateRes1} = CreateRes0, + case CreateRes1 of + #{ + <<"node_status">> := [ + #{ + <<"status">> := <<"disconnected">>, + <<"status_reason">> := <<"connack_timeout">> + }, + #{<<"status">> := <<"connecting">>} + | _ + ], + %% `inconsistent': one node is `?status_disconnected' (because it has already + %% timed out), the other node is `?status_connecting' (started later and + %% haven't timed out yet) + <<"status">> := <<"inconsistent">>, + <<"status_reason">> := <<"connack_timeout">> + } -> + ok; + #{ + <<"node_status">> := [_], + <<"status">> := <<"connecting">> + } -> + ok; + _ -> + 
error({unexpected_result, CreateRes1}) + end, BadBridgeID = emqx_bridge_resource:bridge_id(?BRIDGE_TYPE_MQTT, BadName), ?assertMatch( %% request from product: return 400 on such errors diff --git a/apps/emqx_resource/src/emqx_resource.app.src b/apps/emqx_resource/src/emqx_resource.app.src index 272dd4e08..913cc5e8c 100644 --- a/apps/emqx_resource/src/emqx_resource.app.src +++ b/apps/emqx_resource/src/emqx_resource.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_resource, [ {description, "Manager for all external resources"}, - {vsn, "0.1.28"}, + {vsn, "0.1.29"}, {registered, []}, {mod, {emqx_resource_app, []}}, {applications, [ diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 60bc3c7e9..06123a935 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -60,6 +60,9 @@ % Behaviour -export([init/1, callback_mode/0, handle_event/4, terminate/3]). +%% Internal exports. +-export([worker_resource_health_check/1]). + % State record -record(data, { id, @@ -73,7 +76,15 @@ state, error, pid, - added_channels, + added_channels = #{}, + %% Reference to process performing resource health check. + hc_workers = #{resource => #{}, channel => #{}} :: #{ + resource | channel := #{{pid(), reference()} => true} + }, + %% Callers waiting on health check + hc_pending_callers = #{resource => [], channel => []} :: #{ + resource | channel := [gen_server:from()] + }, extra }). -type data() :: #data{}. 
@@ -153,13 +164,13 @@ create(ResId, Group, ResourceType, Config, Opts) -> case SpawnBufferWorkers andalso lists:member(QueryMode, [sync, async]) of true -> %% start resource workers as the query type requires them - ok = emqx_resource_buffer_worker_sup:start_workers(ResId, Opts), - case maps:get(start_after_created, Opts, ?START_AFTER_CREATED) of - true -> - wait_for_ready(ResId, maps:get(start_timeout, Opts, ?START_TIMEOUT)); - false -> - ok - end; + ok = emqx_resource_buffer_worker_sup:start_workers(ResId, Opts); + false -> + ok + end, + case maps:get(start_after_created, Opts, ?START_AFTER_CREATED) of + true -> + wait_for_ready(ResId, maps:get(start_timeout, Opts, ?START_TIMEOUT)); false -> ok end. @@ -455,7 +466,7 @@ handle_event({call, From}, {remove, ClearMetrics}, _State, Data) -> handle_event({call, From}, lookup, _State, #data{group = Group} = Data) -> Reply = {ok, Group, data_record_to_external_map(Data)}, {keep_state_and_data, [{reply, From, Reply}]}; -% Called when doing a manually health check. +% Called when doing a manual health check. 
handle_event({call, From}, health_check, ?state_stopped, _Data) -> Actions = [{reply, From, {error, resource_is_stopped}}], {keep_state_and_data, Actions}; @@ -463,9 +474,9 @@ handle_event({call, From}, {channel_health_check, _}, ?state_stopped, _Data) -> Actions = [{reply, From, {error, resource_is_stopped}}], {keep_state_and_data, Actions}; handle_event({call, From}, health_check, _State, Data) -> - handle_manually_health_check(From, Data); + handle_manual_resource_health_check(From, Data); handle_event({call, From}, {channel_health_check, ChannelId}, _State, Data) -> - handle_manually_channel_health_check(From, Data, ChannelId); + handle_manual_channel_health_check(From, Data, ChannelId); % State: CONNECTING handle_event(enter, _OldState, ?state_connecting = State, Data) -> ok = log_status_consistency(State, Data), @@ -473,7 +484,7 @@ handle_event(enter, _OldState, ?state_connecting = State, Data) -> handle_event(internal, start_resource, ?state_connecting, Data) -> start_resource(Data, undefined); handle_event(state_timeout, health_check, ?state_connecting, Data) -> - handle_connecting_health_check(Data); + start_resource_health_check(Data); handle_event( {call, From}, {remove_channel, ChannelId}, ?state_connecting = _State, Data ) -> @@ -487,7 +498,7 @@ handle_event(enter, _OldState, ?state_connected = State, Data) -> ?tp(resource_connected_enter, #{}), {keep_state_and_data, health_check_actions(Data)}; handle_event(state_timeout, health_check, ?state_connected, Data) -> - handle_connected_health_check(Data); + start_resource_health_check(Data); handle_event( {call, From}, {add_channel, ChannelId, Config}, ?state_connected = _State, Data ) -> @@ -523,6 +534,15 @@ handle_event( ) -> Channels = emqx_resource:call_get_channels(Data#data.id, Data#data.mod), {keep_state_and_data, {reply, From, {ok, Channels}}}; +handle_event( + info, + {'DOWN', Ref, process, Pid, Res}, + State0, + Data0 = #data{hc_workers = #{resource := HCWorkers}} +) when + is_map_key({Pid, Ref}, 
HCWorkers) +-> + handle_resource_health_check_worker_down(State0, Data0, {Pid, Ref}, Res); % Ignore all other events handle_event(EventType, EventData, State, Data) -> ?SLOG( @@ -835,18 +855,127 @@ handle_not_connected_and_not_connecting_remove_channel(From, ChannelId, Data) -> _ = maybe_clear_alarm(ChannelId), {keep_state, update_state(NewData, Data), [{reply, From, ok}]}. -handle_manually_health_check(From, Data) -> - with_health_check( - Data, - fun(Status, UpdatedData) -> - Actions = [{reply, From, {ok, Status}}], - {next_state, Status, channels_health_check(Status, UpdatedData), Actions} - end - ). +handle_manual_resource_health_check(From, Data0 = #data{hc_workers = #{resource := HCWorkers}}) when + map_size(HCWorkers) > 0 +-> + %% ongoing health check + #data{hc_pending_callers = Pending0 = #{resource := RPending0}} = Data0, + Pending = Pending0#{resource := [From | RPending0]}, + Data = Data0#data{hc_pending_callers = Pending}, + {keep_state, Data}; +handle_manual_resource_health_check(From, Data0) -> + #data{hc_pending_callers = Pending0 = #{resource := RPending0}} = Data0, + Pending = Pending0#{resource := [From | RPending0]}, + Data = Data0#data{hc_pending_callers = Pending}, + start_resource_health_check(Data). -handle_manually_channel_health_check(From, #data{state = undefined}, _ChannelId) -> +reply_pending_health_check_callers(Status, resource, Data0 = #data{hc_pending_callers = Pending0}) -> + #{resource := RPending} = Pending0, + Actions = [{reply, From, {ok, Status}} || From <- RPending], + Data = Data0#data{hc_pending_callers = Pending0#{resource := []}}, + {Actions, Data}. + +start_resource_health_check(#data{state = undefined} = Data) -> + %% No resource running, thus disconnected. + %% A health check spawn when state is undefined can only happen when someone manually + %% asks for a health check and the resource could not initialize or has not had enough + %% time to do so. Let's assume the continuation is as if we were `?status_connecting'. 
+ continue_resource_health_check_not_connected(?status_disconnected, Data); +start_resource_health_check(#data{hc_workers = #{resource := HCWorkers}}) when + map_size(HCWorkers) > 0 +-> + %% Already ongoing + keep_state_and_data; +start_resource_health_check(#data{} = Data0) -> + #data{hc_workers = HCWorkers0 = #{resource := RHCWorkers0}} = Data0, + WorkerRef = {_Pid, _Ref} = spawn_health_check_worker(Data0), + HCWorkers = HCWorkers0#{resource := RHCWorkers0#{WorkerRef => true}}, + Data = Data0#data{hc_workers = HCWorkers}, + {keep_state, Data}. + +-spec spawn_health_check_worker(data()) -> {pid(), reference()}. +spawn_health_check_worker(#data{} = Data) -> + spawn_monitor(?MODULE, worker_resource_health_check, [Data]). + +%% separated so it can be spec'ed and placate dialyzer tantrums... +-spec worker_resource_health_check(data()) -> no_return(). +worker_resource_health_check(Data) -> + HCRes = emqx_resource:call_health_check(Data#data.id, Data#data.mod, Data#data.state), + exit({ok, HCRes}). + +handle_resource_health_check_worker_down(CurrentState, Data0, WorkerRef, ExitResult) -> + #data{hc_workers = HCWorkers0 = #{resource := RHCWorkers0}} = Data0, + HCWorkers = HCWorkers0#{resource := maps:remove(WorkerRef, RHCWorkers0)}, + Data1 = Data0#data{hc_workers = HCWorkers}, + case ExitResult of + {ok, HCRes} -> + continue_with_health_check(Data1, CurrentState, HCRes); + _ -> + %% Unexpected: `emqx_resource:call_health_check' catches all exceptions. + continue_with_health_check(Data1, CurrentState, {error, ExitResult}) + end. 
+ +continue_with_health_check(#data{} = Data0, CurrentState, HCRes) -> + #data{ + id = ResId, + error = PrevError + } = Data0, + {NewStatus, NewState, Err} = parse_health_check_result(HCRes, Data0), + _ = maybe_alarm(NewStatus, ResId, Err, PrevError), + ok = maybe_resume_resource_workers(ResId, NewStatus), + Data1 = Data0#data{ + state = NewState, status = NewStatus, error = Err + }, + Data = update_state(Data1, Data0), + case CurrentState of + ?state_connected -> + continue_resource_health_check_connected(NewStatus, Data); + _ -> + %% `?state_connecting' | `?state_disconnected' | `?state_stopped' + continue_resource_health_check_not_connected(NewStatus, Data) + end. + +%% Continuation to be used when the current resource state is `?state_connected'. +continue_resource_health_check_connected(NewStatus, Data0) -> + case NewStatus of + ?status_connected -> + {Replies, Data1} = reply_pending_health_check_callers(NewStatus, resource, Data0), + Data2 = channels_health_check(?status_connected, Data1), + Data = update_state(Data2, Data0), + Actions = Replies ++ health_check_actions(Data), + {keep_state, Data, Actions}; + _ -> + ?SLOG(warning, #{ + msg => "health_check_failed", + id => Data0#data.id, + status => NewStatus + }), + %% Note: works because, coincidentally, channel/resource status is a + %% subset of resource manager state... But there should be a conversion + %% between the two here, as resource manager also has `stopped', which is + %% not a valid status at the time of writing. + {Replies, Data} = reply_pending_health_check_callers(NewStatus, resource, Data0), + {next_state, NewStatus, channels_health_check(NewStatus, Data), Replies} + end. + +%% Continuation to be used when the current resource state is not `?state_connected'. 
+continue_resource_health_check_not_connected(NewStatus, Data0) -> + {Replies, Data} = reply_pending_health_check_callers(NewStatus, resource, Data0), + case NewStatus of + ?status_connected -> + {next_state, ?state_connected, channels_health_check(?status_connected, Data), Replies}; + ?status_connecting -> + Actions = Replies ++ health_check_actions(Data), + {next_state, ?status_connecting, channels_health_check(?status_connecting, Data), + Actions}; + ?status_disconnected -> + {next_state, ?state_disconnected, channels_health_check(?status_disconnected, Data), + Replies} + end. + +handle_manual_channel_health_check(From, #data{state = undefined}, _ChannelId) -> {keep_state_and_data, [{reply, From, channel_status({error, resource_disconnected})}]}; -handle_manually_channel_health_check( +handle_manual_channel_health_check( From, #data{added_channels = Channels} = _Data, ChannelId @@ -854,7 +983,7 @@ handle_manually_channel_health_check( is_map_key(ChannelId, Channels) -> {keep_state_and_data, [{reply, From, maps:get(ChannelId, Channels)}]}; -handle_manually_channel_health_check( +handle_manual_channel_health_check( From, _Data, _ChannelId @@ -865,56 +994,6 @@ get_channel_status_channel_added(#data{id = ResId, mod = Mod, state = State}, Ch RawStatus = emqx_resource:call_channel_health_check(ResId, ChannelId, Mod, State), channel_status(RawStatus). -handle_connecting_health_check(Data) -> - with_health_check( - Data, - fun - (?status_connected, UpdatedData) -> - {next_state, ?state_connected, - channels_health_check(?status_connected, UpdatedData)}; - (?status_connecting, UpdatedData) -> - {keep_state, channels_health_check(?status_connecting, UpdatedData), - health_check_actions(UpdatedData)}; - (?status_disconnected, UpdatedData) -> - {next_state, ?state_disconnected, - channels_health_check(?status_disconnected, UpdatedData)} - end - ). 
- -handle_connected_health_check(Data) -> - with_health_check( - Data, - fun - (?status_connected, UpdatedData0) -> - UpdatedData1 = channels_health_check(?status_connected, UpdatedData0), - {keep_state, UpdatedData1, health_check_actions(UpdatedData1)}; - (Status, UpdatedData) -> - ?SLOG(warning, #{ - msg => "health_check_failed", - id => Data#data.id, - status => Status - }), - %% Note: works because, coincidentally, channel/resource status is a - %% subset of resource manager state... But there should be a conversion - %% between the two here, as resource manager also has `stopped', which is - %% not a valid status at the time of writing. - {next_state, Status, channels_health_check(Status, UpdatedData)} - end - ). - -with_health_check(#data{state = undefined} = Data, Func) -> - Func(disconnected, Data); -with_health_check(#data{error = PrevError} = Data, Func) -> - ResId = Data#data.id, - HCRes = emqx_resource:call_health_check(Data#data.id, Data#data.mod, Data#data.state), - {Status, NewState, Err} = parse_health_check_result(HCRes, Data), - _ = maybe_alarm(Status, ResId, Err, PrevError), - ok = maybe_resume_resource_workers(ResId, Status), - UpdatedData = Data#data{ - state = NewState, status = Status, error = Err - }, - Func(Status, update_state(UpdatedData, Data)). - -spec channels_health_check(resource_status(), data()) -> data(). channels_health_check(?status_connected = _ConnectorStatus, Data0) -> Channels = maps:to_list(Data0#data.added_channels), @@ -1097,9 +1176,15 @@ update_state(Data) -> update_state(DataWas, DataWas) -> DataWas; update_state(Data, _DataWas) -> - _ = insert_cache(Data#data.id, Data), + _ = insert_cache(Data#data.id, remove_runtime_data(Data)), Data. +remove_runtime_data(#data{} = Data0) -> + Data0#data{ + hc_workers = #{resource => #{}, channel => #{}}, + hc_pending_callers = #{resource => [], channel => []} + }. + health_check_interval(Opts) -> maps:get(health_check_interval, Opts, ?HEALTHCHECK_INTERVAL). 
diff --git a/apps/emqx_resource/test/emqx_connector_demo.erl b/apps/emqx_resource/test/emqx_connector_demo.erl index 93f6b661b..d1ac5c2e6 100644 --- a/apps/emqx_resource/test/emqx_connector_demo.erl +++ b/apps/emqx_resource/test/emqx_connector_demo.erl @@ -18,6 +18,7 @@ -include_lib("typerefl/include/types.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). -behaviour(emqx_resource). @@ -276,15 +277,22 @@ batch_individual_reply({async, ReplyFunAndArgs}, InstId, Batch, State) -> on_get_status(_InstId, #{health_check_error := true}) -> ?tp(connector_demo_health_check_error, #{}), - disconnected; + ?status_disconnected; on_get_status(_InstId, State = #{health_check_error := {msg, Message}}) -> ?tp(connector_demo_health_check_error, #{}), - {disconnected, State, Message}; + {?status_disconnected, State, Message}; +on_get_status(_InstId, #{pid := Pid, health_check_error := {delay, Delay}}) -> + ?tp(connector_demo_health_check_delay, #{}), + timer:sleep(Delay), + case is_process_alive(Pid) of + true -> ?status_connected; + false -> ?status_disconnected + end; on_get_status(_InstId, #{pid := Pid}) -> timer:sleep(300), case is_process_alive(Pid) of - true -> connected; - false -> disconnected + true -> ?status_connected; + false -> ?status_disconnected end. spawn_counter_process(Name, Register) -> diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index fa9f7e7c9..a6cdaedb2 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -52,12 +52,20 @@ end_per_testcase(_, _Config) -> init_per_suite(Config) -> code:ensure_loaded(?TEST_RESOURCE), - ok = emqx_common_test_helpers:start_apps([emqx_conf]), - {ok, _} = application:ensure_all_started(emqx_resource), - Config. 
+ Apps = emqx_cth_suite:start( + [ + emqx, + emqx_conf, + emqx_resource + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{apps, Apps} | Config]. -end_per_suite(_Config) -> - ok = emqx_common_test_helpers:stop_apps([emqx_resource, emqx_conf]). +end_per_suite(Config) -> + Apps = proplists:get_value(apps, Config), + emqx_cth_suite:stop(Apps), + ok. %%------------------------------------------------------------------------------ %% Tests @@ -115,10 +123,7 @@ t_create_remove(_) -> ?assertNot(is_process_alive(Pid)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_create_remove_local(_) -> @@ -174,10 +179,7 @@ t_create_remove_local(_) -> ?assertNot(is_process_alive(Pid)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_do_not_start_after_created(_) -> @@ -219,10 +221,7 @@ t_do_not_start_after_created(_) -> ?assertNot(is_process_alive(Pid2)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_query(_) -> @@ -855,14 +854,12 @@ t_healthy_timeout(_) -> ), ?assertEqual(ok, emqx_resource:remove_local(?ID)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). 
t_healthy(_) -> ?check_trace( + #{timetrap => 10_000}, begin ?assertMatch( {ok, _}, @@ -873,10 +870,13 @@ t_healthy(_) -> #{name => test_resource} ) ), + ct:pal("getting state"), {ok, #{pid := Pid}} = emqx_resource:query(?ID, get_state), timer:sleep(300), + ct:pal("setting state as `connecting`"), emqx_resource:set_resource_status_connecting(?ID), + ct:pal("health check"), ?assertEqual({ok, connected}, emqx_resource:health_check(?ID)), ?assertMatch( [#{status := connected}], @@ -894,10 +894,7 @@ t_healthy(_) -> ?assertEqual(ok, emqx_resource:remove_local(?ID)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_unhealthy_target(_) -> @@ -1005,11 +1002,7 @@ t_stop_start(_) -> ?assertEqual(ok, emqx_resource:stop(?ID)), ?assertEqual(0, emqx_resource_metrics:inflight_get(?ID)) end, - - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_stop_start_local(_) -> @@ -1064,10 +1057,7 @@ t_stop_start_local(_) -> ?assert(is_process_alive(Pid1)) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_list_filter(_) -> @@ -1269,10 +1259,7 @@ t_health_check_disconnected(_) -> emqx_resource:health_check(?ID) ) end, - fun(Trace) -> - ?assertEqual([], ?of_kind("inconsistent_status", Trace)), - ?assertEqual([], ?of_kind("inconsistent_cache", Trace)) - end + [log_consistency_prop()] ). t_unblock_only_required_buffer_workers(_) -> @@ -3116,6 +3103,44 @@ t_telemetry_handler_crash(_Config) -> ), ok. 
+t_non_blocking_resource_health_check(_Config) -> + ?check_trace( + begin + {ok, _} = + create( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource, health_check_error => {delay, 1_000}}, + #{health_check_interval => 100} + ), + %% concurrently attempt to health check the resource; should do it only once + %% for all callers + NumCallers = 20, + Expected = lists:duplicate(NumCallers, {ok, connected}), + ?assertEqual( + Expected, + emqx_utils:pmap( + fun(_) -> emqx_resource:health_check(?ID) end, + lists:seq(1, NumCallers) + ) + ), + + NumCallers + end, + [ + log_consistency_prop(), + fun(NumCallers, Trace) -> + %% shouldn't have one health check per caller + SubTrace = ?of_kind(connector_demo_health_check_delay, Trace), + ?assertMatch([_ | _], SubTrace), + ?assert(length(SubTrace) < (NumCallers div 2), #{trace => Trace}), + ok + end + ] + ), + ok. + %%------------------------------------------------------------------------------ %% Helpers %%------------------------------------------------------------------------------ @@ -3373,3 +3398,10 @@ create(Id, Group, Type, Config) -> create(Id, Group, Type, Config, Opts) -> emqx_resource:create_local(Id, Group, Type, Config, Opts). + +log_consistency_prop() -> + {"check state and cache consistency", fun ?MODULE:log_consistency_prop/1}. +log_consistency_prop(Trace) -> + ?assertEqual([], ?of_kind("inconsistent_status", Trace)), + ?assertEqual([], ?of_kind("inconsistent_cache", Trace)), + ok. diff --git a/apps/emqx_utils/src/emqx_utils_redact.erl b/apps/emqx_utils/src/emqx_utils_redact.erl index 4d3cc7f7b..c830048a9 100644 --- a/apps/emqx_utils/src/emqx_utils_redact.erl +++ b/apps/emqx_utils/src/emqx_utils_redact.erl @@ -65,8 +65,11 @@ redact(Term, Checker) -> redact_headers(Term) -> do_redact_headers(Term). 
-do_redact(L, Checker) when is_list(L) -> - lists:map(fun(E) -> do_redact(E, Checker) end, L); +do_redact([], _Checker) -> + []; +do_redact([X | Xs], Checker) -> + %% Note: we could be dealing with an improper list + [do_redact(X, Checker) | do_redact(Xs, Checker)]; do_redact(M, Checker) when is_map(M) -> maps:map( fun(K, V) -> @@ -252,6 +255,14 @@ redact2_test_() -> Keys = [secret, passcode], [{case_name(atom, Key), fun() -> Case(Key, Checker) end} || Key <- Keys]. +redact_improper_list_test_() -> + %% improper lists: check that we don't crash + %% may arise when we redact process states with pending `gen' requests + [ + ?_assertEqual([alias | foo], redact([alias | foo])), + ?_assertEqual([1, 2 | foo], redact([1, 2 | foo])) + ]. + deobfuscate_test() -> NewConf0 = #{foo => <<"bar0">>, password => <<"123456">>}, ?assertEqual(NewConf0, deobfuscate(NewConf0, #{foo => <<"bar">>, password => <<"654321">>})), diff --git a/changes/ce/fix-12812.en.md b/changes/ce/fix-12812.en.md new file mode 100644 index 000000000..f530c2060 --- /dev/null +++ b/changes/ce/fix-12812.en.md @@ -0,0 +1 @@ +Made resource health checks non-blocking operations. This means that operations such as updating or removing a resource won't be blocked by a lengthy running health check. From 1a4cfc2a2d9b504df8734c4317a23b276edb1115 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 21 Mar 2024 17:30:36 +0800 Subject: [PATCH 027/234] fix(api_schema): removed metrics schema in api spec - Followup [PR#6622](https://github.com/emqx/emqx/pull/6622). 
--- apps/emqx_management/src/emqx_mgmt_api_metrics.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_metrics.erl b/apps/emqx_management/src/emqx_mgmt_api_metrics.erl index 8d61ee1fb..f2e302569 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_metrics.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_metrics.erl @@ -264,10 +264,10 @@ properties() -> "messages.qos0.received\fmessages.qos1.received and messages.qos2.received" >> ), - m( - 'messages.retained', - <<"Number of retained messages">> - ), + %% m( + %% 'messages.retained', + %% <<"Number of retained messages">> + %% ), m( 'messages.sent', << From 0f4b14829416b51b236d850a9eb8525168f32ac5 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 22 Mar 2024 18:03:13 +0800 Subject: [PATCH 028/234] refactor: uniform shared_sub table macros --- apps/emqx/include/emqx_shared_sub.hrl | 28 ++++++++++++ apps/emqx/src/emqx_shared_sub.erl | 66 +++++++++++++-------------- 2 files changed, 60 insertions(+), 34 deletions(-) create mode 100644 apps/emqx/include/emqx_shared_sub.hrl diff --git a/apps/emqx/include/emqx_shared_sub.hrl b/apps/emqx/include/emqx_shared_sub.hrl new file mode 100644 index 000000000..d744bd8a8 --- /dev/null +++ b/apps/emqx/include/emqx_shared_sub.hrl @@ -0,0 +1,28 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2018-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-ifndef(EMQX_SHARED_SUB_HRL). +-define(EMQX_SHARED_SUB_HRL, true). + +%% Mnesia table for shared sub message routing +-define(SHARED_SUBSCRIPTION, emqx_shared_subscription). + +%% ETS tables for Shared PubSub +-define(SHARED_SUBSCRIBER, emqx_shared_subscriber). +-define(ALIVE_SHARED_SUBSCRIBERS, emqx_alive_shared_subscribers). +-define(SHARED_SUBS_ROUND_ROBIN_COUNTER, emqx_shared_subscriber_round_robin_counter). + +-endif. diff --git a/apps/emqx/src/emqx_shared_sub.erl b/apps/emqx/src/emqx_shared_sub.erl index ce694ba33..54c107111 100644 --- a/apps/emqx/src/emqx_shared_sub.erl +++ b/apps/emqx/src/emqx_shared_sub.erl @@ -21,6 +21,7 @@ -include("emqx_schema.hrl"). -include("emqx.hrl"). -include("emqx_mqtt.hrl"). +-include("emqx_shared_sub.hrl"). -include("logger.hrl"). -include("types.hrl"). @@ -84,10 +85,7 @@ | hash_topic. -define(SERVER, ?MODULE). --define(TAB, emqx_shared_subscription). --define(SHARED_SUBS_ROUND_ROBIN_COUNTER, emqx_shared_subscriber_round_robin_counter). --define(SHARED_SUBS, emqx_shared_subscriber). --define(ALIVE_SUBS, emqx_alive_shared_subscribers). + -define(SHARED_SUB_QOS1_DISPATCH_TIMEOUT_SECONDS, 5). -define(IS_LOCAL_PID(Pid), (is_pid(Pid) andalso node(Pid) =:= node())). -define(ACK, shared_sub_ack). @@ -99,21 +97,21 @@ -record(state, {pmon}). --record(emqx_shared_subscription, {group, topic, subpid}). +-record(?SHARED_SUBSCRIPTION, {group, topic, subpid}). 
%%-------------------------------------------------------------------- %% Mnesia bootstrap %%-------------------------------------------------------------------- create_tables() -> - ok = mria:create_table(?TAB, [ + ok = mria:create_table(?SHARED_SUBSCRIPTION, [ {type, bag}, {rlog_shard, ?SHARED_SUB_SHARD}, {storage, ram_copies}, - {record_name, emqx_shared_subscription}, - {attributes, record_info(fields, emqx_shared_subscription)} + {record_name, ?SHARED_SUBSCRIPTION}, + {attributes, record_info(fields, ?SHARED_SUBSCRIPTION)} ]), - [?TAB]. + [?SHARED_SUBSCRIPTION]. %%-------------------------------------------------------------------- %% API @@ -132,7 +130,7 @@ unsubscribe(Group, Topic, SubPid) when is_pid(SubPid) -> gen_server:call(?SERVER, {unsubscribe, Group, Topic, SubPid}). record(Group, Topic, SubPid) -> - #emqx_shared_subscription{group = Group, topic = Topic, subpid = SubPid}. + #?SHARED_SUBSCRIPTION{group = Group, topic = Topic, subpid = SubPid}. -spec dispatch(emqx_types:group(), emqx_types:topic(), emqx_types:delivery()) -> emqx_types:deliver_result(). @@ -394,18 +392,18 @@ subscribers(Group, Topic, FailedSubs) -> %% Select ETS table to get all subscriber pids. subscribers(Group, Topic) -> - ets:select(?TAB, [{{emqx_shared_subscription, Group, Topic, '$1'}, [], ['$1']}]). + ets:select(?SHARED_SUBSCRIPTION, [{{emqx_shared_subscription, Group, Topic, '$1'}, [], ['$1']}]). 
%%-------------------------------------------------------------------- %% gen_server callbacks %%-------------------------------------------------------------------- init([]) -> - ok = mria:wait_for_tables([?TAB]), - {ok, _} = mnesia:subscribe({table, ?TAB, simple}), + ok = mria:wait_for_tables([?SHARED_SUBSCRIPTION]), + {ok, _} = mnesia:subscribe({table, ?SHARED_SUBSCRIPTION, simple}), {atomic, PMon} = mria:transaction(?SHARED_SUB_SHARD, fun ?MODULE:init_monitors/0), - ok = emqx_utils_ets:new(?SHARED_SUBS, [protected, bag]), - ok = emqx_utils_ets:new(?ALIVE_SUBS, [protected, set, {read_concurrency, true}]), + ok = emqx_utils_ets:new(?SHARED_SUBSCRIBER, [protected, bag]), + ok = emqx_utils_ets:new(?ALIVE_SHARED_SUBSCRIBERS, [protected, set, {read_concurrency, true}]), ok = emqx_utils_ets:new(?SHARED_SUBS_ROUND_ROBIN_COUNTER, [ public, set, {write_concurrency, true} ]), @@ -413,26 +411,26 @@ init([]) -> init_monitors() -> mnesia:foldl( - fun(#emqx_shared_subscription{subpid = SubPid}, Mon) -> + fun(#?SHARED_SUBSCRIPTION{subpid = SubPid}, Mon) -> emqx_pmon:monitor(SubPid, Mon) end, emqx_pmon:new(), - ?TAB + ?SHARED_SUBSCRIPTION ). 
handle_call({subscribe, Group, Topic, SubPid}, _From, State = #state{pmon = PMon}) -> - mria:dirty_write(?TAB, record(Group, Topic, SubPid)), - case ets:member(?SHARED_SUBS, {Group, Topic}) of + mria:dirty_write(?SHARED_SUBSCRIPTION, record(Group, Topic, SubPid)), + case ets:member(?SHARED_SUBSCRIBER, {Group, Topic}) of true -> ok; false -> ok = emqx_router:do_add_route(Topic, {Group, node()}) end, ok = maybe_insert_alive_tab(SubPid), ok = maybe_insert_round_robin_count({Group, Topic}), - true = ets:insert(?SHARED_SUBS, {{Group, Topic}, SubPid}), + true = ets:insert(?SHARED_SUBSCRIBER, {{Group, Topic}, SubPid}), {reply, ok, update_stats(State#state{pmon = emqx_pmon:monitor(SubPid, PMon)})}; handle_call({unsubscribe, Group, Topic, SubPid}, _From, State) -> - mria:dirty_delete_object(?TAB, record(Group, Topic, SubPid)), - true = ets:delete_object(?SHARED_SUBS, {{Group, Topic}, SubPid}), + mria:dirty_delete_object(?SHARED_SUBSCRIPTION, record(Group, Topic, SubPid)), + true = ets:delete_object(?SHARED_SUBSCRIBER, {{Group, Topic}, SubPid}), delete_route_if_needed({Group, Topic}), maybe_delete_round_robin_count({Group, Topic}), {reply, ok, update_stats(State)}; @@ -445,7 +443,7 @@ handle_cast(Msg, State) -> {noreply, State}. handle_info( - {mnesia_table_event, {write, #emqx_shared_subscription{subpid = SubPid}, _}}, + {mnesia_table_event, {write, #?SHARED_SUBSCRIPTION{subpid = SubPid}, _}}, State = #state{pmon = PMon} ) -> ok = maybe_insert_alive_tab(SubPid), @@ -455,7 +453,7 @@ handle_info( %% The trick is we don't demonitor the subscriber here, and (after a long time) it will eventually %% be disconnected. 
% handle_info({mnesia_table_event, {delete_object, OldRecord, _}}, State = #state{pmon = PMon}) -> -% #emqx_shared_subscription{subpid = SubPid} = OldRecord, +% #?SHARED_SUBSCRIPTION{subpid = SubPid} = OldRecord, % {noreply, update_stats(State#state{pmon = emqx_pmon:demonitor(SubPid, PMon)})}; handle_info({mnesia_table_event, _Event}, State) -> @@ -468,7 +466,7 @@ handle_info(_Info, State) -> {noreply, State}. terminate(_Reason, _State) -> - mnesia:unsubscribe({table, ?TAB, simple}). + mnesia:unsubscribe({table, ?SHARED_SUBSCRIPTION, simple}). code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -501,7 +499,7 @@ maybe_delete_round_robin_count({Group, _Topic} = GroupTopic) -> ok. if_no_more_subscribers(GroupTopic, Fn) -> - case ets:member(?SHARED_SUBS, GroupTopic) of + case ets:member(?SHARED_SUBSCRIBER, GroupTopic) of true -> ok; false -> Fn() end, @@ -510,26 +508,26 @@ if_no_more_subscribers(GroupTopic, Fn) -> %% keep track of alive remote pids maybe_insert_alive_tab(Pid) when ?IS_LOCAL_PID(Pid) -> ok; maybe_insert_alive_tab(Pid) when is_pid(Pid) -> - ets:insert(?ALIVE_SUBS, {Pid}), + ets:insert(?ALIVE_SHARED_SUBSCRIBERS, {Pid}), ok. cleanup_down(SubPid) -> - ?IS_LOCAL_PID(SubPid) orelse ets:delete(?ALIVE_SUBS, SubPid), + ?IS_LOCAL_PID(SubPid) orelse ets:delete(?ALIVE_SHARED_SUBSCRIBERS, SubPid), lists:foreach( - fun(Record = #emqx_shared_subscription{topic = Topic, group = Group}) -> - ok = mria:dirty_delete_object(?TAB, Record), - true = ets:delete_object(?SHARED_SUBS, {{Group, Topic}, SubPid}), + fun(Record = #?SHARED_SUBSCRIPTION{topic = Topic, group = Group}) -> + ok = mria:dirty_delete_object(?SHARED_SUBSCRIPTION, Record), + true = ets:delete_object(?SHARED_SUBSCRIBER, {{Group, Topic}, SubPid}), maybe_delete_round_robin_count({Group, Topic}), delete_route_if_needed({Group, Topic}) end, - mnesia:dirty_match_object(#emqx_shared_subscription{_ = '_', subpid = SubPid}) + mnesia:dirty_match_object(#?SHARED_SUBSCRIPTION{_ = '_', subpid = SubPid}) ). 
update_stats(State) -> emqx_stats:setstat( 'subscriptions.shared.count', 'subscriptions.shared.max', - ets:info(?TAB, size) + ets:info(?SHARED_SUBSCRIPTION, size) ), State. @@ -543,7 +541,7 @@ is_active_sub(Pid, FailedSubs, All) -> is_alive_sub(Pid) when ?IS_LOCAL_PID(Pid) -> erlang:is_process_alive(Pid); is_alive_sub(Pid) -> - [] =/= ets:lookup(?ALIVE_SUBS, Pid). + [] =/= ets:lookup(?ALIVE_SHARED_SUBSCRIBERS, Pid). delete_route_if_needed({Group, Topic} = GroupTopic) -> if_no_more_subscribers(GroupTopic, fun() -> From 50bceee9ab3a244c49b7e78403e346872b9107b0 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 22 Mar 2024 18:31:57 +0800 Subject: [PATCH 029/234] fix(stats): `'subscribers.count'` contains shared-subscriber --- apps/emqx/src/emqx_broker.erl | 18 ---------------- apps/emqx/src/emqx_broker_helper.erl | 31 +++++++++++++++++++++++++++- apps/emqx/test/emqx_broker_SUITE.erl | 2 +- apps/emqx/test/emqx_stats_SUITE.erl | 6 +++--- changes/ce/fix-12765.en.md | 2 ++ 5 files changed, 36 insertions(+), 23 deletions(-) create mode 100644 changes/ce/fix-12765.en.md diff --git a/apps/emqx/src/emqx_broker.erl b/apps/emqx/src/emqx_broker.erl index b20c3a15f..8c1239892 100644 --- a/apps/emqx/src/emqx_broker.erl +++ b/apps/emqx/src/emqx_broker.erl @@ -60,9 +60,6 @@ -export([topics/0]). -%% Stats fun --export([stats_fun/0]). - %% gen_server callbacks -export([ init/1, @@ -469,21 +466,6 @@ set_subopts(SubPid, Topic, NewOpts) -> topics() -> emqx_router:topics(). -%%-------------------------------------------------------------------- -%% Stats fun -%%-------------------------------------------------------------------- - -stats_fun() -> - safe_update_stats(?SUBSCRIBER, 'subscribers.count', 'subscribers.max'), - safe_update_stats(?SUBSCRIPTION, 'subscriptions.count', 'subscriptions.max'), - safe_update_stats(?SUBOPTION, 'suboptions.count', 'suboptions.max'). 
- -safe_update_stats(Tab, Stat, MaxStat) -> - case ets:info(Tab, size) of - undefined -> ok; - Size -> emqx_stats:setstat(Stat, MaxStat, Size) - end. - %%-------------------------------------------------------------------- %% call, cast, pick %%-------------------------------------------------------------------- diff --git a/apps/emqx/src/emqx_broker_helper.erl b/apps/emqx/src/emqx_broker_helper.erl index 8562a1968..368398b92 100644 --- a/apps/emqx/src/emqx_broker_helper.erl +++ b/apps/emqx/src/emqx_broker_helper.erl @@ -18,6 +18,8 @@ -behaviour(gen_server). +-include("emqx_router.hrl"). +-include("emqx_shared_sub.hrl"). -include("logger.hrl"). -include("types.hrl"). @@ -33,6 +35,9 @@ reclaim_seq/1 ]). +%% Stats fun +-export([stats_fun/0]). + %% gen_server callbacks -export([ init/1, @@ -99,6 +104,30 @@ create_seq(Topic) -> reclaim_seq(Topic) -> emqx_sequence:reclaim(?SUBSEQ, Topic). +%%-------------------------------------------------------------------- +%% Stats fun +%%-------------------------------------------------------------------- + +stats_fun() -> + safe_update_stats(subscriber_val(), 'subscribers.count', 'subscribers.max'), + safe_update_stats(table_size(?SUBSCRIPTION), 'subscriptions.count', 'subscriptions.max'), + safe_update_stats(table_size(?SUBOPTION), 'suboptions.count', 'suboptions.max'). + +safe_update_stats(undefined, _Stat, _MaxStat) -> + ok; +safe_update_stats(Val, Stat, MaxStat) when is_integer(Val) -> + emqx_stats:setstat(Stat, MaxStat, Val). + +subscriber_val() -> + sum_subscriber(table_size(?SUBSCRIBER), table_size(?SHARED_SUBSCRIBER)). + +sum_subscriber(undefined, undefined) -> undefined; +sum_subscriber(undefined, V2) when is_integer(V2) -> V2; +sum_subscriber(V1, undefined) when is_integer(V1) -> V1; +sum_subscriber(V1, V2) when is_integer(V1), is_integer(V2) -> V1 + V2. + +table_size(Tab) when is_atom(Tab) -> ets:info(Tab, size). 
+ %%-------------------------------------------------------------------- %% gen_server callbacks %%-------------------------------------------------------------------- @@ -115,7 +144,7 @@ init([]) -> %% SubMon: SubPid -> SubId ok = emqx_utils_ets:new(?SUBMON, [public, {read_concurrency, true}, {write_concurrency, true}]), %% Stats timer - ok = emqx_stats:update_interval(broker_stats, fun emqx_broker:stats_fun/0), + ok = emqx_stats:update_interval(broker_stats, fun ?MODULE:stats_fun/0), {ok, #{pmon => emqx_pmon:new()}}. handle_call(Req, _From, State) -> diff --git a/apps/emqx/test/emqx_broker_SUITE.erl b/apps/emqx/test/emqx_broker_SUITE.erl index d4bb9e7fc..e106e3375 100644 --- a/apps/emqx/test/emqx_broker_SUITE.erl +++ b/apps/emqx/test/emqx_broker_SUITE.erl @@ -158,7 +158,7 @@ t_stats_fun(Config) when is_list(Config) -> ok = emqx_broker:subscribe(<<"topic">>, <<"clientid">>), ok = emqx_broker:subscribe(<<"topic2">>, <<"clientid">>), %% ensure stats refreshed - emqx_broker:stats_fun(), + emqx_broker_helper:stats_fun(), %% emqx_stats:set_stat is a gen_server cast %% make a synced call sync ignored = gen_server:call(emqx_stats, call, infinity), diff --git a/apps/emqx/test/emqx_stats_SUITE.erl b/apps/emqx/test/emqx_stats_SUITE.erl index 1a672fa67..1c32396ce 100644 --- a/apps/emqx/test/emqx_stats_SUITE.erl +++ b/apps/emqx/test/emqx_stats_SUITE.erl @@ -105,10 +105,10 @@ t_helper(_) -> end end, [ - {"emqx_broker", MkTestFun(emqx_broker, stats_fun)}, - {"emqx_sm", MkTestFun(emqx_sm, stats_fun)}, + {"emqx_broker_helper", MkTestFun(emqx_broker_helper, stats_fun)}, {"emqx_router_helper", MkTestFun(emqx_router_helper, stats_fun)}, - {"emqx_cm", MkTestFun(emqx_cm, stats_fun)} + {"emqx_cm", MkTestFun(emqx_cm, stats_fun)}, + {"emqx_retainer", MkTestFun(emqx_retainer, stats_fun)} ]. 
with_proc(F) -> diff --git a/changes/ce/fix-12765.en.md b/changes/ce/fix-12765.en.md new file mode 100644 index 000000000..01c13146d --- /dev/null +++ b/changes/ce/fix-12765.en.md @@ -0,0 +1,2 @@ +Make sure stats `'subscribers.count'` `'subscribers.max'` countains shared-subscribers. +It only contains non-shared subscribers previously. From 50150423e1159159ab87d8a4d481c83309820d4b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 2 Apr 2024 17:04:38 +0800 Subject: [PATCH 030/234] docs: rename change log file name due to cherry-pick --- changes/ce/{fix-12765.en.md => fix-12824.en.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changes/ce/{fix-12765.en.md => fix-12824.en.md} (100%) diff --git a/changes/ce/fix-12765.en.md b/changes/ce/fix-12824.en.md similarity index 100% rename from changes/ce/fix-12765.en.md rename to changes/ce/fix-12824.en.md From 5759ba5162a2ba400fa33c94d2919cde47e5f2c9 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 2 Apr 2024 17:09:22 +0800 Subject: [PATCH 031/234] chore: bump app version --- apps/emqx/src/emqx.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 1d8c55fe9..462b7e74b 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -2,7 +2,7 @@ {application, emqx, [ {id, "emqx"}, {description, "EMQX Core"}, - {vsn, "5.2.0"}, + {vsn, "5.2.1"}, {modules, []}, {registered, []}, {applications, [ From b8b9b7739b8306ed4cb2ced6bf51b01eed8eed32 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 19 Mar 2024 20:03:08 +0100 Subject: [PATCH 032/234] chore(ds): slightly simplify working with storage generations --- .../src/emqx_ds_storage_layer.erl | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 5319458e2..fee4c4457 100644 --- 
a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -259,14 +259,14 @@ get_streams(Shard, TopicFilter, StartTime) -> lists:flatmap( fun(GenId) -> ?tp(get_streams_get_gen, #{gen_id => GenId}), - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> Streams = Mod:get_streams(Shard, GenData, TopicFilter, StartTime), [ {GenId, ?stream_v2(GenId, InnerStream)} || InnerStream <- Streams ]; - {error, not_found} -> + not_found -> %% race condition: generation was dropped before getting its streams? [] end @@ -282,14 +282,14 @@ get_delete_streams(Shard, TopicFilter, StartTime) -> lists:flatmap( fun(GenId) -> ?tp(get_streams_get_gen, #{gen_id => GenId}), - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> Streams = Mod:get_delete_streams(Shard, GenData, TopicFilter, StartTime), [ ?delete_stream(GenId, InnerStream) || InnerStream <- Streams ]; - {error, not_found} -> + not_found -> %% race condition: generation was dropped before getting its streams? [] end @@ -302,8 +302,8 @@ get_delete_streams(Shard, TopicFilter, StartTime) -> make_iterator( Shard, ?stream_v2(GenId, Stream), TopicFilter, StartTime ) -> - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> case Mod:make_iterator(Shard, GenData, Stream, TopicFilter, StartTime) of {ok, Iter} -> {ok, #{ @@ -314,7 +314,7 @@ make_iterator( {error, _} = Err -> Err end; - {error, not_found} -> + not_found -> {error, unrecoverable, generation_not_found} end. 
@@ -323,8 +323,8 @@ make_iterator( make_delete_iterator( Shard, ?delete_stream(GenId, Stream), TopicFilter, StartTime ) -> - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> case Mod:make_delete_iterator(Shard, GenData, Stream, TopicFilter, StartTime) of {ok, Iter} -> {ok, #{ @@ -335,7 +335,7 @@ make_delete_iterator( {error, _} = Err -> Err end; - {error, not_found} -> + not_found -> {error, end_of_stream} end. @@ -346,8 +346,8 @@ update_iterator( #{?tag := ?IT, ?generation := GenId, ?enc := OldIter}, DSKey ) -> - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> case Mod:update_iterator(Shard, GenData, OldIter, DSKey) of {ok, Iter} -> {ok, #{ @@ -358,15 +358,15 @@ update_iterator( {error, _} = Err -> Err end; - {error, not_found} -> + not_found -> {error, unrecoverable, generation_not_found} end. -spec next(shard_id(), iterator(), pos_integer()) -> emqx_ds:next_result(iterator()). next(Shard, Iter = #{?tag := ?IT, ?generation := GenId, ?enc := GenIter0}, BatchSize) -> - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> Current = generation_current(Shard), case Mod:next(Shard, GenData, GenIter0, BatchSize) of {ok, _GenIter, []} when GenId < Current -> @@ -379,7 +379,7 @@ next(Shard, Iter = #{?tag := ?IT, ?generation := GenId, ?enc := GenIter0}, Batch Error = {error, _, _} -> Error end; - {error, not_found} -> + not_found -> %% generation was possibly dropped by GC {error, unrecoverable, generation_not_found} end. 
@@ -392,8 +392,8 @@ delete_next( Selector, BatchSize ) -> - case generation_get_safe(Shard, GenId) of - {ok, #{module := Mod, data := GenData}} -> + case generation_get(Shard, GenId) of + #{module := Mod, data := GenData} -> Current = generation_current(Shard), case Mod:delete_next(Shard, GenData, GenIter0, Selector, BatchSize) of {ok, _GenIter, _Deleted = 0, _IteratedOver = 0} when GenId < Current -> @@ -406,7 +406,7 @@ delete_next( Error = {error, _} -> Error end; - {error, not_found} -> + not_found -> %% generation was possibly dropped by GC {ok, end_of_stream} end. @@ -777,18 +777,13 @@ generation_current(Shard) -> #{current_generation := Current} = get_schema_runtime(Shard), Current. --spec generation_get(shard_id(), gen_id()) -> generation(). +-spec generation_get(shard_id(), gen_id()) -> generation() | not_found. generation_get(Shard, GenId) -> - {ok, GenData} = generation_get_safe(Shard, GenId), - GenData. - --spec generation_get_safe(shard_id(), gen_id()) -> {ok, generation()} | {error, not_found}. -generation_get_safe(Shard, GenId) -> case get_schema_runtime(Shard) of #{?GEN_KEY(GenId) := GenData} -> - {ok, GenData}; + GenData; #{} -> - {error, not_found} + not_found end. -spec generations_since(shard_id(), emqx_ds:time()) -> [gen_id()]. 
From 77a022bd938f47680f140a3d7c7c8c32459bde7b Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 19 Mar 2024 20:05:12 +0100 Subject: [PATCH 033/234] feat(dsrepl): transfer storage snapshot during ra snapshot recovery --- .../src/emqx_ds_builtin_db_sup.erl | 23 +- .../src/emqx_ds_replication_layer.erl | 28 +- .../src/emqx_ds_replication_layer_shard.erl | 18 +- .../src/emqx_ds_replication_snapshot.erl | 229 ++++++++++++ .../src/emqx_ds_storage_layer.erl | 74 +++- .../src/emqx_ds_storage_layer_sup.erl | 88 ----- .../src/emqx_ds_storage_snapshot.erl | 325 ++++++++++++++++++ .../test/emqx_ds_replication_SUITE.erl | 202 +++++++++++ .../test/emqx_ds_storage_snapshot_SUITE.erl | 149 ++++++++ 9 files changed, 1027 insertions(+), 109 deletions(-) create mode 100644 apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl delete mode 100644 apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl create mode 100644 apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl create mode 100644 apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl create mode 100644 apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index a93a94168..79e2f6120 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -21,7 +21,16 @@ -behaviour(supervisor). %% API: --export([start_db/2, start_shard/1, start_egress/1, stop_shard/1, ensure_shard/1, ensure_egress/1]). +-export([ + start_db/2, + start_shard/1, + start_egress/1, + stop_shard/1, + terminate_storage/1, + restart_storage/1, + ensure_shard/1, + ensure_egress/1 +]). -export([which_shards/1]). %% behaviour callbacks: @@ -64,12 +73,22 @@ start_shard({DB, Shard}) -> start_egress({DB, Shard}) -> supervisor:start_child(?via(#?egress_sup{db = DB}), egress_spec(DB, Shard)). 
--spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, _}. +-spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok. stop_shard(Shard = {DB, _}) -> Sup = ?via(#?shards_sup{db = DB}), ok = supervisor:terminate_child(Sup, Shard), ok = supervisor:delete_child(Sup, Shard). +-spec terminate_storage(emqx_ds_storage_layer:shard_id()) -> ok | {error, _Reason}. +terminate_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:terminate_child(Sup, {Shard, storage}). + +-spec restart_storage(emqx_ds_storage_layer:shard_id()) -> {ok, _Child} | {error, _Reason}. +restart_storage({DB, Shard}) -> + Sup = ?via(#?shard_sup{db = DB, shard = Shard}), + supervisor:restart_child(Sup, {Shard, storage}). + -spec ensure_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, _Reason}. ensure_shard(Shard) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index f8c4980d0..b1b8fef36 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -65,7 +65,9 @@ -export([ init/1, - apply/3 + apply/3, + + snapshot_module/0 ]). -export_type([ @@ -80,6 +82,10 @@ batch/0 ]). +-export_type([ + ra_state/0 +]). + -include_lib("emqx_utils/include/emqx_message.hrl"). -include("emqx_ds_replication_layer.hrl"). @@ -140,6 +146,20 @@ -type generation_rank() :: {shard_id(), term()}. +%% Core state of the replication, i.e. the state of ra machine. +-type ra_state() :: #{ + db_shard := {emqx_ds:db(), shard_id()}, + latest := timestamp_us() +}. + +%% Command. Each command is an entry in the replication log. +-type ra_command() :: #{ + ?tag := ?BATCH | add_generation | update_config | drop_generation, + _ => _ +}. + +-type timestamp_us() :: non_neg_integer(). 
+ %%================================================================================ %% API functions %%================================================================================ @@ -635,9 +655,12 @@ ra_drop_shard(DB, Shard) -> %% +-spec init(_Args :: map()) -> ra_state(). init(#{db := DB, shard := Shard}) -> #{db_shard => {DB, Shard}, latest => 0}. +-spec apply(ra_machine:command_meta_data(), ra_command(), ra_state()) -> + {ra_state(), _Reply, _Effects}. apply( #{index := RaftIdx}, #{ @@ -717,3 +740,6 @@ timestamp_to_timeus(TimestampMs) -> timeus_to_timestamp(TimestampUs) -> TimestampUs div 1000. + +snapshot_module() -> + emqx_ds_replication_snapshot. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 7540e01bb..62a6edab2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -147,19 +147,21 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> Bootstrap = false; {error, name_not_registered} -> Bootstrap = true, + Machine = {module, emqx_ds_replication_layer, #{db => DB, shard => Shard}}, + LogOpts = maps:with( + [ + snapshot_interval, + resend_window + ], + ReplicationOpts + ), ok = ra:start_server(DB, #{ id => LocalServer, uid => <>, cluster_name => ClusterName, initial_members => Servers, - machine => {module, emqx_ds_replication_layer, #{db => DB, shard => Shard}}, - log_init_args => maps:with( - [ - snapshot_interval, - resend_window - ], - ReplicationOpts - ) + machine => Machine, + log_init_args => LogOpts }) end, case Servers of diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl new file mode 100644 index 000000000..ab06dff53 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl @@ -0,0 +1,229 @@ 
+%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ds_replication_snapshot). + +-include_lib("snabbkaffe/include/trace.hrl"). + +-behaviour(ra_snapshot). +-export([ + prepare/2, + write/3, + + begin_read/2, + read_chunk/3, + + begin_accept/2, + accept_chunk/2, + complete_accept/2, + + recover/1, + validate/1, + read_meta/1 +]). + +%% Read state. +-record(rs, { + phase :: machine_state | storage_snapshot, + started_at :: _Time :: integer(), + state :: emqx_ds_replication_layer:ra_state() | undefined, + reader :: emqx_ds_storage_snapshot:reader() | undefined +}). + +%% Write state. +-record(ws, { + phase :: machine_state | storage_snapshot, + started_at :: _Time :: integer(), + dir :: file:filename(), + meta :: ra_snapshot:meta(), + state :: emqx_ds_replication_layer:ra_state() | undefined, + writer :: emqx_ds_storage_snapshot:writer() | undefined +}). + +-type rs() :: #rs{}. +-type ws() :: #ws{}. + +-type ra_state() :: emqx_ds_replication_layer:ra_state(). + +%% Writing a snapshot. +%% This process is exactly the same as writing a ra log snapshot: store the +%% log meta and the machine state in a single snapshot file. + +-spec prepare(_RaftIndex, ra_state()) -> _State :: ra_state(). 
+prepare(Index, State) -> + ra_log_snapshot:prepare(Index, State). + +-spec write(_SnapshotDir :: file:filename(), ra_snapshot:meta(), _State :: ra_state()) -> + ok | {ok, _BytesWritten :: non_neg_integer()} | {error, ra_snapshot:file_err()}. +write(Dir, Meta, MachineState) -> + ra_log_snapshot:write(Dir, Meta, MachineState). + +%% Reading a snapshot. +%% This is triggered by the leader when it finds out that a follower is +%% behind so much that there are no log segments covering the gap anymore. +%% This process, on the other hand, MUST involve reading the storage snapshot, +%% (in addition to the log snapshot) to reconstruct the storage state on the +%% target node. + +-spec begin_read(_SnapshotDir :: file:filename(), _Context :: #{}) -> + {ok, ra_snapshot:meta(), rs()} | {error, _Reason :: term()}. +begin_read(Dir, _Context) -> + RS = #rs{ + phase = machine_state, + started_at = erlang:monotonic_time(millisecond) + }, + case ra_log_snapshot:recover(Dir) of + {ok, Meta, MachineState} -> + start_snapshot_reader(Meta, RS#rs{state = MachineState}); + Error -> + Error + end. + +start_snapshot_reader(Meta, RS) -> + ShardId = shard_id(RS), + logger:info(#{ + msg => "dsrepl_snapshot_read_started", + shard => ShardId + }), + {ok, SnapReader} = emqx_ds_storage_layer:take_snapshot(ShardId), + {ok, Meta, RS#rs{reader = SnapReader}}. + +-spec read_chunk(rs(), _Size :: non_neg_integer(), _SnapshotDir :: file:filename()) -> + {ok, binary(), {next, rs()} | last} | {error, _Reason :: term()}. +read_chunk(RS = #rs{phase = machine_state, state = MachineState}, _Size, _Dir) -> + Chunk = term_to_binary(MachineState), + {ok, Chunk, {next, RS#rs{phase = storage_snapshot}}}; +read_chunk(RS = #rs{phase = storage_snapshot, reader = SnapReader0}, Size, _Dir) -> + case emqx_ds_storage_snapshot:read_chunk(SnapReader0, Size) of + {next, Chunk, SnapReader} -> + {ok, Chunk, {next, RS#rs{reader = SnapReader}}}; + {last, Chunk, SnapReader} -> + %% TODO: idempotence? 
+ ?tp(dsrepl_snapshot_read_complete, #{reader => SnapReader}), + _ = complete_read(RS#rs{reader = SnapReader}), + {ok, Chunk, last}; + {error, Reason} -> + ?tp(dsrepl_snapshot_read_error, #{reason => Reason, reader => SnapReader0}), + _ = emqx_ds_storage_snapshot:release_reader(SnapReader0), + error(Reason) + end. + +complete_read(RS = #rs{reader = SnapReader, started_at = StartedAt}) -> + _ = emqx_ds_storage_snapshot:release_reader(SnapReader), + logger:info(#{ + msg => "dsrepl_snapshot_read_complete", + shard => shard_id(RS), + duration_ms => erlang:monotonic_time(millisecond) - StartedAt, + read_bytes => emqx_ds_storage_snapshot:reader_info(bytes_read, SnapReader) + }). + +%% Accepting a snapshot. +%% This process is triggered by the target server, when the leader finds out +%% that the target server is severely lagging behind. This is receiving side of +%% `begin_read/2` and `read_chunk/3`. + +-spec begin_accept(_SnapshotDir :: file:filename(), ra_snapshot:meta()) -> + {ok, ws()}. +begin_accept(Dir, Meta) -> + WS = #ws{ + phase = machine_state, + started_at = erlang:monotonic_time(millisecond), + dir = Dir, + meta = Meta + }, + {ok, WS}. + +-spec accept_chunk(binary(), ws()) -> + {ok, ws()} | {error, _Reason :: term()}. +accept_chunk(Chunk, WS = #ws{phase = machine_state}) -> + MachineState = binary_to_term(Chunk), + start_snapshot_writer(WS#ws{state = MachineState}); +accept_chunk(Chunk, WS = #ws{phase = storage_snapshot, writer = SnapWriter0}) -> + %% TODO: idempotence? + case emqx_ds_storage_snapshot:write_chunk(SnapWriter0, Chunk) of + {next, SnapWriter} -> + {ok, WS#ws{writer = SnapWriter}}; + {error, Reason} -> + ?tp(dsrepl_snapshot_write_error, #{reason => Reason, writer => SnapWriter0}), + _ = emqx_ds_storage_snapshot:abort_writer(SnapWriter0), + error(Reason) + end. 
+ +start_snapshot_writer(WS) -> + ShardId = shard_id(WS), + logger:info(#{ + msg => "dsrepl_snapshot_write_started", + shard => ShardId + }), + _ = emqx_ds_builtin_db_sup:terminate_storage(ShardId), + {ok, SnapWriter} = emqx_ds_storage_layer:accept_snapshot(ShardId), + {ok, WS#ws{phase = storage_snapshot, writer = SnapWriter}}. + +-spec complete_accept(ws()) -> ok | {error, ra_snapshot:file_err()}. +complete_accept(Chunk, WS = #ws{phase = storage_snapshot, writer = SnapWriter0}) -> + %% TODO: idempotence? + case emqx_ds_storage_snapshot:write_chunk(SnapWriter0, Chunk) of + {last, SnapWriter} -> + ?tp(dsrepl_snapshot_write_complete, #{writer => SnapWriter}), + _ = emqx_ds_storage_snapshot:release_writer(SnapWriter), + Result = complete_accept(WS#ws{writer = SnapWriter}), + ?tp(dsrepl_snapshot_accepted, #{shard => shard_id(WS)}), + Result; + {error, Reason} -> + ?tp(dsrepl_snapshot_write_error, #{reason => Reason, writer => SnapWriter0}), + _ = emqx_ds_storage_snapshot:abort_writer(SnapWriter0), + error(Reason) + end. + +complete_accept(WS = #ws{started_at = StartedAt, writer = SnapWriter}) -> + ShardId = shard_id(WS), + logger:info(#{ + msg => "dsrepl_snapshot_read_complete", + shard => ShardId, + duration_ms => erlang:monotonic_time(millisecond) - StartedAt, + bytes_written => emqx_ds_storage_snapshot:writer_info(bytes_written, SnapWriter) + }), + {ok, _} = emqx_ds_builtin_db_sup:restart_storage(ShardId), + write_machine_snapshot(WS). + +write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) -> + write(Dir, Meta, MachineState). + +%% Restoring machine state from a snapshot. +%% This is equivalent to restoring from a log snapshot. + +-spec recover(_SnapshotDir :: file:filename()) -> + {ok, ra_snapshot:meta(), ra_state()} | {error, _Reason}. +recover(Dir) -> + %% TODO: Verify that storage layer is online? + ra_log_snapshot:recover(Dir). + +-spec validate(_SnapshotDir :: file:filename()) -> + ok | {error, _Reason}. 
+validate(Dir) -> + ra_log_snapshot:validate(Dir). + +-spec read_meta(_SnapshotDir :: file:filename()) -> + {ok, ra_snapshot:meta()} | {error, _Reason}. +read_meta(Dir) -> + ra_log_snapshot:read_meta(Dir). + +shard_id(#rs{state = MachineState}) -> + shard_id(MachineState); +shard_id(#ws{state = MachineState}) -> + shard_id(MachineState); +shard_id(MachineState) -> + maps:get(db_shard, MachineState). diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index fee4c4457..2a3829ac3 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -19,9 +19,12 @@ %% Replication layer API: -export([ - open_shard/2, + %% Lifecycle + start_link/2, drop_shard/1, shard_info/2, + + %% Data store_batch/3, get_streams/3, get_delete_streams/3, @@ -30,14 +33,20 @@ update_iterator/3, next/3, delete_next/4, + + %% Generations update_config/3, add_generation/2, list_generations_with_lifetimes/1, - drop_generation/2 + drop_generation/2, + + %% Snapshotting + take_snapshot/1, + accept_snapshot/1 ]). %% gen_server --export([start_link/2, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). %% internal exports: -export([db_dir/1]). @@ -230,10 +239,7 @@ -record(call_update_config, {options :: emqx_ds:create_db_opts(), since :: emqx_ds:time()}). -record(call_list_generations_with_lifetimes, {}). -record(call_drop_generation, {gen_id :: gen_id()}). - --spec open_shard(shard_id(), options()) -> ok. -open_shard(Shard, Options) -> - emqx_ds_storage_layer_sup:ensure_shard(Shard, Options). +-record(call_take_snapshot, {}). -spec drop_shard(shard_id()) -> ok. drop_shard(Shard) -> @@ -245,12 +251,24 @@ drop_shard(Shard) -> emqx_ds:message_store_opts() ) -> emqx_ds:store_batch_result(). 
-store_batch(Shard, Messages, Options) -> +store_batch(Shard, Messages0, Options) -> %% We always store messages in the current generation: GenId = generation_current(Shard), - #{module := Mod, data := GenData} = generation_get(Shard, GenId), + #{module := Mod, data := GenData, since := Since} = generation_get(Shard, GenId), + case Messages0 of + [{Time, _Msg} | Rest] when Time < Since -> + %% FIXME: log / feedback + Messages = skip_outdated_messages(Since, Rest); + _ -> + Messages = Messages0 + end, Mod:store_batch(Shard, GenData, Messages, Options). +skip_outdated_messages(Since, [{Time, _Msg} | Rest]) when Time < Since -> + skip_outdated_messages(Since, Rest); +skip_outdated_messages(_Since, Messages) -> + Messages. + -spec get_streams(shard_id(), emqx_ds:topic_filter(), emqx_ds:time()) -> [{integer(), stream()}]. get_streams(Shard, TopicFilter, StartTime) -> @@ -445,6 +463,20 @@ shard_info(ShardId, status) -> error:badarg -> down end. +-spec take_snapshot(shard_id()) -> {ok, emqx_ds_storage_snapshot:reader()} | {error, _Reason}. +take_snapshot(ShardId) -> + case gen_server:call(?REF(ShardId), #call_take_snapshot{}, infinity) of + {ok, Dir} -> + emqx_ds_storage_snapshot:new_reader(Dir); + Error -> + Error + end. + +-spec accept_snapshot(shard_id()) -> {ok, emqx_ds_storage_snapshot:writer()} | {error, _Reason}. +accept_snapshot(ShardId) -> + ok = drop_shard(ShardId), + handle_accept_snapshot(ShardId). + %%================================================================================ %% gen_server for the shard %%================================================================================ @@ -514,6 +546,9 @@ handle_call(#call_drop_generation{gen_id = GenId}, _From, S0) -> {Reply, S} = handle_drop_generation(S0, GenId), commit_metadata(S), {reply, Reply, S}; +handle_call(#call_take_snapshot{}, _From, S) -> + Snapshot = handle_take_snapshot(S), + {reply, Snapshot, S}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. 
@@ -735,7 +770,11 @@ rocksdb_open(Shard, Options) -> -spec db_dir(shard_id()) -> file:filename(). db_dir({DB, ShardId}) -> - filename:join([emqx_ds:base_dir(), atom_to_list(DB), binary_to_list(ShardId)]). + filename:join([emqx_ds:base_dir(), DB, binary_to_list(ShardId)]). + +-spec checkpoint_dir(shard_id(), _Name :: file:name()) -> file:filename(). +checkpoint_dir({DB, ShardId}, Name) -> + filename:join([emqx_ds:base_dir(), DB, checkpoints, binary_to_list(ShardId), Name]). -spec update_last_until(Schema, emqx_ds:time()) -> Schema | {error, exists | overlaps_existing_generations} @@ -768,6 +807,21 @@ run_post_creation_actions(#{new_gen_runtime_data := NewGenData}) -> %% Different implementation modules NewGenData. +handle_take_snapshot(#s{db = DB, shard_id = ShardId}) -> + Name = integer_to_list(erlang:system_time(millisecond)), + Dir = checkpoint_dir(ShardId, Name), + _ = filelib:ensure_dir(Dir), + case rocksdb:checkpoint(DB, Dir) of + ok -> + {ok, Dir}; + {error, _} = Error -> + Error + end. + +handle_accept_snapshot(ShardId) -> + Dir = db_dir(ShardId), + emqx_ds_storage_snapshot:new_writer(Dir). + %%-------------------------------------------------------------------------------- %% Schema access %%-------------------------------------------------------------------------------- diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl deleted file mode 100644 index 136669ed2..000000000 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl +++ /dev/null @@ -1,88 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- --module(emqx_ds_storage_layer_sup). - --behaviour(supervisor). - -%% API: --export([start_link/0, start_shard/2, stop_shard/1, ensure_shard/2]). - -%% behaviour callbacks: --export([init/1]). - -%%================================================================================ -%% Type declarations -%%================================================================================ - --define(SUP, ?MODULE). - -%%================================================================================ -%% API funcions -%%================================================================================ - --spec start_link() -> {ok, pid()}. -start_link() -> - supervisor:start_link(?MODULE, []). - --spec start_shard(emqx_ds_storage_layer:shard_id(), emqx_ds:create_db_opts()) -> - supervisor:startchild_ret(). -start_shard(Shard, Options) -> - supervisor:start_child(?SUP, shard_child_spec(Shard, Options)). - --spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, _}. -stop_shard(Shard) -> - ok = supervisor:terminate_child(?SUP, Shard), - ok = supervisor:delete_child(?SUP, Shard). - --spec ensure_shard(emqx_ds_storage_layer:shard_id(), emqx_ds_storage_layer:options()) -> - ok | {error, _Reason}. -ensure_shard(Shard, Options) -> - case start_shard(Shard, Options) of - {ok, _Pid} -> - ok; - {error, {already_started, _Pid}} -> - ok; - {error, Reason} -> - {error, Reason} - end. 
- -%%================================================================================ -%% behaviour callbacks -%%================================================================================ - -init([]) -> - Children = [], - SupFlags = #{ - strategy => one_for_one, - intensity => 10, - period => 10 - }, - {ok, {SupFlags, Children}}. - -%%================================================================================ -%% Internal functions -%%================================================================================ - --spec shard_child_spec(emqx_ds_storage_layer:shard_id(), emqx_ds:create_db_opts()) -> - supervisor:child_spec(). -shard_child_spec(Shard, Options) -> - #{ - id => Shard, - start => {emqx_ds_storage_layer, start_link, [Shard, Options]}, - shutdown => 5_000, - restart => permanent, - type => worker - }. diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl new file mode 100644 index 000000000..ab605e40e --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl @@ -0,0 +1,325 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_storage_snapshot). + +-include_lib("kernel/include/file.hrl"). 
+ +-export([ + new_reader/1, + read_chunk/2, + abort_reader/1, + release_reader/1, + reader_info/2 +]). + +-export([ + new_writer/1, + write_chunk/2, + abort_writer/1, + release_writer/1, + writer_info/2 +]). + +-export_type([ + reader/0, + writer/0 +]). + +%% + +-define(FILECHUNK(RELPATH, POS, MORE), #{ + '$' => chunk, + rp => RELPATH, + pos => POS, + more => MORE +}). +-define(PAT_FILECHUNK(RELPATH, POS, MORE), #{ + '$' := chunk, + rp := RELPATH, + pos := POS, + more := MORE +}). + +-define(EOS(), #{ + '$' => eos +}). +-define(PAT_EOS(), #{ + '$' := eos +}). + +-define(PAT_HEADER(), #{'$' := _}). + +%% + +-record(reader, { + dirpath :: file:filename(), + files :: #{_RelPath => reader_file()}, + queue :: [_RelPath :: file:filename()] +}). + +-record(rfile, { + abspath :: file:filename(), + fd :: file:io_device() | eof, + pos :: non_neg_integer() +}). + +-opaque reader() :: #reader{}. +-type reader_file() :: #rfile{}. + +-type reason() :: {atom(), _AbsPath :: file:filename(), _Details :: term()}. + +%% @doc Initialize a reader for a snapshot directory. +%% Snapshot directory is a directory containing arbitrary number of regular +%% files in arbitrary subdirectory structure. Files are read in indeterminate +%% order. It's an error to have non-regular files in the directory (e.g. symlinks). +-spec new_reader(_Dir :: file:filename()) -> {ok, reader()}. +new_reader(DirPath) -> + %% NOTE + %% Opening all files at once, so there would be less error handling later + %% during transfer. + %% TODO + %% Beware of how errors are handled: if one file fails to open, the whole + %% process will exit. This is fine for the purpose of replication (because + %% ra spawns separate process for each transfer), but may not be suitable + %% for other use cases. + Files = emqx_utils_fs:traverse_dir( + fun(Path, Info, Acc) -> new_reader_file(Path, Info, DirPath, Acc) end, + #{}, + DirPath + ), + {ok, #reader{ + dirpath = DirPath, + files = Files, + queue = maps:keys(Files) + }}. 
+ +new_reader_file(Path, #file_info{type = regular}, DirPath, Acc) -> + case file:open(Path, [read, binary, raw]) of + {ok, IoDev} -> + RelPath = emqx_utils_fs:find_relpath(Path, DirPath), + File = #rfile{abspath = Path, fd = IoDev, pos = 0}, + Acc#{RelPath => File}; + {error, Reason} -> + error({open_failed, Path, Reason}) + end; +new_reader_file(Path, #file_info{type = Type}, _, _Acc) -> + error({bad_file_type, Path, Type}); +new_reader_file(Path, {error, Reason}, _, _Acc) -> + error({inaccessible, Path, Reason}). + +%% @doc Read a chunk of data from the snapshot. +%% Returns `{last, Chunk, Reader}` when the last chunk is read. After that, one +%% should call `release_reader/1` to finalize the process (or `abort_reader/1` if +%% keeping the snapshot is desired). +-spec read_chunk(reader(), _Size :: non_neg_integer()) -> + {last | next, _Chunk :: iodata(), reader()} | {error, reason()}. +read_chunk(R = #reader{files = Files, queue = [RelPath | Rest]}, Size) -> + File = maps:get(RelPath, Files), + case read_chunk_file(RelPath, File, Size) of + {last, Chunk, FileRest} -> + {next, Chunk, R#reader{files = Files#{RelPath := FileRest}, queue = Rest}}; + {next, Chunk, FileRest} -> + {next, Chunk, R#reader{files = Files#{RelPath := FileRest}}}; + Error -> + Error + end; +read_chunk(R = #reader{queue = []}, _Size) -> + {last, make_packet(?EOS()), R}. 
+ +read_chunk_file(RelPath, RFile0 = #rfile{fd = IoDev, pos = Pos, abspath = AbsPath}, Size) -> + case file:read(IoDev, Size) of + {ok, Chunk} -> + ChunkSize = byte_size(Chunk), + HasMore = ChunkSize div Size, + RFile1 = RFile0#rfile{pos = Pos + ChunkSize}, + case ChunkSize < Size of + false -> + Status = next, + RFile = RFile1; + true -> + Status = last, + RFile = release_reader_file(RFile1) + end, + Packet = make_packet(?FILECHUNK(RelPath, Pos, HasMore), Chunk), + {Status, Packet, RFile}; + eof -> + Packet = make_packet(?FILECHUNK(RelPath, Pos, 0)), + {last, Packet, release_reader_file(RFile0)}; + {error, Reason} -> + {error, {read_failed, AbsPath, Reason}} + end. + +%% @doc Aborts the snapshot reader, but does not release the snapshot files. +-spec abort_reader(reader()) -> ok. +abort_reader(#reader{files = Files}) -> + lists:foreach(fun release_reader_file/1, maps:values(Files)). + +%% @doc Aborts the snapshot reader and deletes the snapshot files. +-spec release_reader(reader()) -> ok. +release_reader(R = #reader{dirpath = DirPath}) -> + ok = abort_reader(R), + file:del_dir_r(DirPath). + +release_reader_file(RFile = #rfile{fd = eof}) -> + RFile; +release_reader_file(RFile = #rfile{fd = IoDev}) -> + _ = file:close(IoDev), + RFile#rfile{fd = eof}. + +-spec reader_info(bytes_read, reader()) -> _Bytes :: non_neg_integer(). +reader_info(bytes_read, #reader{files = Files}) -> + maps:fold(fun(_, RFile, Sum) -> Sum + RFile#rfile.pos end, 0, Files). + +%% + +-record(writer, { + dirpath :: file:filename(), + files :: #{_RelPath :: file:filename() => writer_file()} +}). + +-record(wfile, { + abspath :: file:filename(), + fd :: file:io_device() | eof, + pos :: non_neg_integer() +}). + +-opaque writer() :: #writer{}. +-type writer_file() :: #wfile{}. + +%% @doc Initialize a writer into a snapshot directory. +%% The directory needs not to exist, it will be created if it doesn't. +%% Having non-empty directory is not an error, existing files will be +%% overwritten. 
+-spec new_writer(_Dir :: file:filename()) -> {ok, writer()} | {error, reason()}. +new_writer(DirPath) -> + case filelib:ensure_path(DirPath) of + ok -> + {ok, #writer{dirpath = DirPath, files = #{}}}; + {error, Reason} -> + {error, {mkdir_failed, DirPath, Reason}} + end. + +%% @doc Write a chunk of data to the snapshot. +%% Returns `{last, Writer}` when the last chunk is written. After that, one +%% should call `release_writer/1` to finalize the process. +-spec write_chunk(writer(), _Chunk :: binary()) -> + {last | next, writer()} | {error, _Reason}. +write_chunk(W, Packet) -> + case parse_packet(Packet) of + {?PAT_FILECHUNK(RelPath, Pos, More), Chunk} -> + write_chunk(W, RelPath, Pos, More, Chunk); + {?PAT_EOS(), _Rest} -> + %% TODO: Verify all files are `eof` at this point? + {last, W}; + Error -> + Error + end. + +write_chunk(W = #writer{files = Files}, RelPath, Pos, More, Chunk) -> + case Files of + #{RelPath := WFile} -> + write_chunk(W, WFile, RelPath, Pos, More, Chunk); + #{} when Pos == 0 -> + case new_writer_file(W, RelPath) of + WFile = #wfile{} -> + write_chunk(W, WFile, RelPath, Pos, More, Chunk); + Error -> + Error + end; + #{} -> + {error, {bad_chunk, RelPath, Pos}} + end. + +write_chunk(W = #writer{files = Files}, WFile0, RelPath, Pos, More, Chunk) -> + case write_chunk_file(WFile0, Pos, More, Chunk) of + WFile = #wfile{} -> + {next, W#writer{files = Files#{RelPath => WFile}}}; + Error -> + Error + end. + +new_writer_file(#writer{dirpath = DirPath}, RelPath) -> + AbsPath = filename:join(DirPath, RelPath), + _ = filelib:ensure_dir(AbsPath), + case file:open(AbsPath, [write, binary, raw]) of + {ok, IoDev} -> + #wfile{ + abspath = AbsPath, + fd = IoDev, + pos = 0 + }; + {error, Reason} -> + {error, {open_failed, AbsPath, Reason}} + end. 
+ +write_chunk_file(WFile0 = #wfile{fd = IoDev, pos = Pos, abspath = AbsPath}, Pos, More, Chunk) -> + ChunkSize = byte_size(Chunk), + case file:write(IoDev, Chunk) of + ok -> + WFile1 = WFile0#wfile{pos = Pos + ChunkSize}, + case More of + 0 -> release_writer_file(WFile1); + _ -> WFile1 + end; + {error, Reason} -> + {error, {write_failed, AbsPath, Reason}} + end; +write_chunk_file(WFile = #wfile{pos = WPos}, Pos, _More, _Chunk) when Pos < WPos -> + WFile; +write_chunk_file(#wfile{abspath = AbsPath}, Pos, _More, _Chunk) -> + {error, {bad_chunk, AbsPath, Pos}}. + +%% @doc Abort the writer and clean up unfinished snapshot files. +-spec abort_writer(writer()) -> ok | {error, file:posix()}. +abort_writer(W = #writer{dirpath = DirPath}) -> + ok = release_writer(W), + file:del_dir_r(DirPath). + +%% @doc Release the writer and close all snapshot files. +-spec release_writer(writer()) -> ok. +release_writer(#writer{files = Files}) -> + ok = lists:foreach(fun release_writer_file/1, maps:values(Files)). + +release_writer_file(WFile = #wfile{fd = eof}) -> + WFile; +release_writer_file(WFile = #wfile{fd = IoDev}) -> + _ = file:close(IoDev), + WFile#wfile{fd = eof}. + +-spec writer_info(bytes_written, writer()) -> _Bytes :: non_neg_integer(). +writer_info(bytes_written, #writer{files = Files}) -> + maps:fold(fun(_, WFile, Sum) -> Sum + WFile#wfile.pos end, 0, Files). + +%% + +make_packet(Header) -> + term_to_binary(Header). + +make_packet(Header, Rest) -> + HeaderBytes = term_to_binary(Header), + <>. + +parse_packet(Packet) -> + try binary_to_term(Packet, [safe, used]) of + {Header = ?PAT_HEADER(), Length} -> + Rest = binary:part(Packet, Length, byte_size(Packet) - Length), + {Header, Rest}; + {Header, _} -> + {error, {bad_header, Header}} + catch + error:badarg -> + {error, bad_packet} + end. 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl new file mode 100644 index 000000000..d1f98b3c3 --- /dev/null +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -0,0 +1,202 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_replication_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("snabbkaffe/include/test_macros.hrl"). + +-define(DB, testdb). + +opts() -> + #{ + backend => builtin, + storage => {emqx_ds_storage_bitfield_lts, #{}}, + n_shards => 1, + n_sites => 3, + replication_factor => 3, + replication_options => #{ + wal_max_size_bytes => 128 * 1024, + wal_max_batch_size => 1024, + snapshot_interval => 128 + } + }. + +t_replication_transfers_snapshots(Config) -> + NMsgs = 4000, + Nodes = [Node, NodeOffline | _] = ?config(nodes, Config), + _Specs = [_, SpecOffline | _] = ?config(specs, Config), + + %% Initialize DB on all nodes and wait for it to be online. 
+ ?assertEqual( + [{ok, ok} || _ <- Nodes], + erpc:multicall(Nodes, emqx_ds, open_db, [?DB, opts()]) + ), + ?retry( + 500, + 10, + ?assertMatch([_], shards_online(Node, ?DB)) + ), + + %% Stop the DB on the "offline" node. + ok = emqx_cth_cluster:stop_node(NodeOffline), + + %% Fill the storage with messages and few additional generations. + Messages = fill_storage(Node, ?DB, NMsgs, #{p_addgen => 0.01}), + + %% Restart the node. + [NodeOffline] = emqx_cth_cluster:restart(SpecOffline), + {ok, SRef} = snabbkaffe:subscribe( + ?match_event(#{ + ?snk_kind := dsrepl_snapshot_accepted, + ?snk_meta := #{node := NodeOffline} + }) + ), + ?assertEqual( + ok, + erpc:call(NodeOffline, emqx_ds, open_db, [?DB, opts()]) + ), + + %% Trigger storage operation and wait the replica to be restored. + _ = add_generation(Node, ?DB), + ?assertMatch( + {ok, _}, + snabbkaffe:receive_events(SRef) + ), + + %% Wait until any pending replication activities are finished (e.g. Raft log entries). + ok = timer:sleep(3_000), + + %% Check that the DB has been restored. + Shard = hd(shards(NodeOffline, ?DB)), + MessagesOffline = lists:keysort( + #message.timestamp, + consume(NodeOffline, ?DB, Shard, ['#'], 0) + ), + ?assertEqual( + sample(40, Messages), + sample(40, MessagesOffline) + ), + ?assertEqual( + Messages, + MessagesOffline + ). + +shards(Node, DB) -> + erpc:call(Node, emqx_ds_replication_layer_meta, shards, [DB]). + +shards_online(Node, DB) -> + erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [DB]). + +fill_storage(Node, DB, NMsgs, Opts) -> + fill_storage(Node, DB, NMsgs, 0, Opts). + +fill_storage(Node, DB, NMsgs, I, Opts = #{p_addgen := PAddGen}) when I < NMsgs -> + R1 = push_message(Node, DB, I), + R2 = probably(PAddGen, fun() -> add_generation(Node, DB) end), + R1 ++ R2 ++ fill_storage(Node, DB, NMsgs, I + 1, Opts); +fill_storage(_Node, _DB, NMsgs, NMsgs, _Opts) -> + []. 
+ +push_message(Node, DB, I) -> + Topic = emqx_topic:join([<<"topic">>, <<"foo">>, integer_to_binary(I)]), + {Bytes, _} = rand:bytes_s(120, rand:seed_s(default, I)), + Message = message(Topic, Bytes, I * 100), + ok = erpc:call(Node, emqx_ds, store_batch, [DB, [Message], #{sync => true}]), + [Message]. + +add_generation(Node, DB) -> + ok = erpc:call(Node, emqx_ds, add_generation, [DB]), + []. + +message(Topic, Payload, PublishedAt) -> + #message{ + from = <>, + topic = Topic, + payload = Payload, + timestamp = PublishedAt, + id = emqx_guid:gen() + }. + +consume(Node, DB, Shard, TopicFilter, StartTime) -> + Streams = erpc:call(Node, emqx_ds_storage_layer, get_streams, [ + {DB, Shard}, TopicFilter, StartTime + ]), + lists:flatmap( + fun({_Rank, Stream}) -> + {ok, It} = erpc:call(Node, emqx_ds_storage_layer, make_iterator, [ + {DB, Shard}, Stream, TopicFilter, StartTime + ]), + consume_stream(Node, DB, Shard, It) + end, + Streams + ). + +consume_stream(Node, DB, Shard, It) -> + case erpc:call(Node, emqx_ds_storage_layer, next, [{DB, Shard}, It, 100]) of + {ok, _NIt, _Msgs = []} -> + []; + {ok, NIt, Batch} -> + [Msg || {_Key, Msg} <- Batch] ++ consume_stream(Node, DB, Shard, NIt); + {ok, end_of_stream} -> + [] + end. + +probably(P, Fun) -> + case rand:uniform() of + X when X < P -> Fun(); + _ -> [] + end. + +sample(N, List) -> + L = length(List), + H = N div 2, + Filler = integer_to_list(L - N) ++ " more", + lists:sublist(List, H) ++ [Filler] ++ lists:sublist(List, L - H, L). + +%% + +suite() -> [{timetrap, {seconds, 60}}]. + +all() -> emqx_common_test_helpers:all(?MODULE). 
+ +init_per_testcase(TCName, Config) -> + Apps = [ + {emqx_durable_storage, #{ + before_start => fun snabbkaffe:fix_ct_logging/0, + override_env => [{egress_flush_interval, 1}] + }} + ], + WorkDir = emqx_cth_suite:work_dir(TCName, Config), + NodeSpecs = emqx_cth_cluster:mk_nodespecs( + [ + {emqx_ds_replication_SUITE1, #{apps => Apps}}, + {emqx_ds_replication_SUITE2, #{apps => Apps}}, + {emqx_ds_replication_SUITE3, #{apps => Apps}} + ], + #{work_dir => WorkDir} + ), + Nodes = emqx_cth_cluster:start(NodeSpecs), + ok = snabbkaffe:start_trace(), + [{nodes, Nodes}, {specs, NodeSpecs} | Config]. + +end_per_testcase(_TCName, Config) -> + ok = snabbkaffe:stop(), + ok = emqx_cth_cluster:stop(?config(nodes, Config)). diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl new file mode 100644 index 000000000..a081267bc --- /dev/null +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl @@ -0,0 +1,149 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_storage_snapshot_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("common_test/include/ct.hrl"). 
+-include_lib("stdlib/include/assert.hrl"). + +opts() -> + #{storage => {emqx_ds_storage_bitfield_lts, #{}}}. + +%% + +t_snapshot_take_restore(_Config) -> + Shard = {?FUNCTION_NAME, _ShardId = <<"42">>}, + {ok, Pid} = emqx_ds_storage_layer:start_link(Shard, opts()), + + %% Push some messages to the shard. + Msgs1 = [gen_message(N) || N <- lists:seq(1000, 2000)], + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs1), #{})), + + %% Add new generation and push some more. + ?assertEqual(ok, emqx_ds_storage_layer:add_generation(Shard, 3000)), + Msgs2 = [gen_message(N) || N <- lists:seq(4000, 5000)], + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs2), #{})), + ?assertEqual(ok, emqx_ds_storage_layer:add_generation(Shard, 6000)), + + %% Take a snapshot of the shard. + {ok, SnapReader} = emqx_ds_storage_layer:take_snapshot(Shard), + + %% Push even more messages to the shard AFTER taking the snapshot. + Msgs3 = [gen_message(N) || N <- lists:seq(7000, 8000)], + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs3), #{})), + + %% Destroy the shard. + _ = unlink(Pid), + ok = proc_lib:stop(Pid, shutdown, infinity), + ok = emqx_ds_storage_layer:drop_shard(Shard), + + %% Restore the shard from the snapshot. + {ok, SnapWriter} = emqx_ds_storage_layer:accept_snapshot(Shard), + ?assertEqual(ok, transfer_snapshot(SnapReader, SnapWriter)), + + %% Verify that the restored shard contains the messages up until the snapshot. + {ok, _Pid} = emqx_ds_storage_layer:start_link(Shard, opts()), + ?assertEqual( + Msgs1 ++ Msgs2, + lists:keysort(#message.timestamp, consume(Shard, ['#'])) + ). + +mk_batch(Msgs) -> + [{emqx_message:timestamp(Msg, microsecond), Msg} || Msg <- Msgs]. + +gen_message(N) -> + Topic = emqx_topic:join([<<"foo">>, <<"bar">>, integer_to_binary(N)]), + message(Topic, integer_to_binary(N), N * 100). 
+ +message(Topic, Payload, PublishedAt) -> + #message{ + from = <>, + topic = Topic, + payload = Payload, + timestamp = PublishedAt, + id = emqx_guid:gen() + }. + +transfer_snapshot(Reader, Writer) -> + ChunkSize = rand:uniform(1024), + case emqx_ds_storage_snapshot:read_chunk(Reader, ChunkSize) of + {RStatus, Chunk, NReader} -> + Data = iolist_to_binary(Chunk), + {WStatus, NWriter} = emqx_ds_storage_snapshot:write_chunk(Writer, Data), + %% Verify idempotency. + ?assertEqual( + {WStatus, NWriter}, + emqx_ds_storage_snapshot:write_chunk(Writer, Data) + ), + %% Verify convergence. + ?assertEqual( + RStatus, + WStatus, + #{reader => NReader, writer => NWriter} + ), + case WStatus of + last -> + ?assertEqual(ok, emqx_ds_storage_snapshot:release_reader(NReader)), + ?assertEqual(ok, emqx_ds_storage_snapshot:release_writer(NWriter)), + ok; + next -> + transfer_snapshot(NReader, NWriter) + end; + {error, Reason} -> + {error, Reason, Reader} + end. + +consume(Shard, TopicFilter) -> + consume(Shard, TopicFilter, 0). + +consume(Shard, TopicFilter, StartTime) -> + Streams = emqx_ds_storage_layer:get_streams(Shard, TopicFilter, StartTime), + lists:flatmap( + fun({_Rank, Stream}) -> + {ok, It} = emqx_ds_storage_layer:make_iterator(Shard, Stream, TopicFilter, StartTime), + consume_stream(Shard, It) + end, + Streams + ). + +consume_stream(Shard, It) -> + case emqx_ds_storage_layer:next(Shard, It, 100) of + {ok, _NIt, _Msgs = []} -> + []; + {ok, NIt, Batch} -> + [Msg || {_DSKey, Msg} <- Batch] ++ consume_stream(Shard, NIt); + {ok, end_of_stream} -> + [] + end. + +%% + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_testcase(TCName, Config) -> + WorkDir = emqx_cth_suite:work_dir(TCName, Config), + Apps = emqx_cth_suite:start( + [{emqx_durable_storage, #{override_env => [{db_data_dir, WorkDir}]}}], + #{work_dir => WorkDir} + ), + [{apps, Apps} | Config]. + +end_per_testcase(_TCName, Config) -> + ok = emqx_cth_suite:stop(?config(apps, Config)), + ok. 
From 2cd357a5bd30177c86cf07efc17ef707146f3ed7 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 25 Mar 2024 18:26:12 +0100 Subject: [PATCH 034/234] fix(ds): ensure store batch is idempotent wrt generations --- .../src/emqx_ds_storage_layer.erl | 48 +++++---- ...ot_SUITE.erl => emqx_ds_storage_SUITE.erl} | 98 ++++++++++++------- 2 files changed, 90 insertions(+), 56 deletions(-) rename apps/emqx_durable_storage/test/{emqx_ds_storage_snapshot_SUITE.erl => emqx_ds_storage_SUITE.erl} (62%) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 2a3829ac3..6b85328b6 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -251,23 +251,13 @@ drop_shard(Shard) -> emqx_ds:message_store_opts() ) -> emqx_ds:store_batch_result(). -store_batch(Shard, Messages0, Options) -> - %% We always store messages in the current generation: - GenId = generation_current(Shard), - #{module := Mod, data := GenData, since := Since} = generation_get(Shard, GenId), - case Messages0 of - [{Time, _Msg} | Rest] when Time < Since -> - %% FIXME: log / feedback - Messages = skip_outdated_messages(Since, Rest); - _ -> - Messages = Messages0 - end, - Mod:store_batch(Shard, GenData, Messages, Options). - -skip_outdated_messages(Since, [{Time, _Msg} | Rest]) when Time < Since -> - skip_outdated_messages(Since, Rest); -skip_outdated_messages(_Since, Messages) -> - Messages. +store_batch(Shard, Messages = [{Time, _Msg} | _], Options) -> + %% NOTE + %% We assume that batches do not span generations. Callers should enforce this. + #{module := Mod, data := GenData} = generation_at(Shard, Time), + Mod:store_batch(Shard, GenData, Messages, Options); +store_batch(_Shard, [], _Options) -> + ok. -spec get_streams(shard_id(), emqx_ds:topic_filter(), emqx_ds:time()) -> [{integer(), stream()}]. 
@@ -715,7 +705,7 @@ create_new_shard_schema(ShardId, DB, CFRefs, Prototype) -> {gen_id(), shard_schema(), cf_refs()}. new_generation(ShardId, DB, Schema0, Since) -> #{current_generation := PrevGenId, prototype := {Mod, ModConf}} = Schema0, - GenId = PrevGenId + 1, + GenId = next_generation_id(PrevGenId), {GenData, NewCFRefs} = Mod:create(ShardId, DB, GenId, ModConf), GenSchema = #{ module => Mod, @@ -731,6 +721,14 @@ new_generation(ShardId, DB, Schema0, Since) -> }, {GenId, Schema, NewCFRefs}. +-spec next_generation_id(gen_id()) -> gen_id(). +next_generation_id(GenId) -> + GenId + 1. + +-spec prev_generation_id(gen_id()) -> gen_id(). +prev_generation_id(GenId) when GenId > 0 -> + GenId - 1. + %% @doc Commit current state of the server to both rocksdb and the persistent term -spec commit_metadata(server_state()) -> ok. commit_metadata(#s{shard_id = ShardId, schema = Schema, shard = Runtime, db = DB}) -> @@ -854,6 +852,20 @@ generations_since(Shard, Since) -> Schema ). +-spec generation_at(shard_id(), emqx_ds:time()) -> generation(). +generation_at(Shard, Time) -> + Schema = #{current_generation := Current} = get_schema_runtime(Shard), + generation_at(Time, Current, Schema). + +generation_at(Time, GenId, Schema) -> + #{?GEN_KEY(GenId) := Gen} = Schema, + case Gen of + #{since := Since} when Time < Since andalso GenId > 0 -> + generation_at(Time, prev_generation_id(GenId), Schema); + _ -> + Gen + end. + -define(PERSISTENT_TERM(SHARD), {emqx_ds_storage_layer, SHARD}). -spec get_schema_runtime(shard_id()) -> shard(). 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl similarity index 62% rename from apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl index a081267bc..a290a4c30 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_snapshot_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl @@ -13,7 +13,7 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --module(emqx_ds_storage_snapshot_SUITE). +-module(emqx_ds_storage_SUITE). -compile(export_all). -compile(nowarn_export_all). @@ -27,18 +27,37 @@ opts() -> %% +t_idempotent_store_batch(_Config) -> + Shard = {?FUNCTION_NAME, _ShardId = <<"42">>}, + {ok, Pid} = emqx_ds_storage_layer:start_link(Shard, opts()), + %% Push some messages to the shard. + Msgs1 = [gen_message(N) || N <- lists:seq(10, 20)], + GenTs = 30, + Msgs2 = [gen_message(N) || N <- lists:seq(40, 50)], + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs1), #{})), + %% Add new generation and push the same batch + some more. + ?assertEqual(ok, emqx_ds_storage_layer:add_generation(Shard, GenTs)), + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs1), #{})), + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs2), #{})), + %% First batch should have been handled idempotently. + ?assertEqual( + Msgs1 ++ Msgs2, + lists:keysort(#message.timestamp, consume(Shard, ['#'])) + ), + ok = stop_shard(Pid). + t_snapshot_take_restore(_Config) -> Shard = {?FUNCTION_NAME, _ShardId = <<"42">>}, {ok, Pid} = emqx_ds_storage_layer:start_link(Shard, opts()), %% Push some messages to the shard. 
Msgs1 = [gen_message(N) || N <- lists:seq(1000, 2000)], - ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs1), #{})), + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs1), #{})), %% Add new generation and push some more. ?assertEqual(ok, emqx_ds_storage_layer:add_generation(Shard, 3000)), Msgs2 = [gen_message(N) || N <- lists:seq(4000, 5000)], - ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs2), #{})), + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs2), #{})), ?assertEqual(ok, emqx_ds_storage_layer:add_generation(Shard, 6000)), %% Take a snapshot of the shard. @@ -46,11 +65,10 @@ t_snapshot_take_restore(_Config) -> %% Push even more messages to the shard AFTER taking the snapshot. Msgs3 = [gen_message(N) || N <- lists:seq(7000, 8000)], - ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, mk_batch(Msgs3), #{})), + ?assertEqual(ok, emqx_ds_storage_layer:store_batch(Shard, batch(Msgs3), #{})), %% Destroy the shard. - _ = unlink(Pid), - ok = proc_lib:stop(Pid, shutdown, infinity), + ok = stop_shard(Pid), ok = emqx_ds_storage_layer:drop_shard(Shard), %% Restore the shard from the snapshot. @@ -64,12 +82,41 @@ t_snapshot_take_restore(_Config) -> lists:keysort(#message.timestamp, consume(Shard, ['#'])) ). -mk_batch(Msgs) -> - [{emqx_message:timestamp(Msg, microsecond), Msg} || Msg <- Msgs]. +transfer_snapshot(Reader, Writer) -> + ChunkSize = rand:uniform(1024), + ReadResult = emqx_ds_storage_snapshot:read_chunk(Reader, ChunkSize), + ?assertMatch({RStatus, _, _} when RStatus == next; RStatus == last, ReadResult), + {RStatus, Chunk, NReader} = ReadResult, + Data = iolist_to_binary(Chunk), + {WStatus, NWriter} = emqx_ds_storage_snapshot:write_chunk(Writer, Data), + %% Verify idempotency. + ?assertMatch( + {WStatus, NWriter}, + emqx_ds_storage_snapshot:write_chunk(NWriter, Data) + ), + %% Verify convergence. 
+ ?assertEqual( + RStatus, + WStatus, + #{reader => NReader, writer => NWriter} + ), + case WStatus of + last -> + ?assertEqual(ok, emqx_ds_storage_snapshot:release_reader(NReader)), + ?assertEqual(ok, emqx_ds_storage_snapshot:release_writer(NWriter)), + ok; + next -> + transfer_snapshot(NReader, NWriter) + end. + +%% + +batch(Msgs) -> + [{emqx_message:timestamp(Msg), Msg} || Msg <- Msgs]. gen_message(N) -> Topic = emqx_topic:join([<<"foo">>, <<"bar">>, integer_to_binary(N)]), - message(Topic, integer_to_binary(N), N * 100). + message(Topic, crypto:strong_rand_bytes(16), N). message(Topic, Payload, PublishedAt) -> #message{ @@ -80,35 +127,6 @@ message(Topic, Payload, PublishedAt) -> id = emqx_guid:gen() }. -transfer_snapshot(Reader, Writer) -> - ChunkSize = rand:uniform(1024), - case emqx_ds_storage_snapshot:read_chunk(Reader, ChunkSize) of - {RStatus, Chunk, NReader} -> - Data = iolist_to_binary(Chunk), - {WStatus, NWriter} = emqx_ds_storage_snapshot:write_chunk(Writer, Data), - %% Verify idempotency. - ?assertEqual( - {WStatus, NWriter}, - emqx_ds_storage_snapshot:write_chunk(Writer, Data) - ), - %% Verify convergence. - ?assertEqual( - RStatus, - WStatus, - #{reader => NReader, writer => NWriter} - ), - case WStatus of - last -> - ?assertEqual(ok, emqx_ds_storage_snapshot:release_reader(NReader)), - ?assertEqual(ok, emqx_ds_storage_snapshot:release_writer(NWriter)), - ok; - next -> - transfer_snapshot(NReader, NWriter) - end; - {error, Reason} -> - {error, Reason, Reader} - end. - consume(Shard, TopicFilter) -> consume(Shard, TopicFilter, 0). @@ -132,6 +150,10 @@ consume_stream(Shard, It) -> [] end. +stop_shard(Pid) -> + _ = unlink(Pid), + proc_lib:stop(Pid, shutdown, infinity). + %% all() -> emqx_common_test_helpers:all(?MODULE). 
From d31cd0c728c049eaf4bab715a75660f669d508cf Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 25 Mar 2024 18:27:24 +0100 Subject: [PATCH 035/234] feat(ds): ensure LTS state ids are deterministic --- apps/emqx_durable_storage/src/emqx_ds_lts.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_lts.erl index 6ebfc820d..bd7cb3826 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_lts.erl @@ -263,12 +263,14 @@ trie_insert(#trie{trie = Trie, stats = Stats, persist = Persist}, State, Token, end. -spec get_id_for_key(trie(), state(), edge()) -> static_key(). -get_id_for_key(#trie{static_key_size = Size}, _State, _Token) -> +get_id_for_key(#trie{static_key_size = Size}, State, Token) when Size =< 32 -> %% Requirements for the return value: %% %% It should be globally unique for the `{State, Token}` pair. Other %% than that, there's no requirements. The return value doesn't even %% have to be deterministic, since the states are saved in the trie. + %% Yet, it helps a lot if it is, so that applying the same sequence + %% of topics to different tries will result in the same trie state. %% %% The generated value becomes the ID of the topic in the durable %% storage. Its size should be relatively small to reduce the @@ -277,7 +279,7 @@ get_id_for_key(#trie{static_key_size = Size}, _State, _Token) -> %% If we want to impress computer science crowd, sorry, I mean to %% minimize storage requirements, we can even employ Huffman coding %% based on the frequency of messages. - <> = crypto:strong_rand_bytes(Size), + <> = crypto:hash(sha256, term_to_binary([State | Token])), Int. 
%% erlfmt-ignore From e8b06a6a9f28afed47d6da620d47c0f4a7a35d84 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 25 Mar 2024 19:45:56 +0100 Subject: [PATCH 036/234] chore(dsrepl): mark few more BPAPI targets as obsolete --- .../src/emqx_ds_replication_layer.erl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index b1b8fef36..14c2268b8 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -43,7 +43,6 @@ -export([ %% RPC Targets: do_drop_db_v1/1, - do_store_batch_v1/4, do_get_streams_v1/4, do_get_streams_v2/4, do_make_iterator_v2/5, @@ -53,11 +52,11 @@ do_get_delete_streams_v4/4, do_make_delete_iterator_v4/5, do_delete_next_v4/5, - %% Unused: - do_drop_generation_v3/3, %% Obsolete: + do_store_batch_v1/4, do_make_iterator_v1/5, do_add_generation_v2/1, + do_drop_generation_v3/3, %% Egress API: ra_store_batch/3 @@ -139,6 +138,8 @@ -type message_id() :: emqx_ds:message_id(). +%% TODO: this type is obsolete and is kept only for compatibility with +%% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6) -type batch() :: #{ ?tag := ?BATCH, ?batch_messages := [emqx_types:message()] @@ -400,10 +401,9 @@ do_drop_db_v1(DB) -> batch(), emqx_ds:message_store_opts() ) -> - emqx_ds:store_batch_result(). -do_store_batch_v1(DB, Shard, #{?tag := ?BATCH, ?batch_messages := Messages}, Options) -> - Batch = [{emqx_message:timestamp(Message), Message} || Message <- Messages], - emqx_ds_storage_layer:store_batch({DB, Shard}, Batch, Options). + no_return(). +do_store_batch_v1(_DB, _Shard, _Batch, _Options) -> + error(obsolete_api). %% Remove me in EMQX 5.6 -dialyzer({nowarn_function, do_get_streams_v1/4}). @@ -516,9 +516,9 @@ do_list_generations_with_lifetimes_v3(DB, Shard) -> ). 
-spec do_drop_generation_v3(emqx_ds:db(), shard_id(), emqx_ds_storage_layer:gen_id()) -> - ok | {error, _}. -do_drop_generation_v3(DB, ShardId, GenId) -> - emqx_ds_storage_layer:drop_generation({DB, ShardId}, GenId). + no_return(). +do_drop_generation_v3(_DB, _ShardId, _GenId) -> + error(obsolete_api). -spec do_get_delete_streams_v4( emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:topic_filter(), emqx_ds:time() From e029b8f99620214ed9049084bfd77959190cf764 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 27 Mar 2024 15:46:07 +0100 Subject: [PATCH 037/234] test(dsrepl): wait for whole cluster readiness To minimize the chance of flaky tests due to the shards not being completely online. Co-Authored-By: Thales Macedo Garitezi --- apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index d1f98b3c3..5ff1d5fb2 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -52,7 +52,7 @@ t_replication_transfers_snapshots(Config) -> ?retry( 500, 10, - ?assertMatch([_], shards_online(Node, ?DB)) + ?assertMatch([[_], [_], [_]], [shards_online(N, ?DB) || N <- Nodes]) ), %% Stop the DB on the "offline" node. 
From 7cebf598a81baf8dd03482787c3fe3fd01f12fdf Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 27 Mar 2024 15:48:09 +0100 Subject: [PATCH 038/234] chore(dsrepl): simplify snapshot transfer code a bit Co-Authored-By: Thales Macedo Garitezi --- .../emqx_durable_storage/src/emqx_ds_storage_snapshot.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl index ab605e40e..74459893b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl @@ -145,11 +145,11 @@ read_chunk_file(RelPath, RFile0 = #rfile{fd = IoDev, pos = Pos, abspath = AbsPat ChunkSize = byte_size(Chunk), HasMore = ChunkSize div Size, RFile1 = RFile0#rfile{pos = Pos + ChunkSize}, - case ChunkSize < Size of - false -> + case HasMore of + _Yes = 1 -> Status = next, RFile = RFile1; - true -> + _No = 0 -> Status = last, RFile = release_reader_file(RFile1) end, @@ -315,7 +315,7 @@ make_packet(Header, Rest) -> parse_packet(Packet) -> try binary_to_term(Packet, [safe, used]) of {Header = ?PAT_HEADER(), Length} -> - Rest = binary:part(Packet, Length, byte_size(Packet) - Length), + {_, Rest} = split_binary(Packet, Length), {Header, Rest}; {Header, _} -> {error, {bad_header, Header}} From c666c65c6ae31c43b4b3a52d8d33cc92c5d17c7f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 28 Mar 2024 15:11:45 +0100 Subject: [PATCH 039/234] test(ds): factor out storage iteration into helper module --- .../test/emqx_ds_SUITE.erl | 80 +++++-------------- .../test/emqx_ds_replication_SUITE.erl | 23 +----- .../test/emqx_ds_storage_SUITE.erl | 27 +------ .../test/emqx_ds_test_helpers.erl | 60 ++++++++++++++ 4 files changed, 85 insertions(+), 105 deletions(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl index 64d81307c..3df16dc1c 
100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl @@ -98,8 +98,8 @@ t_03_smoke_iterate(_Config) -> ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)), [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), - {ok, Iter, Batch} = iterate(DB, Iter0, 1), - ?assertEqual(Msgs, [Msg || {_Key, Msg} <- Batch], {Iter0, Iter}). + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), + ?assertEqual(Msgs, Batch, {Iter0, Iter}). %% Verify that iterators survive restart of the application. This is %% an important property, since the lifetime of the iterators is tied @@ -125,8 +125,8 @@ t_04_restart(_Config) -> {ok, _} = application:ensure_all_started(emqx_durable_storage), ok = emqx_ds:open_db(DB, opts()), %% The old iterator should be still operational: - {ok, Iter, Batch} = iterate(DB, Iter0, 1), - ?assertEqual(Msgs, [Msg || {_Key, Msg} <- Batch], {Iter0, Iter}). + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter0), + ?assertEqual(Msgs, Batch, {Iter0, Iter}). %% Check that we can create iterators directly from DS keys. t_05_update_iterator(_Config) -> @@ -148,9 +148,8 @@ t_05_update_iterator(_Config) -> Res1 = emqx_ds:update_iterator(DB, OldIter, Key0), ?assertMatch({ok, _Iter1}, Res1), {ok, Iter1} = Res1, - {ok, FinalIter, Batch} = iterate(DB, Iter1, 1), - AllMsgs = [Msg0 | [Msg || {_Key, Msg} <- Batch]], - ?assertEqual(Msgs, AllMsgs, #{from_key => Iter1, final_iter => FinalIter}), + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter1, #{batch_size => 1}), + ?assertEqual(Msgs, [Msg0 | Batch], #{from_key => Iter1, final_iter => Iter}), ok. 
t_06_update_config(_Config) -> @@ -190,9 +189,9 @@ t_06_update_config(_Config) -> ), Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Msgs0 ++ Acc, - Batch = fetch_all(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, {StartTime}), + Msgs = Acc ++ Msgs0, + Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), + ?assertEqual(Msgs, Batch, StartTime), Msgs end, lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). @@ -234,9 +233,9 @@ t_07_add_generation(_Config) -> ), Checker = fun({StartTime, Msgs0}, Acc) -> - Msgs = Msgs0 ++ Acc, - Batch = fetch_all(DB, TopicFilter, StartTime), - ?assertEqual(Msgs, Batch, {StartTime}), + Msgs = Acc ++ Msgs0, + Batch = emqx_ds_test_helpers:consume(DB, TopicFilter, StartTime), + ?assertEqual(Msgs, Batch, StartTime), Msgs end, lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). @@ -398,9 +397,8 @@ t_smoke_delete_next(_Config) -> TopicFilterHash = ['#'], [{_, Stream}] = emqx_ds:get_streams(DB, TopicFilterHash, StartTime), - {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilterHash, StartTime), - {ok, _Iter, Batch} = iterate(DB, Iter0, 1), - ?assertEqual([Msg1, Msg3], [Msg || {_Key, Msg} <- Batch]), + Batch = emqx_ds_test_helpers:consume_stream(DB, Stream, TopicFilterHash, StartTime), + ?assertEqual([Msg1, Msg3], Batch), ok = emqx_ds:add_generation(DB), @@ -444,9 +442,9 @@ t_drop_generation_with_never_used_iterator(_Config) -> ], ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), - ?assertMatch( - {error, unrecoverable, generation_not_found, []}, - iterate(DB, Iter0, 1) + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter0) ), %% New iterator for the new stream will only see the later messages. 
@@ -454,9 +452,9 @@ t_drop_generation_with_never_used_iterator(_Config) -> ?assertNotEqual(Stream0, Stream1), {ok, Iter1} = emqx_ds:make_iterator(DB, Stream1, TopicFilter, StartTime), - {ok, Iter, Batch} = iterate(DB, Iter1, 1), + {ok, Iter, Batch} = emqx_ds_test_helpers:consume_iter(DB, Iter1, #{batch_size => 1}), ?assertNotEqual(end_of_stream, Iter), - ?assertEqual(Msgs1, [Msg || {_Key, Msg} <- Batch]), + ?assertEqual(Msgs1, Batch), ok. @@ -496,9 +494,9 @@ t_drop_generation_with_used_once_iterator(_Config) -> ], ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), - ?assertMatch( - {error, unrecoverable, generation_not_found, []}, - iterate(DB, Iter1, 1) + ?assertError( + {error, unrecoverable, generation_not_found}, + emqx_ds_test_helpers:consume_iter(DB, Iter1) ). t_drop_generation_update_iterator(_Config) -> @@ -702,25 +700,6 @@ update_data_set() -> ] ]. -fetch_all(DB, TopicFilter, StartTime) -> - Streams0 = emqx_ds:get_streams(DB, TopicFilter, StartTime), - Streams = lists:sort( - fun({{_, A}, _}, {{_, B}, _}) -> - A < B - end, - Streams0 - ), - lists:foldl( - fun({_, Stream}, Acc) -> - {ok, Iter0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), - {ok, _, Msgs0} = iterate(DB, Iter0, StartTime), - Msgs = lists:map(fun({_, Msg}) -> Msg end, Msgs0), - Acc ++ Msgs - end, - [], - Streams - ). - message(ClientId, Topic, Payload, PublishedAt) -> Msg = message(Topic, Payload, PublishedAt), Msg#message{from = ClientId}. @@ -733,21 +712,6 @@ message(Topic, Payload, PublishedAt) -> id = emqx_guid:gen() }. -iterate(DB, It, BatchSize) -> - iterate(DB, It, BatchSize, []). - -iterate(DB, It0, BatchSize, Acc) -> - case emqx_ds:next(DB, It0, BatchSize) of - {ok, It, []} -> - {ok, It, Acc}; - {ok, It, Msgs} -> - iterate(DB, It, BatchSize, Acc ++ Msgs); - {ok, end_of_stream} -> - {ok, end_of_stream, Acc}; - {error, Class, Reason} -> - {error, Class, Reason, Acc} - end. - delete(DB, It, Selector, BatchSize) -> delete(DB, It, Selector, BatchSize, 0). 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index 5ff1d5fb2..24e7cdafb 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -136,28 +136,7 @@ message(Topic, Payload, PublishedAt) -> }. consume(Node, DB, Shard, TopicFilter, StartTime) -> - Streams = erpc:call(Node, emqx_ds_storage_layer, get_streams, [ - {DB, Shard}, TopicFilter, StartTime - ]), - lists:flatmap( - fun({_Rank, Stream}) -> - {ok, It} = erpc:call(Node, emqx_ds_storage_layer, make_iterator, [ - {DB, Shard}, Stream, TopicFilter, StartTime - ]), - consume_stream(Node, DB, Shard, It) - end, - Streams - ). - -consume_stream(Node, DB, Shard, It) -> - case erpc:call(Node, emqx_ds_storage_layer, next, [{DB, Shard}, It, 100]) of - {ok, _NIt, _Msgs = []} -> - []; - {ok, NIt, Batch} -> - [Msg || {_Key, Msg} <- Batch] ++ consume_stream(Node, DB, Shard, NIt); - {ok, end_of_stream} -> - [] - end. + erpc:call(Node, emqx_ds_test_helpers, storage_consume, [{DB, Shard}, TopicFilter, StartTime]). probably(P, Fun) -> case rand:uniform() of diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl index a290a4c30..eaddab0c6 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_SUITE.erl @@ -42,7 +42,7 @@ t_idempotent_store_batch(_Config) -> %% First batch should have been handled idempotently. ?assertEqual( Msgs1 ++ Msgs2, - lists:keysort(#message.timestamp, consume(Shard, ['#'])) + lists:keysort(#message.timestamp, emqx_ds_test_helpers:storage_consume(Shard, ['#'])) ), ok = stop_shard(Pid). 
@@ -79,7 +79,7 @@ t_snapshot_take_restore(_Config) -> {ok, _Pid} = emqx_ds_storage_layer:start_link(Shard, opts()), ?assertEqual( Msgs1 ++ Msgs2, - lists:keysort(#message.timestamp, consume(Shard, ['#'])) + lists:keysort(#message.timestamp, emqx_ds_test_helpers:storage_consume(Shard, ['#'])) ). transfer_snapshot(Reader, Writer) -> @@ -127,29 +127,6 @@ message(Topic, Payload, PublishedAt) -> id = emqx_guid:gen() }. -consume(Shard, TopicFilter) -> - consume(Shard, TopicFilter, 0). - -consume(Shard, TopicFilter, StartTime) -> - Streams = emqx_ds_storage_layer:get_streams(Shard, TopicFilter, StartTime), - lists:flatmap( - fun({_Rank, Stream}) -> - {ok, It} = emqx_ds_storage_layer:make_iterator(Shard, Stream, TopicFilter, StartTime), - consume_stream(Shard, It) - end, - Streams - ). - -consume_stream(Shard, It) -> - case emqx_ds_storage_layer:next(Shard, It, 100) of - {ok, _NIt, _Msgs = []} -> - []; - {ok, NIt, Batch} -> - [Msg || {_DSKey, Msg} <- Batch] ++ consume_stream(Shard, NIt); - {ok, end_of_stream} -> - [] - end. - stop_shard(Pid) -> _ = unlink(Pid), proc_lib:stop(Pid, shutdown, infinity). diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index d26c6dd30..44c45248b 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -56,3 +56,63 @@ mock_rpc_result(gen_rpc, ExpectFun) -> {badrpc, timeout} end end). + +%% Consuming streams and iterators + +consume(DB, TopicFilter) -> + consume(DB, TopicFilter, 0). + +consume(DB, TopicFilter, StartTime) -> + Streams = emqx_ds:get_streams(DB, TopicFilter, StartTime), + lists:flatmap( + fun({_Rank, Stream}) -> consume_stream(DB, Stream, TopicFilter, StartTime) end, + Streams + ). + +consume_stream(DB, Stream, TopicFilter, StartTime) -> + {ok, It0} = emqx_ds:make_iterator(DB, Stream, TopicFilter, StartTime), + {ok, _It, Msgs} = consume_iter(DB, It0), + Msgs. 
+ +consume_iter(DB, It) -> + consume_iter(DB, It, #{}). + +consume_iter(DB, It, Opts) -> + consume_iter_with(fun emqx_ds:next/3, [DB], It, Opts). + +storage_consume(ShardId, TopicFilter) -> + storage_consume(ShardId, TopicFilter, 0). + +storage_consume(ShardId, TopicFilter, StartTime) -> + Streams = emqx_ds_storage_layer:get_streams(ShardId, TopicFilter, StartTime), + lists:flatmap( + fun({_Rank, Stream}) -> + storage_consume_stream(ShardId, Stream, TopicFilter, StartTime) + end, + Streams + ). + +storage_consume_stream(ShardId, Stream, TopicFilter, StartTime) -> + {ok, It0} = emqx_ds_storage_layer:make_iterator(ShardId, Stream, TopicFilter, StartTime), + {ok, _It, Msgs} = storage_consume_iter(ShardId, It0), + Msgs. + +storage_consume_iter(ShardId, It) -> + storage_consume_iter(ShardId, It, #{}). + +storage_consume_iter(ShardId, It, Opts) -> + consume_iter_with(fun emqx_ds_storage_layer:next/3, [ShardId], It, Opts). + +consume_iter_with(NextFun, Args, It0, Opts) -> + BatchSize = maps:get(batch_size, Opts, 5), + case erlang:apply(NextFun, Args ++ [It0, BatchSize]) of + {ok, It, _Msgs = []} -> + {ok, It, []}; + {ok, It1, Batch} -> + {ok, It, Msgs} = consume_iter_with(NextFun, Args, It1, Opts), + {ok, It, [Msg || {_DSKey, Msg} <- Batch] ++ Msgs}; + {ok, Eos = end_of_stream} -> + {ok, Eos, []}; + {error, Class, Reason} -> + error({error, Class, Reason}) + end. 
From 778e897f1f5711840e0773dab97cd9f7300c3c79 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 29 Mar 2024 13:33:26 +0100 Subject: [PATCH 040/234] chore(dsrepl): describe snapshot ownership and few shortcomings --- .../src/emqx_ds_replication_snapshot.erl | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl index ab06dff53..c90c71688 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl @@ -72,11 +72,25 @@ write(Dir, Meta, MachineState) -> ra_log_snapshot:write(Dir, Meta, MachineState). %% Reading a snapshot. +%% %% This is triggered by the leader when it finds out that a follower is %% behind so much that there are no log segments covering the gap anymore. %% This process, on the other hand, MUST involve reading the storage snapshot, %% (in addition to the log snapshot) to reconstruct the storage state on the -%% target node. +%% target server. +%% +%% Currently, a snapshot reader is owned by a special "snapshot sender" process +%% spawned by the leader `ra` server, which sends chunks to the target server +%% in a tight loop. This process terminates under the following conditions: +%% 1. The snapshot is completely read and sent. +%% 2. Remote server fails to accept a chunk, either due to network failure (most +%% likely) or a logic error (very unlikely). +%% +%% TODO +%% In the latter case the process terminates without the chance to clean up the +%% snapshot reader resource, which will cause the snapshot to linger indefinitely. +%% For better control over resources, observability, and niceties like flow +%% control and backpressure we need to move this into a dedicated process tree. 
-spec begin_read(_SnapshotDir :: file:filename(), _Context :: #{}) -> {ok, ra_snapshot:meta(), rs()} | {error, _Reason :: term()}. @@ -131,9 +145,22 @@ complete_read(RS = #rs{reader = SnapReader, started_at = StartedAt}) -> }). %% Accepting a snapshot. +%% %% This process is triggered by the target server, when the leader finds out %% that the target server is severely lagging behind. This is receiving side of %% `begin_read/2` and `read_chunk/3`. +%% +%% Currently, a snapshot writer is owned by the follower `ra` server process +%% residing in dedicated `receive_snapshot` state. This process reverts back +%% to the regular `follower` state under the following conditions: +%% 1. The snapshot is completely accepted, and the machine state is recovered. +%% 2. The process times out waiting for the next chunk. +%% 3. The process encounters a logic error (very unlikely). +%% +%% TODO +%% In the latter cases, the snapshot writer will not have a chance to clean up. +%% For better control over resources, observability, and niceties like flow +%% control and backpressure we need to move this into a dedicated process tree. -spec begin_accept(_SnapshotDir :: file:filename(), ra_snapshot:meta()) -> {ok, ws()}. 
From 2097e854fc00482d2e069578575fe23df494a4ef Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 27 Mar 2024 14:56:40 -0300 Subject: [PATCH 041/234] feat(client mgmt api): add cursor-based list API Fixes https://emqx.atlassian.net/browse/EMQX-12028 --- .../src/emqx_dashboard_swagger.erl | 10 + .../src/emqx_mgmt_api_clients.erl | 489 ++++++++++++++---- .../test/emqx_mgmt_api_clients_SUITE.erl | 418 ++++++++++++++- changes/ce/feat-12798.en.md | 1 + 4 files changed, 805 insertions(+), 113 deletions(-) create mode 100644 changes/ce/feat-12798.en.md diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index a6038bcb7..8cad67695 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -164,6 +164,14 @@ fields(limit) -> ]), Meta = #{in => query, desc => Desc, default => ?DEFAULT_ROW, example => 50}, [{limit, hoconsc:mk(range(1, ?MAX_ROW_LIMIT), Meta)}]; +fields(cursor) -> + Desc = <<"Opaque value representing the current iteration state.">>, + Meta = #{default => none, in => query, desc => Desc}, + [{cursor, hoconsc:mk(hoconsc:union([none, binary()]), Meta)}]; +fields(cursor_response) -> + Desc = <<"Opaque value representing the current iteration state.">>, + Meta = #{desc => Desc, required => false}, + [{cursor, hoconsc:mk(binary(), Meta)}]; fields(count) -> Desc = << "Total number of records matching the query.
" @@ -197,6 +205,8 @@ fields(start) -> [{start, hoconsc:mk(hoconsc:union([none, binary()]), Meta)}]; fields(meta) -> fields(page) ++ fields(limit) ++ fields(count) ++ fields(hasnext); +fields(meta_with_cursor) -> + fields(count) ++ fields(hasnext) ++ fields(cursor_response); fields(continuation_meta) -> fields(start) ++ fields(position). diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 07f407430..9175f91ff 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -38,6 +38,7 @@ -export([ clients/2, + list_clients_v2/2, kickout_clients/2, client/2, subscriptions/2, @@ -63,6 +64,10 @@ %% for batch operation -export([do_subscribe/3]). +-ifdef(TEST). +-export([parse_cursor/2, serialize_cursor/1]). +-endif. + -define(TAGS, [<<"Clients">>]). -define(CLIENT_QSCHEMA, [ @@ -95,6 +100,14 @@ message => <<"Client connection has been shutdown">> }). +%% tags +-define(CURSOR_VSN1, 1). +-define(CURSOR_TYPE_ETS, 1). +-define(CURSOR_TYPE_DS, 2). +%% field keys +-define(CURSOR_ETS_NODE_IDX, 1). +-define(CURSOR_ETS_CONT, 2). + namespace() -> undefined. api_spec() -> @@ -103,6 +116,7 @@ api_spec() -> paths() -> [ "/clients", + "/clients_v2", "/clients/kickout/bulk", "/clients/:clientid", "/clients/:clientid/authorization/cache", @@ -117,115 +131,38 @@ paths() -> "/sessions_count" ]. 
+schema("/clients_v2") -> + #{ + 'operationId' => list_clients_v2, + get => #{ + security => [], + description => ?DESC(list_clients), + tags => ?TAGS, + parameters => fields(list_clients_v2_inputs), + responses => #{ + 200 => + emqx_dashboard_swagger:schema_with_example(?R_REF(list_clients_v2_response), #{ + <<"data">> => [client_example()], + <<"meta">> => #{ + <<"count">> => 1, + <<"cursor">> => <<"g2wAAAADYQFhAm0AAAACYzJq">>, + <<"hasnext">> => true + } + }), + 400 => + emqx_dashboard_swagger:error_codes( + ['INVALID_PARAMETER'], <<"Invalid parameters">> + ) + } + } + }; schema("/clients") -> #{ 'operationId' => clients, get => #{ description => ?DESC(list_clients), tags => ?TAGS, - parameters => [ - hoconsc:ref(emqx_dashboard_swagger, page), - hoconsc:ref(emqx_dashboard_swagger, limit), - {node, - hoconsc:mk(binary(), #{ - in => query, - required => false, - desc => <<"Node name">>, - example => <<"emqx@127.0.0.1">> - })}, - {username, - hoconsc:mk(hoconsc:array(binary()), #{ - in => query, - required => false, - desc => << - "User name, multiple values can be specified by" - " repeating the parameter: username=u1&username=u2" - >> - })}, - {ip_address, - hoconsc:mk(binary(), #{ - in => query, - required => false, - desc => <<"Client's IP address">>, - example => <<"127.0.0.1">> - })}, - {conn_state, - hoconsc:mk(hoconsc:enum([connected, idle, disconnected]), #{ - in => query, - required => false, - desc => - <<"The current connection status of the client, ", - "the possible values are connected,idle,disconnected">> - })}, - {clean_start, - hoconsc:mk(boolean(), #{ - in => query, - required => false, - description => <<"Whether the client uses a new session">> - })}, - {proto_ver, - hoconsc:mk(binary(), #{ - in => query, - required => false, - desc => <<"Client protocol version">> - })}, - {like_clientid, - hoconsc:mk(binary(), #{ - in => query, - required => false, - desc => <<"Fuzzy search `clientid` as substring">> - })}, - {like_username, - 
hoconsc:mk(binary(), #{ - in => query, - required => false, - desc => <<"Fuzzy search `username` as substring">> - })}, - {gte_created_at, - hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ - in => query, - required => false, - desc => - <<"Search client session creation time by greater", - " than or equal method, rfc3339 or timestamp(millisecond)">> - })}, - {lte_created_at, - hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ - in => query, - required => false, - desc => - <<"Search client session creation time by less", - " than or equal method, rfc3339 or timestamp(millisecond)">> - })}, - {gte_connected_at, - hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ - in => query, - required => false, - desc => << - "Search client connection creation time by greater" - " than or equal method, rfc3339 or timestamp(epoch millisecond)" - >> - })}, - {lte_connected_at, - hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ - in => query, - required => false, - desc => << - "Search client connection creation time by less" - " than or equal method, rfc3339 or timestamp(millisecond)" - >> - })}, - {clientid, - hoconsc:mk(hoconsc:array(binary()), #{ - in => query, - required => false, - desc => << - "Client ID, multiple values can be specified by" - " repeating the parameter: clientid=c1&clientid=c2" - >> - })}, - ?R_REF(requested_client_fields) - ], + parameters => fields(list_clients_v1_inputs), responses => #{ 200 => emqx_dashboard_swagger:schema_with_example(?R_REF(clients), #{ @@ -453,11 +390,129 @@ schema("/sessions_count") -> } }. 
+fields(list_clients_v2_inputs) -> + [ + hoconsc:ref(emqx_dashboard_swagger, cursor) + | fields(common_list_clients_input) + ]; +fields(list_clients_v1_inputs) -> + [ + hoconsc:ref(emqx_dashboard_swagger, page), + {node, + hoconsc:mk(binary(), #{ + in => query, + required => false, + desc => <<"Node name">>, + example => <<"emqx@127.0.0.1">> + })} + | fields(common_list_clients_input) + ]; +fields(common_list_clients_input) -> + [ + hoconsc:ref(emqx_dashboard_swagger, limit), + {username, + hoconsc:mk(hoconsc:array(binary()), #{ + in => query, + required => false, + desc => << + "User name, multiple values can be specified by" + " repeating the parameter: username=u1&username=u2" + >> + })}, + {ip_address, + hoconsc:mk(binary(), #{ + in => query, + required => false, + desc => <<"Client's IP address">>, + example => <<"127.0.0.1">> + })}, + {conn_state, + hoconsc:mk(hoconsc:enum([connected, idle, disconnected]), #{ + in => query, + required => false, + desc => + <<"The current connection status of the client, ", + "the possible values are connected,idle,disconnected">> + })}, + {clean_start, + hoconsc:mk(boolean(), #{ + in => query, + required => false, + description => <<"Whether the client uses a new session">> + })}, + {proto_ver, + hoconsc:mk(binary(), #{ + in => query, + required => false, + desc => <<"Client protocol version">> + })}, + {like_clientid, + hoconsc:mk(binary(), #{ + in => query, + required => false, + desc => <<"Fuzzy search `clientid` as substring">> + })}, + {like_username, + hoconsc:mk(binary(), #{ + in => query, + required => false, + desc => <<"Fuzzy search `username` as substring">> + })}, + {gte_created_at, + hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ + in => query, + required => false, + desc => + <<"Search client session creation time by greater", + " than or equal method, rfc3339 or timestamp(millisecond)">> + })}, + {lte_created_at, + hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ + in => query, + required => 
false, + desc => + <<"Search client session creation time by less", + " than or equal method, rfc3339 or timestamp(millisecond)">> + })}, + {gte_connected_at, + hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ + in => query, + required => false, + desc => << + "Search client connection creation time by greater" + " than or equal method, rfc3339 or timestamp(epoch millisecond)" + >> + })}, + {lte_connected_at, + hoconsc:mk(emqx_utils_calendar:epoch_millisecond(), #{ + in => query, + required => false, + desc => << + "Search client connection creation time by less" + " than or equal method, rfc3339 or timestamp(millisecond)" + >> + })}, + {clientid, + hoconsc:mk(hoconsc:array(binary()), #{ + in => query, + required => false, + desc => << + "Client ID, multiple values can be specified by" + " repeating the parameter: clientid=c1&clientid=c2" + >> + })}, + ?R_REF(requested_client_fields) + ]; fields(clients) -> [ {data, hoconsc:mk(hoconsc:array(?REF(client)), #{})}, {meta, hoconsc:mk(hoconsc:ref(emqx_dashboard_swagger, meta), #{})} ]; +fields(list_clients_v2_response) -> + [ + {data, hoconsc:mk(hoconsc:array(?REF(client)), #{})}, + {meta, hoconsc:mk(hoconsc:ref(emqx_dashboard_swagger, meta_with_cursor), #{})} + ]; fields(client) -> [ {awaiting_rel_cnt, @@ -890,6 +945,218 @@ list_clients(QString) -> {200, Response} end. +list_clients_v2(get, #{query_string := QString0}) -> + Nodes = emqx:running_nodes(), + case maps:get(<<"cursor">>, QString0, none) of + none -> + Cursor = initial_ets_cursor(Nodes), + do_list_clients_v2(Nodes, Cursor, QString0); + CursorBin when is_binary(CursorBin) -> + case parse_cursor(CursorBin, Nodes) of + {ok, Cursor} -> + do_list_clients_v2(Nodes, Cursor, QString0); + {error, bad_cursor} -> + ?BAD_REQUEST(<<"bad cursor">>) + end + end. + +do_list_clients_v2(Nodes, Cursor, QString0) -> + Limit = maps:get(<<"limit">>, QString0, 100), + Acc = #{ + rows => [], + n => 0, + limit => Limit + }, + do_list_clients_v2(Nodes, Cursor, QString0, Acc). 
+ +do_list_clients_v2(_Nodes, Cursor = done, _QString, Acc) -> + format_results(Acc, Cursor); +do_list_clients_v2(Nodes, Cursor = #{type := ?CURSOR_TYPE_ETS, node := Node}, QString0, Acc0) -> + {Rows, NewCursor} = do_ets_select(Nodes, QString0, Cursor), + Acc1 = maps:update_with(rows, fun(Rs) -> [{Node, Rows} | Rs] end, Acc0), + Acc = #{limit := Limit, n := N} = maps:update_with(n, fun(N) -> N + length(Rows) end, Acc1), + case N >= Limit of + true -> + format_results(Acc, NewCursor); + false -> + do_list_clients_v2(Nodes, NewCursor, QString0, Acc) + end; +do_list_clients_v2(Nodes, _Cursor = #{type := ?CURSOR_TYPE_DS, iterator := Iter0}, QString0, Acc0) -> + #{limit := Limit} = Acc0, + {Rows0, Iter} = emqx_persistent_session_ds_state:session_iterator_next(Iter0, Limit), + NewCursor = next_ds_cursor(Iter), + Rows1 = drop_live_and_expired(Rows0), + Rows = maybe_run_fuzzy_filter(Rows1, QString0), + Acc1 = maps:update_with(rows, fun(Rs) -> [{undefined, Rows} | Rs] end, Acc0), + Acc = #{n := N} = maps:update_with(n, fun(N) -> N + length(Rows) end, Acc1), + case N >= Limit of + true -> + format_results(Acc, NewCursor); + false -> + do_list_clients_v2(Nodes, NewCursor, QString0, Acc) + end. + +format_results(Acc, Cursor) -> + #{ + rows := NodeRows, + n := N + } = Acc, + Meta = + case Cursor of + done -> + #{ + hasnext => false, + count => N + }; + _ -> + #{ + hasnext => true, + count => N, + cursor => serialize_cursor(Cursor) + } + end, + Resp = #{ + meta => Meta, + data => [ + format_channel_info(Node, Row) + || {Node, Rows} <- NodeRows, + Row <- Rows + ] + }, + ?OK(Resp). + +do_ets_select(Nodes, QString0, #{node := Node, node_idx := NodeIdx, cont := Cont} = _Cursor) -> + {_, QString1} = emqx_mgmt_api:parse_qstring(QString0, ?CLIENT_QSCHEMA), + Limit = maps:get(<<"limit">>, QString0, 10), + {Rows, #{cont := NewCont, node_idx := NewNodeIdx}} = ets_select( + QString1, Limit, Node, NodeIdx, Cont + ), + {Rows, next_ets_cursor(Nodes, NewNodeIdx, NewCont)}. 
+ +maybe_run_fuzzy_filter(Rows, QString0) -> + {_, {_, FuzzyQString}} = emqx_mgmt_api:parse_qstring(QString0, ?CLIENT_QSCHEMA), + FuzzyFilterFn = fuzzy_filter_fun(FuzzyQString), + case FuzzyFilterFn of + undefined -> + Rows; + {Fn, Args} -> + lists:filter( + fun(E) -> erlang:apply(Fn, [E | Args]) end, + Rows + ) + end. + +initial_ets_cursor([Node | _Rest] = _Nodes) -> + #{ + type => ?CURSOR_TYPE_ETS, + node => Node, + node_idx => 1, + cont => undefined + }. + +initial_ds_cursor() -> + case emqx_persistent_message:is_persistence_enabled() of + true -> + #{ + type => ?CURSOR_TYPE_DS, + iterator => init_persistent_session_iterator() + }; + false -> + done + end. + +next_ets_cursor(Nodes, NodeIdx, Cont) -> + case NodeIdx > length(Nodes) of + true -> + initial_ds_cursor(); + false -> + Node = lists:nth(NodeIdx, Nodes), + #{ + type => ?CURSOR_TYPE_ETS, + node_idx => NodeIdx, + node => Node, + cont => Cont + } + end. + +next_ds_cursor('$end_of_table') -> + done; +next_ds_cursor(Iter) -> + #{ + type => ?CURSOR_TYPE_DS, + iterator => Iter + }. + +parse_cursor(CursorBin, Nodes) -> + try base64:decode(CursorBin, #{mode => urlsafe, padding => false}) of + Bin -> + parse_cursor1(Bin, Nodes) + catch + _:_ -> + {error, bad_cursor} + end. + +parse_cursor1(CursorBin, Nodes) -> + try binary_to_term(CursorBin, [safe]) of + [ + ?CURSOR_VSN1, + ?CURSOR_TYPE_ETS, + #{?CURSOR_ETS_NODE_IDX := NodeIdx, ?CURSOR_ETS_CONT := Cont} + ] -> + case NodeIdx > length(Nodes) of + true -> + {error, bad_cursor}; + false -> + Node = lists:nth(NodeIdx, Nodes), + Cursor = #{ + type => ?CURSOR_TYPE_ETS, + node => Node, + node_idx => NodeIdx, + cont => Cont + }, + {ok, Cursor} + end; + [?CURSOR_VSN1, ?CURSOR_TYPE_DS, DSIter] -> + Cursor = #{type => ?CURSOR_TYPE_DS, iterator => DSIter}, + {ok, Cursor}; + _ -> + {error, bad_cursor} + catch + error:badarg -> + {error, bad_cursor} + end. 
+ +serialize_cursor(#{type := ?CURSOR_TYPE_ETS, node_idx := NodeIdx, cont := Cont}) -> + Cursor0 = [ + ?CURSOR_VSN1, + ?CURSOR_TYPE_ETS, + #{?CURSOR_ETS_NODE_IDX => NodeIdx, ?CURSOR_ETS_CONT => Cont} + ], + Bin = term_to_binary(Cursor0, [{compressed, 9}]), + base64:encode(Bin, #{mode => urlsafe, padding => false}); +serialize_cursor(#{type := ?CURSOR_TYPE_DS, iterator := Iter}) -> + Cursor0 = [?CURSOR_VSN1, ?CURSOR_TYPE_DS, Iter], + Bin = term_to_binary(Cursor0, [{compressed, 9}]), + base64:encode(Bin, #{mode => urlsafe, padding => false}). + +%% An adapter function so we can reutilize all the logic in `emqx_mgmt_api' for +%% selecting/fuzzy filters, and also reutilize its BPAPI for selecting rows. +ets_select(NQString, Limit, Node, NodeIdx, Cont) -> + QueryState0 = emqx_mgmt_api:init_query_state( + ?CHAN_INFO_TAB, + NQString, + fun ?MODULE:qs2ms/2, + _Meta = #{page => unused, limit => Limit}, + _Options = #{} + ), + QueryState = QueryState0#{continuation => Cont}, + case emqx_mgmt_api:do_query(Node, QueryState) of + {Rows, #{complete := true}} -> + {Rows, #{node_idx => NodeIdx + 1, cont => undefined}}; + {Rows, #{continuation := NCont}} -> + {Rows, #{node_idx => NodeIdx, cont => NCont}} + end. + lookup(#{clientid := ClientID}) -> case emqx_mgmt:lookup_client({clientid, ClientID}, ?FORMAT_FUN) of [] -> @@ -1410,13 +1677,25 @@ fuzzy_filter_fun(Fuzzy) -> run_fuzzy_filter(_, []) -> true; -run_fuzzy_filter(E = {_, #{clientinfo := ClientInfo}, _}, [{Key, like, SubStr} | Fuzzy]) -> +run_fuzzy_filter( + Row = {_, #{metadata := #{clientinfo := ClientInfo}}}, + [{Key, like, SubStr} | RestArgs] +) -> + %% Row from DS + run_fuzzy_filter1(ClientInfo, Key, SubStr) andalso + run_fuzzy_filter(Row, RestArgs); +run_fuzzy_filter(Row = {_, #{clientinfo := ClientInfo}, _}, [{Key, like, SubStr} | RestArgs]) -> + %% Row from ETS + run_fuzzy_filter1(ClientInfo, Key, SubStr) andalso + run_fuzzy_filter(Row, RestArgs). 
+ +run_fuzzy_filter1(ClientInfo, Key, SubStr) -> Val = case maps:get(Key, ClientInfo, <<>>) of undefined -> <<>>; V -> V end, - binary:match(Val, SubStr) /= nomatch andalso run_fuzzy_filter(E, Fuzzy). + binary:match(Val, SubStr) /= nomatch. %%-------------------------------------------------------------------- %% format funcs diff --git a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl index 2f4804158..ebda34bc2 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl @@ -19,8 +19,9 @@ -include_lib("emqx/include/emqx_mqtt.hrl"). -include_lib("emqx/include/emqx_router.hrl"). --include_lib("eunit/include/eunit.hrl"). +-include_lib("stdlib/include/assert.hrl"). -include_lib("common_test/include/ct.hrl"). +-include_lib("proper/include/proper.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("emqx/include/asserts.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). @@ -47,7 +48,8 @@ persistent_session_testcases() -> t_persistent_sessions2, t_persistent_sessions3, t_persistent_sessions4, - t_persistent_sessions5 + t_persistent_sessions5, + t_list_clients_v2 ]. client_msgs_testcases() -> [ @@ -56,11 +58,23 @@ client_msgs_testcases() -> ]. init_per_suite(Config) -> - emqx_mgmt_api_test_util:init_suite(), - Config. + ok = snabbkaffe:start_trace(), + Apps = emqx_cth_suite:start( + [ + emqx, + emqx_conf, + emqx_management, + emqx_mgmt_api_test_util:emqx_dashboard() + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + {ok, _} = emqx_common_test_http:create_default_app(), + [{apps, Apps} | Config]. -end_per_suite(_) -> - emqx_mgmt_api_test_util:end_suite(). +end_per_suite(Config) -> + Apps = ?config(apps, Config), + emqx_cth_suite:stop(Apps), + ok. 
init_per_group(persistent_sessions, Config) -> AppSpecs = [ @@ -109,9 +123,12 @@ end_per_testcase(TC, _Config) when ?LINE, fun() -> [] =:= emqx_cm:lookup_channels(local, ClientId) end, 5000 - ); + ), + ok = snabbkaffe:stop(), + ok; end_per_testcase(_TC, _Config) -> - ok = snabbkaffe:stop(). + ok = snabbkaffe:stop(), + ok. t_clients(_) -> process_flag(trap_exit, true), @@ -522,6 +539,12 @@ t_persistent_sessions5(Config) -> ), lists:foreach(fun emqtt:stop/1, [C3, C4]), + lists:foreach( + fun(ClientId) -> + ok = erpc:call(N1, emqx_persistent_session_ds, destroy_session, [ClientId]) + end, + [ClientId1, ClientId2, ClientId3, ClientId4] + ), ok end, @@ -1415,6 +1438,319 @@ t_subscribe_shared_topic_nl(_Config) -> PostFun(post, PathFun(["subscribe"]), #{topic => T, qos => 1, nl => 1, rh => 1}) ). +t_list_clients_v2(Config) -> + [N1, N2] = ?config(nodes, Config), + APIPort = 18084, + Port1 = get_mqtt_port(N1, tcp), + Port2 = get_mqtt_port(N2, tcp), + + ?check_trace( + begin + ClientId1 = <<"ca1">>, + ClientId2 = <<"c2">>, + ClientId3 = <<"c3">>, + ClientId4 = <<"ca4">>, + ClientId5 = <<"ca5">>, + ClientId6 = <<"c6">>, + AllClientIds = [ + ClientId1, + ClientId2, + ClientId3, + ClientId4, + ClientId5, + ClientId6 + ], + C1 = connect_client(#{port => Port1, clientid => ClientId1, clean_start => true}), + C2 = connect_client(#{port => Port2, clientid => ClientId2, clean_start => true}), + C3 = connect_client(#{port => Port1, clientid => ClientId3, clean_start => true}), + C4 = connect_client(#{port => Port2, clientid => ClientId4, clean_start => true}), + %% in-memory clients + C5 = connect_client(#{ + port => Port1, clientid => ClientId5, expiry => 0, clean_start => true + }), + C6 = connect_client(#{ + port => Port2, clientid => ClientId6, expiry => 0, clean_start => true + }), + %% offline persistent clients + ok = emqtt:stop(C3), + ok = emqtt:stop(C4), + + %% one by one + QueryParams1 = #{limit => "1"}, + Res1 = list_all_v2(APIPort, QueryParams1), + ?assertMatch( + [ + #{ 
+ <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := false, + <<"count">> := 1 + } + } + ], + Res1 + ), + assert_contains_clientids(Res1, AllClientIds), + + %% Reusing the same cursors yield the same pages + traverse_in_reverse_v2(APIPort, QueryParams1, Res1), + + %% paging + QueryParams2 = #{limit => "4"}, + Res2 = list_all_v2(APIPort, QueryParams2), + ?assertMatch( + [ + #{ + <<"data">> := [_, _, _, _], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 4, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_, _], + <<"meta">> := + #{ + <<"hasnext">> := false, + <<"count">> := 2 + } + } + ], + Res2 + ), + assert_contains_clientids(Res2, AllClientIds), + traverse_in_reverse_v2(APIPort, QueryParams2, Res2), + + QueryParams3 = #{limit => "2"}, + Res3 = list_all_v2(APIPort, QueryParams3), + ?assertMatch( + [ + #{ + <<"data">> := [_, _], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 2, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_, _], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 2, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_, _], + <<"meta">> := + #{ + <<"hasnext">> := false, + <<"count">> := 2 + } + } + ], + Res3 + ), + assert_contains_clientids(Res3, AllClientIds), + traverse_in_reverse_v2(APIPort, QueryParams3, Res3), + + %% fuzzy filters + QueryParams4 = #{limit => "100", like_clientid => "ca"}, + 
Res4 = list_all_v2(APIPort, QueryParams4), + ?assertMatch( + [ + #{ + <<"data">> := [_, _, _], + <<"meta">> := + #{ + <<"hasnext">> := false, + <<"count">> := 3 + } + } + ], + Res4 + ), + assert_contains_clientids(Res4, [ClientId1, ClientId4, ClientId5]), + traverse_in_reverse_v2(APIPort, QueryParams4, Res4), + QueryParams5 = #{limit => "1", like_clientid => "ca"}, + Res5 = list_all_v2(APIPort, QueryParams5), + ?assertMatch( + [ + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := true, + <<"count">> := 1, + <<"cursor">> := _ + } + }, + #{ + <<"data">> := [_], + <<"meta">> := + #{ + <<"hasnext">> := false, + <<"count">> := 1 + } + } + ], + Res5 + ), + assert_contains_clientids(Res5, [ClientId1, ClientId4, ClientId5]), + traverse_in_reverse_v2(APIPort, QueryParams5, Res5), + + lists:foreach( + fun(C) -> + {_, {ok, _}} = + ?wait_async_action( + emqtt:stop(C), + #{?snk_kind := emqx_cm_clean_down} + ) + end, + [C1, C2, C5, C6] + ), + + %% Verify that a malicious cursor that could generate an atom on the node is + %% rejected + EvilAtomBin0 = <<131, 100, 0, 5, "some_atom_that_doesnt_exist_on_the_remote_node">>, + EvilAtomBin = base64:encode(EvilAtomBin0, #{mode => urlsafe, padding => false}), + + ?assertMatch( + {error, {{_, 400, _}, _, #{<<"message">> := <<"bad cursor">>}}}, + list_v2_request(APIPort, #{limit => "1", cursor => EvilAtomBin}) + ), + %% Verify that the atom was not created + erpc:call(N1, fun() -> + ?assertError(badarg, binary_to_term(EvilAtomBin0, [safe])) + end), + ?assert(is_atom(binary_to_term(EvilAtomBin0))), + + lists:foreach( + fun(ClientId) -> + ok = erpc:call(N1, emqx_persistent_session_ds, destroy_session, [ClientId]) + end, + AllClientIds + ), + + ok + end, + [] + ), + ok. + +t_cursor_serde_prop(_Config) -> + ?assert(proper:quickcheck(cursor_serde_prop(), [{numtests, 100}, {to_file, user}])). 
+ +cursor_serde_prop() -> + ?FORALL( + NumNodes, + range(1, 10), + ?FORALL( + Cursor, + list_clients_cursor_gen(NumNodes), + begin + Nodes = lists:seq(1, NumNodes), + Bin = emqx_mgmt_api_clients:serialize_cursor(Cursor), + Res = emqx_mgmt_api_clients:parse_cursor(Bin, Nodes), + ?WHENFAIL( + ct:pal("original:\n ~p\nroundtrip:\n ~p", [Cursor, Res]), + {ok, Cursor} =:= Res + ) + end + ) + ). + +list_clients_cursor_gen(NumNodes) -> + oneof([ + lists_clients_ets_cursor_gen(NumNodes), + lists_clients_ds_cursor_gen() + ]). + +-define(CURSOR_TYPE_ETS, 1). +-define(CURSOR_TYPE_DS, 2). + +lists_clients_ets_cursor_gen(NumNodes) -> + ?LET( + {NodeIdx, Cont}, + {range(1, NumNodes), oneof([undefined, tuple()])}, + #{ + type => ?CURSOR_TYPE_ETS, + node => NodeIdx, + node_idx => NodeIdx, + cont => Cont + } + ). + +lists_clients_ds_cursor_gen() -> + ?LET( + Iter, + oneof(['$end_of_table', list(term())]), + #{ + type => ?CURSOR_TYPE_DS, + iterator => Iter + } + ). + time_string_to_epoch_millisecond(DateTime) -> time_string_to_epoch(DateTime, millisecond). @@ -1472,6 +1808,31 @@ list_request(Port, QueryParams) -> Path = emqx_mgmt_api_test_util:api_path(Host, ["clients"]), request(get, Path, [], QueryParams). +list_v2_request(Port, QueryParams = #{}) -> + Host = "http://127.0.0.1:" ++ integer_to_list(Port), + Path = emqx_mgmt_api_test_util:api_path(Host, ["clients_v2"]), + QS = uri_string:compose_query(maps:to_list(emqx_utils_maps:binary_key_map(QueryParams))), + request(get, Path, [], QS). + +list_all_v2(Port, QueryParams = #{}) -> + do_list_all_v2(Port, QueryParams, _Acc = []). 
+ +do_list_all_v2(Port, QueryParams, Acc) -> + case list_v2_request(Port, QueryParams) of + {ok, {{_, 200, _}, _, Resp = #{<<"meta">> := #{<<"cursor">> := Cursor}}}} -> + do_list_all_v2(Port, QueryParams#{cursor => Cursor}, [Resp | Acc]); + {ok, {{_, 200, _}, _, Resp = #{<<"meta">> := #{<<"hasnext">> := false}}}} -> + lists:reverse([Resp | Acc]); + Other -> + error( + {unexpected_response, #{ + acc_so_far => Acc, + response => Other, + query_params => QueryParams + }} + ) + end. + lookup_request(ClientId) -> lookup_request(ClientId, 18083). @@ -1535,3 +1896,44 @@ connect_client(Opts) -> ]), {ok, _} = emqtt:connect(C), C. + +assert_contains_clientids(Results, ExpectedClientIds) -> + ContainedClientIds = [ + ClientId + || #{<<"data">> := Rows} <- Results, + #{<<"clientid">> := ClientId} <- Rows + ], + ?assertEqual( + lists:sort(ExpectedClientIds), + lists:sort(ContainedClientIds), + #{results => Results} + ). + +traverse_in_reverse_v2(APIPort, QueryParams0, Results) -> + Cursors0 = + lists:map( + fun(#{<<"meta">> := Meta}) -> + maps:get(<<"cursor">>, Meta, <<"wontbeused">>) + end, + Results + ), + Cursors1 = [<<"none">> | lists:droplast(Cursors0)], + DirectOrderClientIds = [ + ClientId + || #{<<"data">> := Rows} <- Results, + #{<<"clientid">> := ClientId} <- Rows + ], + ReverseCursors = lists:reverse(Cursors1), + do_traverse_in_reverse_v2( + APIPort, QueryParams0, ReverseCursors, DirectOrderClientIds, _Acc = [] + ). 
+ +do_traverse_in_reverse_v2(_APIPort, _QueryParams0, _Cursors = [], DirectOrderClientIds, Acc) -> + ?assertEqual(DirectOrderClientIds, Acc); +do_traverse_in_reverse_v2(APIPort, QueryParams0, [Cursor | Rest], DirectOrderClientIds, Acc) -> + QueryParams = QueryParams0#{cursor => Cursor}, + Res0 = list_v2_request(APIPort, QueryParams), + ?assertMatch({ok, {{_, 200, _}, _, #{<<"data">> := _}}}, Res0), + {ok, {{_, 200, _}, _, #{<<"data">> := Rows}}} = Res0, + ClientIds = [ClientId || #{<<"clientid">> := ClientId} <- Rows], + do_traverse_in_reverse_v2(APIPort, QueryParams0, Rest, DirectOrderClientIds, ClientIds ++ Acc). diff --git a/changes/ce/feat-12798.en.md b/changes/ce/feat-12798.en.md new file mode 100644 index 000000000..a3b46f5e6 --- /dev/null +++ b/changes/ce/feat-12798.en.md @@ -0,0 +1 @@ +Added new `GET /api/v5/clients_v2` API that uses cursors instead of page numbers for pagination. This should be more efficient than the old API endpoint, which currently traverses tables multiple times. From d8204021dca78e1914ee37a77c2622182f5dcbf0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:32:30 +0100 Subject: [PATCH 042/234] refactor(metrics): Move metrics worker to emqx_utils application --- apps/emqx_utils/README.md | 1 + apps/{emqx => emqx_utils}/src/emqx_metrics_worker.erl | 0 apps/{emqx => emqx_utils}/test/emqx_metrics_worker_SUITE.erl | 0 3 files changed, 1 insertion(+) rename apps/{emqx => emqx_utils}/src/emqx_metrics_worker.erl (100%) rename apps/{emqx => emqx_utils}/test/emqx_metrics_worker_SUITE.erl (100%) diff --git a/apps/emqx_utils/README.md b/apps/emqx_utils/README.md index f8c386f3d..d03b34c64 100644 --- a/apps/emqx_utils/README.md +++ b/apps/emqx_utils/README.md @@ -16,6 +16,7 @@ handling, data conversions, and more. - `emqx_utils_json`: JSON encoding and decoding - `emqx_utils_maps`: convenience functions for map lookup and manipulation like deep_get etc. 
+- `emqx_metrics`: counters, gauges, slides ## Contributing diff --git a/apps/emqx/src/emqx_metrics_worker.erl b/apps/emqx_utils/src/emqx_metrics_worker.erl similarity index 100% rename from apps/emqx/src/emqx_metrics_worker.erl rename to apps/emqx_utils/src/emqx_metrics_worker.erl diff --git a/apps/emqx/test/emqx_metrics_worker_SUITE.erl b/apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl similarity index 100% rename from apps/emqx/test/emqx_metrics_worker_SUITE.erl rename to apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl From c9de336234f5843fff674a3f0896a3fa86ea08fa Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 21 Mar 2024 12:33:34 +0100 Subject: [PATCH 043/234] feat(ds): Add metrics worker to the builtin db supervision tree --- .../src/emqx_ds_builtin_metrics.erl | 75 +++++++++++++++++++ .../src/emqx_ds_builtin_sup.erl | 3 +- 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl new file mode 100644 index 000000000..a47540360 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -0,0 +1,75 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_builtin_metrics). + +%% API: +-export([child_spec/0, init_for_db/1, init_for_shard/2]). + +%% behavior callbacks: +-export([]). + +%% internal exports: +-export([]). + +-export_type([]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(WORKER, ?MODULE). + +-define(DB_METRICS, + [ + + ]). + +-define(SHARD_METRICS, + [ + 'egress.bytes', + 'egress.batches', + 'egress.messages', + {slide, 'egress.flush_time'} + ]). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec child_spec() -> supervisor:child_spec(). +child_spec() -> + emqx_metrics_worker:child_spec(?WORKER). + +-spec init_for_db(emqx_ds:db()) -> ok. +init_for_db(DB) -> + emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). + +-spec init_for_shard(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> ok. +init_for_shard(DB, ShardId) -> + Id = iolist_to_binary([atom_to_list(DB), $/, ShardId]), + emqx_metrics_worker:create_metrics(?WORKER, Id, ?SHARD_METRICS, []). 
+ +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl index 50ed18de1..45e81bdc9 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl @@ -81,6 +81,7 @@ stop_db(DB) -> %% Chidren are attached dynamically to this one. init(?top) -> %% Children: + MetricsWorker = emqx_ds_builtin_metrics:child_spec(), MetadataServer = #{ id => metadata_server, start => {emqx_ds_replication_layer_meta, start_link, []}, @@ -102,7 +103,7 @@ init(?top) -> period => 1, auto_shutdown => never }, - {ok, {SupFlags, [MetadataServer, DBsSup]}}; + {ok, {SupFlags, [MetricsWorker, MetadataServer, DBsSup]}}; init(?databases) -> %% Children are added dynamically: SupFlags = #{ From 606f2a88cd11e2eef067f247a73a89ff47699cd0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 21 Mar 2024 15:03:33 +0100 Subject: [PATCH 044/234] feat(ds): Add egress metrics --- .../src/emqx_ds_builtin_metrics.erl | 71 +++++++++++++------ .../src/emqx_ds_replication_layer_egress.erl | 64 ++++++++++++++--- apps/emqx_prometheus/src/emqx_prometheus.erl | 13 ++++ 3 files changed, 115 insertions(+), 33 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index a47540360..8075238b3 100644 --- 
a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -16,7 +16,14 @@ -module(emqx_ds_builtin_metrics). %% API: --export([child_spec/0, init_for_db/1, init_for_shard/2]). +-export([child_spec/0, init_for_db/1, shard_metric_id/2, init_for_shard/1]). +-export([ + inc_egress_batches/1, + inc_egress_batches_retry/1, + inc_egress_messages/2, + inc_egress_bytes/2, + observe_egress_flush_time/2 +]). %% behavior callbacks: -export([]). @@ -24,7 +31,7 @@ %% internal exports: -export([]). --export_type([]). +-export_type([shard_metrics_id/0]). %%================================================================================ %% Type declarations @@ -32,18 +39,17 @@ -define(WORKER, ?MODULE). --define(DB_METRICS, - [ +-define(DB_METRICS, []). - ]). +-define(SHARD_METRICS, [ + 'egress.batches', + 'egress.batches.retry', + 'egress.messages', + 'egress.bytes', + {slide, 'egress.flush_time'} +]). --define(SHARD_METRICS, - [ - 'egress.bytes', - 'egress.batches', - 'egress.messages', - {slide, 'egress.flush_time'} - ]). +-type shard_metrics_id() :: binary(). %%================================================================================ %% API functions @@ -57,18 +63,39 @@ child_spec() -> init_for_db(DB) -> emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). --spec init_for_shard(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> ok. -init_for_shard(DB, ShardId) -> - Id = iolist_to_binary([atom_to_list(DB), $/, ShardId]), - emqx_metrics_worker:create_metrics(?WORKER, Id, ?SHARD_METRICS, []). +-spec shard_metric_id(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> shard_metrics_id(). +shard_metric_id(DB, ShardId) -> + iolist_to_binary([atom_to_list(DB), $/, ShardId]). 
-%%================================================================================ -%% behavior callbacks -%%================================================================================ +-spec init_for_shard(shard_metrics_id()) -> ok. +init_for_shard(ShardId) -> + emqx_metrics_worker:create_metrics(?WORKER, ShardId, ?SHARD_METRICS, []). -%%================================================================================ -%% Internal exports -%%================================================================================ +%% @doc Increase the number of successfully flushed batches +-spec inc_egress_batches(shard_metrics_id()) -> ok. +inc_egress_batches(Id) -> + emqx_metrics_worker:inc(?WORKER, Id, 'egress.batches'). + +%% @doc Increase the number of time the egress worker had to retry +%% flushing the batch +-spec inc_egress_batches_retry(shard_metrics_id()) -> ok. +inc_egress_batches_retry(Id) -> + emqx_metrics_worker:inc(?WORKER, Id, 'egress.batches.retry'). + +%% @doc Increase the number of messages successfully saved to the shard +-spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. +inc_egress_messages(Id, NMessages) -> + emqx_metrics_worker:inc(?WORKER, Id, 'egress.messages', NMessages). + +%% @doc Increase the number of messages successfully saved to the shard +-spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. +inc_egress_bytes(Id, NMessages) -> + emqx_metrics_worker:inc(?WORKER, Id, 'egress.bytes', NMessages). + +%% @doc Add a sample of time spent flushing the egress to the Raft log (in microseconds) +-spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. +observe_egress_flush_time(Id, FlushTime) -> + emqx_metrics_worker:observe(?WORKER, Id, 'egress.flush_time', FlushTime). 
%%================================================================================ %% Internal functions diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 128aeb380..3f9188312 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -40,6 +40,7 @@ -export_type([]). +-include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). %%================================================================================ @@ -49,8 +50,16 @@ -define(via(DB, Shard), {via, gproc, {n, l, {?MODULE, DB, Shard}}}). -define(flush, flush). --record(enqueue_req, {message :: emqx_types:message(), sync :: boolean()}). --record(enqueue_atomic_req, {batch :: [emqx_types:message()], sync :: boolean()}). +-record(enqueue_req, { + message :: emqx_types:message(), + sync :: boolean(), + payload_bytes :: non_neg_integer() +}). +-record(enqueue_atomic_req, { + batch :: [emqx_types:message()], + sync :: boolean(), + payload_bytes :: non_neg_integer() +}). 
%%================================================================================ %% API functions @@ -73,7 +82,8 @@ store_batch(DB, Messages, Opts) -> ?via(DB, Shard), #enqueue_req{ message = Message, - sync = Sync + sync = Sync, + payload_bytes = payload_size(Message) }, infinity ) @@ -83,11 +93,19 @@ store_batch(DB, Messages, Opts) -> true -> maps:foreach( fun(Shard, Batch) -> + PayloadBytes = lists:foldl( + fun(Msg, Acc) -> + Acc + payload_size(Msg) + end, + 0, + Batch + ), gen_server:call( ?via(DB, Shard), #enqueue_atomic_req{ batch = Batch, - sync = Sync + sync = Sync, + payload_bytes = PayloadBytes }, infinity ) @@ -108,7 +126,9 @@ store_batch(DB, Messages, Opts) -> -record(s, { db :: emqx_ds:db(), shard :: emqx_ds_replication_layer:shard_id(), + metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), n = 0 :: non_neg_integer(), + n_bytes = 0 :: non_neg_integer(), tref :: reference(), batch = [] :: [emqx_types:message()], pending_replies = [] :: [gen_server:from()] @@ -117,18 +137,21 @@ store_batch(DB, Messages, Opts) -> init([DB, Shard]) -> process_flag(trap_exit, true), process_flag(message_queue_data, off_heap), + MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), + ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), S = #s{ db = DB, shard = Shard, + metrics_id = MetricsId, tref = start_timer() }, {ok, S}. -handle_call(#enqueue_req{message = Msg, sync = Sync}, From, S) -> - do_enqueue(From, Sync, Msg, S); -handle_call(#enqueue_atomic_req{batch = Batch, sync = Sync}, From, S) -> +handle_call(#enqueue_req{message = Msg, sync = Sync, payload_bytes = NBytes}, From, S) -> + do_enqueue(From, Sync, Msg, NBytes, S); +handle_call(#enqueue_atomic_req{batch = Batch, sync = Sync, payload_bytes = NBytes}, From, S) -> Len = length(Batch), - do_enqueue(From, Sync, {atomic, Len, Batch}, S); + do_enqueue(From, Sync, {atomic, Len, NBytes, Batch}, NBytes, S); handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. 
@@ -161,6 +184,11 @@ do_flush( ) -> case emqx_ds_replication_layer:ra_store_batch(DB, Shard, lists:reverse(Messages)) of ok -> + emqx_ds_builtin_metrics:inc_egress_batches(S#s.metrics_id), + emqx_ds_builtin_metrics:inc_egress_messages(S#s.metrics_id, S#s.n), + emqx_ds_builtin_metrics:inc_egress_bytes(S#s.metrics_id, S#s.n_bytes), + lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), + true = erlang:garbage_collect(), ?tp( emqx_ds_replication_layer_egress_flush, #{db => DB, shard => Shard, batch => Messages} @@ -169,6 +197,7 @@ do_flush( true = erlang:garbage_collect(), ok; Error -> + emqx_ds_builtin_metrics:inc_egress_batches_retry(S#s.metrics_id), true = erlang:garbage_collect(), ?tp( warning, @@ -184,19 +213,27 @@ do_flush( end, S#s{ n = 0, + n_bytes = 0, batch = [], pending_replies = [], tref = start_timer() }. -do_enqueue(From, Sync, MsgOrBatch, S0 = #s{n = N, batch = Batch, pending_replies = Replies}) -> +do_enqueue( + From, + Sync, + MsgOrBatch, + BatchBytes, + S0 = #s{n = N, n_bytes = NBytes0, batch = Batch, pending_replies = Replies} +) -> + NBytes = NBytes0 + BatchBytes, NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), S1 = case MsgOrBatch of {atomic, NumMsgs, Msgs} -> - S0#s{n = N + NumMsgs, batch = Msgs ++ Batch}; + S0#s{n = N + NumMsgs, n_bytes = NBytes, batch = Msgs ++ Batch}; Msg -> - S0#s{n = N + 1, batch = [Msg | Batch]} + S0#s{n = N + 1, n_bytes = NBytes, batch = [Msg | Batch]} end, %% TODO: later we may want to delay the reply until the message is %% replicated, but it requies changes to the PUBACK/PUBREC flow to @@ -228,3 +265,8 @@ do_enqueue(From, Sync, MsgOrBatch, S0 = #s{n = N, batch = Batch, pending_replies start_timer() -> Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), erlang:send_after(Interval, self(), ?flush). 
+ +%% @doc Return approximate size of the MQTT message (it doesn't take +%% all things into account, for example headers and extras) +payload_size(#message{payload = P, topic = T}) -> + size(P) + size(T). diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 8556e82d3..813f91dbd 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -212,6 +212,7 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> ok = add_collect_family(Callback, cert_metric_meta(), ?MG(cert_data, RawData)), ok = add_collect_family(Callback, mria_metric_meta(), ?MG(mria_data, RawData)), + ok = add_collect_family(Callback, ds_metric_meta(), ?MG(ds_data, RawData)), ok = maybe_license_add_collect_family(Callback, RawData), ok; collect_mf(_Registry, _Callback) -> @@ -1011,6 +1012,18 @@ catch_all(DataFun) -> _:_ -> undefined end. +%%======================================== +%% Durable storge +%%======================================== + +ds_metric_meta() -> + [ + {emqx_ds_egress_batches, counter, 'egress.batches'}, + {emqx_ds_egress_batches_retry, counter, 'egress.batches.retry'}, + {emqx_ds_egress_messages, counter, 'egress.messages'}, + {emqx_ds_egress_bytes, counter, 'egress.bytes'} + ]. 
+ %%-------------------------------------------------------------------- %% Collect functions %%-------------------------------------------------------------------- From 044f3d4ef5d0863b90492fa52fd2cfb6ebe396f6 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 25 Mar 2024 20:58:24 +0100 Subject: [PATCH 045/234] fix(ds): Don't reverse entries in the atomic batch --- .../src/emqx_ds_replication_layer_egress.erl | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 3f9188312..1b2ac30dd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -130,7 +130,7 @@ store_batch(DB, Messages, Opts) -> n = 0 :: non_neg_integer(), n_bytes = 0 :: non_neg_integer(), tref :: reference(), - batch = [] :: [emqx_types:message()], + queue :: queue:queue(emqx_types:message()), pending_replies = [] :: [gen_server:from()] }). @@ -143,7 +143,8 @@ init([DB, Shard]) -> db = DB, shard = Shard, metrics_id = MetricsId, - tref = start_timer() + tref = start_timer(), + queue = queue:new() }, {ok, S}. @@ -151,7 +152,7 @@ handle_call(#enqueue_req{message = Msg, sync = Sync, payload_bytes = NBytes}, Fr do_enqueue(From, Sync, Msg, NBytes, S); handle_call(#enqueue_atomic_req{batch = Batch, sync = Sync, payload_bytes = NBytes}, From, S) -> Len = length(Batch), - do_enqueue(From, Sync, {atomic, Len, NBytes, Batch}, NBytes, S); + do_enqueue(From, Sync, {atomic, Len, Batch}, NBytes, S); handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. @@ -177,63 +178,59 @@ terminate(_Reason, _S) -> -define(COOLDOWN_MIN, 1000). -define(COOLDOWN_MAX, 5000). 
-do_flush(S = #s{batch = []}) -> - S#s{tref = start_timer()}; do_flush( - S = #s{batch = Messages, pending_replies = Replies, db = DB, shard = Shard} + S = #s{queue = Q, pending_replies = Replies, db = DB, shard = Shard} ) -> - case emqx_ds_replication_layer:ra_store_batch(DB, Shard, lists:reverse(Messages)) of + Messages = queue:to_list(Q), + case emqx_ds_replication_layer:ra_store_batch(DB, Shard, Messages) of ok -> emqx_ds_builtin_metrics:inc_egress_batches(S#s.metrics_id), emqx_ds_builtin_metrics:inc_egress_messages(S#s.metrics_id, S#s.n), emqx_ds_builtin_metrics:inc_egress_bytes(S#s.metrics_id, S#s.n_bytes), lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), - true = erlang:garbage_collect(), ?tp( emqx_ds_replication_layer_egress_flush, #{db => DB, shard => Shard, batch => Messages} ), lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), - true = erlang:garbage_collect(), - ok; + erlang:garbage_collect(), + S#s{ + n = 0, + n_bytes = 0, + queue = queue:new(), + pending_replies = [], + tref = start_timer() + }; Error -> emqx_ds_builtin_metrics:inc_egress_batches_retry(S#s.metrics_id), - true = erlang:garbage_collect(), + erlang:garbage_collect(), ?tp( warning, emqx_ds_replication_layer_egress_flush_failed, #{db => DB, shard => Shard, reason => Error} ), Cooldown = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), - ok = timer:sleep(Cooldown), - %% Since we drop the entire batch here, we at least reply callers with an - %% error so they don't hang indefinitely in the `gen_server' call with - %% `infinity' timeout. - lists:foreach(fun(From) -> gen_server:reply(From, {error, Error}) end, Replies) - end, - S#s{ - n = 0, - n_bytes = 0, - batch = [], - pending_replies = [], - tref = start_timer() - }. + S#s{ + tref = start_timer(Cooldown) + } + end. 
do_enqueue( From, Sync, MsgOrBatch, BatchBytes, - S0 = #s{n = N, n_bytes = NBytes0, batch = Batch, pending_replies = Replies} + S0 = #s{n = N, n_bytes = NBytes0, queue = Q0, pending_replies = Replies} ) -> NBytes = NBytes0 + BatchBytes, NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), S1 = case MsgOrBatch of {atomic, NumMsgs, Msgs} -> - S0#s{n = N + NumMsgs, n_bytes = NBytes, batch = Msgs ++ Batch}; + Q = lists:foldl(fun queue:in/2, Q0, Msgs), + S0#s{n = N + NumMsgs, n_bytes = NBytes, queue = Q}; Msg -> - S0#s{n = N + 1, n_bytes = NBytes, batch = [Msg | Batch]} + S0#s{n = N + 1, n_bytes = NBytes, queue = queue:in(Msg, Q0)} end, %% TODO: later we may want to delay the reply until the message is %% replicated, but it requies changes to the PUBACK/PUBREC flow to @@ -264,6 +261,9 @@ do_enqueue( start_timer() -> Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), + start_timer(Interval). + +start_timer(Interval) -> erlang:send_after(Interval, self(), ?flush). %% @doc Return approximate size of the MQTT message (it doesn't take From 0de255cac87bc3219029b4c3692ce57ebc6b939d Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:15:31 +0100 Subject: [PATCH 046/234] feat(ds): Report egress flush time --- .../src/emqx_ds_replication_layer_egress.erl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 1b2ac30dd..9651c029e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -179,14 +179,18 @@ terminate(_Reason, _S) -> -define(COOLDOWN_MAX, 5000). 
do_flush( - S = #s{queue = Q, pending_replies = Replies, db = DB, shard = Shard} + S = #s{queue = Q, pending_replies = Replies, db = DB, shard = Shard, metrics_id = Metrics} ) -> Messages = queue:to_list(Q), - case emqx_ds_replication_layer:ra_store_batch(DB, Shard, Messages) of + T0 = erlang:monotonic_time(microsecond), + Result = emqx_ds_replication_layer:ra_store_batch(DB, Shard, Messages), + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_egress_flush_time(Metrics, T1 - T0), + case Result of ok -> - emqx_ds_builtin_metrics:inc_egress_batches(S#s.metrics_id), - emqx_ds_builtin_metrics:inc_egress_messages(S#s.metrics_id, S#s.n), - emqx_ds_builtin_metrics:inc_egress_bytes(S#s.metrics_id, S#s.n_bytes), + emqx_ds_builtin_metrics:inc_egress_batches(Metrics), + emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), + emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), ?tp( emqx_ds_replication_layer_egress_flush, From 75b092bf0ed2d65e1ed2687390c95154b218b3fc Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 22 Mar 2024 22:30:28 +0100 Subject: [PATCH 047/234] fix(ds): Actually retry sending batch --- .../src/emqx_ds_builtin_db_sup.erl | 10 +- .../src/emqx_ds_builtin_metrics.erl | 132 +++++++- .../src/emqx_ds_replication_layer_egress.erl | 286 +++++++++++------- .../src/emqx_ds_storage_layer.erl | 3 + .../test/emqx_ds_SUITE.erl | 103 ++++++- .../test/emqx_ds_test_helpers.erl | 11 +- apps/emqx_prometheus/src/emqx_prometheus.erl | 26 +- 7 files changed, 411 insertions(+), 160 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index 79e2f6120..a697b9276 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -31,7 +31,7 @@ ensure_shard/1, ensure_egress/1 
]). --export([which_shards/1]). +-export([which_dbs/0, which_shards/1]). %% behaviour callbacks: -export([init/1]). @@ -104,6 +104,13 @@ ensure_egress(Shard) -> which_shards(DB) -> supervisor:which_children(?via(#?shards_sup{db = DB})). +%% @doc Return the list of builtin DS databases that are currently +%% active on the node. +-spec which_dbs() -> [emqx_ds:db()]. +which_dbs() -> + Key = {n, l, #?db_sup{_ = '_', db = '$1'}}, + gproc:select({local, names}, [{{Key, '_', '_'}, [], ['$1']}]). + %%================================================================================ %% behaviour callbacks %%================================================================================ @@ -111,6 +118,7 @@ which_shards(DB) -> init({#?db_sup{db = DB}, DefaultOpts}) -> %% Spec for the top-level supervisor for the database: logger:notice("Starting DS DB ~p", [DB]), + emqx_ds_builtin_metrics:init_for_db(DB), Opts = emqx_ds_replication_layer_meta:open_db(DB, DefaultOpts), ok = start_ra_system(DB, Opts), Children = [ diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 8075238b3..f0eac9652 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -15,11 +15,16 @@ %%-------------------------------------------------------------------- -module(emqx_ds_builtin_metrics). -%% API: +%% DS-facing API: -export([child_spec/0, init_for_db/1, shard_metric_id/2, init_for_shard/1]). + +%% Prometheus-facing API: +-export([prometheus_meta/0, prometheus_collect/1]). + -export([ inc_egress_batches/1, inc_egress_batches_retry/1, + inc_egress_batches_failed/1, inc_egress_messages/2, inc_egress_bytes/2, observe_egress_flush_time/2 @@ -42,11 +47,12 @@ -define(DB_METRICS, []). 
-define(SHARD_METRICS, [ - 'egress.batches', - 'egress.batches.retry', - 'egress.messages', - 'egress.bytes', - {slide, 'egress.flush_time'} + {counter, 'emqx_ds_egress_batches'}, + {counter, 'emqx_ds_egress_batches_retry'}, + {counter, 'emqx_ds_egress_batches_failed'}, + {counter, 'emqx_ds_egress_messages'}, + {counter, 'emqx_ds_egress_bytes'}, + {slide, 'emqx_ds_egress_flush_time'} ]). -type shard_metrics_id() :: binary(). @@ -59,14 +65,16 @@ child_spec() -> emqx_metrics_worker:child_spec(?WORKER). +%% @doc Initialize metrics that are global for a DS database -spec init_for_db(emqx_ds:db()) -> ok. -init_for_db(DB) -> - emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). +init_for_db(_DB) -> + ok. -spec shard_metric_id(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> shard_metrics_id(). shard_metric_id(DB, ShardId) -> iolist_to_binary([atom_to_list(DB), $/, ShardId]). +%% @doc Initialize metrics that are specific for the shard. -spec init_for_shard(shard_metrics_id()) -> ok. init_for_shard(ShardId) -> emqx_metrics_worker:create_metrics(?WORKER, ShardId, ?SHARD_METRICS, []). @@ -74,28 +82,124 @@ init_for_shard(ShardId) -> %% @doc Increase the number of successfully flushed batches -spec inc_egress_batches(shard_metrics_id()) -> ok. inc_egress_batches(Id) -> - emqx_metrics_worker:inc(?WORKER, Id, 'egress.batches'). + emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches'). %% @doc Increase the number of time the egress worker had to retry %% flushing the batch -spec inc_egress_batches_retry(shard_metrics_id()) -> ok. inc_egress_batches_retry(Id) -> - emqx_metrics_worker:inc(?WORKER, Id, 'egress.batches.retry'). + emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_retry'). + +%% @doc Increase the number of time the egress worker encountered an +%% unrecoverable error while trying to flush the batch +-spec inc_egress_batches_failed(shard_metrics_id()) -> ok. 
+inc_egress_batches_failed(Id) -> + emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_failed'). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_messages(Id, NMessages) -> - emqx_metrics_worker:inc(?WORKER, Id, 'egress.messages', NMessages). + emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_messages', NMessages). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_bytes(Id, NMessages) -> - emqx_metrics_worker:inc(?WORKER, Id, 'egress.bytes', NMessages). + emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_bytes', NMessages). -%% @doc Add a sample of time spent flushing the egress to the Raft log (in microseconds) +%% @doc Add a sample of elapsed time spent flushing the egress to the +%% Raft log (in microseconds) -spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. observe_egress_flush_time(Id, FlushTime) -> - emqx_metrics_worker:observe(?WORKER, Id, 'egress.flush_time', FlushTime). + emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_egress_flush_time', FlushTime). + +prometheus_meta() -> + lists:map( + fun + ({counter, A}) -> + {A, counter, A}; + ({slide, A}) -> + {A, counter, A} + end, + ?SHARD_METRICS + ). + +prometheus_collect(NodeOrAggr) -> + prometheus_per_shard(NodeOrAggr). + +%% This function returns the data in the following format: +%% ``` +%% #{emqx_ds_egress_batches => +%% [{[{db,emqx_persistent_message},{shard,<<"1">>}],99408}, +%% {[{db,emqx_persistent_message},{shard,<<"0">>}],99409}], +%% emqx_ds_egress_batches_retry => +%% [{[{db,emqx_persistent_message},{shard,<<"1">>}],0}, +%% {[{db,emqx_persistent_message},{shard,<<"0">>}],0}], +%% emqx_ds_egress_messages => +%% ... +%% } +%% ''' +%% +%% If `NodeOrAggr' = `node' then node name is appended to the list of +%% labels. 
+prometheus_per_shard(NodeOrAggr) -> + lists:foldl( + fun(DB, Acc0) -> + lists:foldl( + fun(Shard, Acc) -> + prometheus_per_shard(NodeOrAggr, DB, Shard, Acc) + end, + Acc0, + emqx_ds_replication_layer_meta:shards(DB) + ) + end, + #{}, + emqx_ds_builtin_db_sup:which_dbs() + ). + +prometheus_per_shard(NodeOrAggr, DB, Shard, Acc0) -> + Labels = [ + {db, DB}, + {shard, Shard} + | case NodeOrAggr of + node -> []; + _ -> [{node, node()}] + end + ], + #{counters := CC, slides := SS} = emqx_metrics_worker:get_metrics( + ?WORKER, shard_metric_id(DB, Shard) + ), + %% Collect counters: + Acc1 = maps:fold( + fun(MetricId, Value, Acc1) -> + append_to_key(MetricId, {Labels, Value}, Acc1) + end, + Acc0, + CC + ), + %% Collect slides: + maps:fold( + fun(MetricId, Value, Acc2) -> + Acc3 = append_to_key(MetricId, slide_value(current, Value, Labels), Acc2), + append_to_key(MetricId, slide_value(last5m, Value, Labels), Acc3) + end, + Acc1, + SS + ). + +-spec append_to_key(K, V, #{K => [V]}) -> #{K => [V]}. +append_to_key(Key, Value, Map) -> + maps:update_with( + Key, + fun(L) -> + [Value | L] + end, + [Value], + Map + ). + +slide_value(Interval, Value, Labels0) -> + Labels = [{interval, Interval} | Labels0], + {Labels, maps:get(Interval, Value, 0)}. %%================================================================================ %% Internal functions diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 9651c029e..667e1daa4 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -51,13 +51,10 @@ -define(flush, flush). -record(enqueue_req, { - message :: emqx_types:message(), - sync :: boolean(), - payload_bytes :: non_neg_integer() -}). 
--record(enqueue_atomic_req, { - batch :: [emqx_types:message()], + messages :: [emqx_types:message()], sync :: boolean(), + atomic :: boolean(), + n_messages :: non_neg_integer(), payload_bytes :: non_neg_integer() }). @@ -70,53 +67,33 @@ start_link(DB, Shard) -> gen_server:start_link(?via(DB, Shard), ?MODULE, [DB, Shard], []). -spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> - ok. + emqx_ds:store_batch_result(). store_batch(DB, Messages, Opts) -> Sync = maps:get(sync, Opts, true), - case maps:get(atomic, Opts, false) of - false -> - lists:foreach( - fun(Message) -> - Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), - gen_server:call( - ?via(DB, Shard), - #enqueue_req{ - message = Message, - sync = Sync, - payload_bytes = payload_size(Message) - }, - infinity - ) - end, - Messages + Atomic = maps:get(atomic, Opts, false), + %% Usually we expect all messages in the batch to go into the + %% single shard, so this function is optimized for the happy case. 
+ case shards_of_batch(DB, Messages) of + [{Shard, {NMsgs, NBytes}}] -> + %% Happy case: + gen_server:call( + ?via(DB, Shard), + #enqueue_req{ + messages = Messages, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes + }, + infinity ); - true -> - maps:foreach( - fun(Shard, Batch) -> - PayloadBytes = lists:foldl( - fun(Msg, Acc) -> - Acc + payload_size(Msg) - end, - 0, - Batch - ), - gen_server:call( - ?via(DB, Shard), - #enqueue_atomic_req{ - batch = Batch, - sync = Sync, - payload_bytes = PayloadBytes - }, - infinity - ) - end, - maps:groups_from_list( - fun(Message) -> - emqx_ds_replication_layer:shard_of_message(DB, Message, clientid) - end, - Messages - ) - ) + [_, _ | _] when Atomic -> + %% It's impossible to commit a batch to multiple shards + %% atomically + {error, unrecoverable, atomic_commit_to_multiple_shards}; + _Shards -> + %% Use a slower implementation for the unlikely case: + repackage_messages(DB, Messages, Sync, Atomic) end. %%================================================================================ @@ -129,7 +106,7 @@ store_batch(DB, Messages, Opts) -> metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), n = 0 :: non_neg_integer(), n_bytes = 0 :: non_neg_integer(), - tref :: reference(), + tref :: undefined | reference(), queue :: queue:queue(emqx_types:message()), pending_replies = [] :: [gen_server:from()] }). @@ -143,16 +120,18 @@ init([DB, Shard]) -> db = DB, shard = Shard, metrics_id = MetricsId, - tref = start_timer(), queue = queue:new() }, - {ok, S}. + {ok, start_timer(S)}. 
-handle_call(#enqueue_req{message = Msg, sync = Sync, payload_bytes = NBytes}, From, S) -> - do_enqueue(From, Sync, Msg, NBytes, S); -handle_call(#enqueue_atomic_req{batch = Batch, sync = Sync, payload_bytes = NBytes}, From, S) -> - Len = length(Batch), - do_enqueue(From, Sync, {atomic, Len, Batch}, NBytes, S); +handle_call( + #enqueue_req{ + messages = Msgs, sync = Sync, atomic = Atomic, n_messages = NMsgs, payload_bytes = NBytes + }, + From, + S +) -> + {noreply, enqueue(From, Sync, Atomic, Msgs, NMsgs, NBytes, S)}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. @@ -160,7 +139,7 @@ handle_cast(_Cast, S) -> {noreply, S}. handle_info(?flush, S) -> - {noreply, do_flush(S)}; + {noreply, flush(S)}; handle_info(_Info, S) -> {noreply, S}. @@ -175,9 +154,60 @@ terminate(_Reason, _S) -> %% Internal functions %%================================================================================ +enqueue( + From, + Sync, + Atomic, + Msgs, + BatchSize, + BatchBytes, + S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0, pending_replies = Replies0} +) -> + %% At this point we don't split the batches, even when they aren't + %% atomic. It wouldn't win us anything in terms of memory, and + %% EMQX currently feeds data to DS in very small batches, so + %% granularity should be fine enough. + NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), + NBytesMax = application:get_env(emqx_durable_storage, egress_batch_bytes, infinity), + NMsgs = NMsgs0 + BatchSize, + NBytes = NBytes0 + BatchBytes, + case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NMsgs0 > 0) of + true -> + %% Adding this batch would cause buffer to overflow. 
Flush + %% it now, and retry: + cancel_timer(S0), + S1 = flush(S0), + enqueue(From, Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); + false -> + %% The buffer is empty, we enqueue the atomic batch in its + %% entirety: + Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), + Replies = + case Sync of + true -> + [From | Replies0]; + false -> + gen_server:reply(From, ok), + Replies0 + end, + S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1, pending_replies = Replies}, + case NMsgs >= NMax orelse NBytes >= NBytes of + true -> + cancel_timer(S1), + flush(S1); + false -> + S1 + end + end. + -define(COOLDOWN_MIN, 1000). -define(COOLDOWN_MAX, 5000). +flush(S) -> + start_timer(do_flush(S)). + +do_flush(S0 = #s{n = 0}) -> + S0; do_flush( S = #s{queue = Q, pending_replies = Replies, db = DB, shard = Shard, metrics_id = Metrics} ) -> @@ -202,73 +232,103 @@ do_flush( n = 0, n_bytes = 0, queue = queue:new(), - pending_replies = [], - tref = start_timer() + pending_replies = [] }; - Error -> - emqx_ds_builtin_metrics:inc_egress_batches_retry(S#s.metrics_id), + {error, recoverable, Reason} -> + %% Retry sending the batch: + emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), erlang:garbage_collect(), + %% We block the gen_server until the next retry. + BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), + timer:sleep(BlockTime), ?tp( warning, emqx_ds_replication_layer_egress_flush_failed, - #{db => DB, shard => Shard, reason => Error} + #{db => DB, shard => Shard, reason => Reason} ), - Cooldown = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), + S; + Err = {error, unrecoverable, _} -> + emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), + lists:foreach(fun(From) -> gen_server:reply(From, Err) end, Replies), + erlang:garbage_collect(), S#s{ - tref = start_timer(Cooldown) + n = 0, + n_bytes = 0, + queue = queue:new(), + pending_replies = [] } end. 
-do_enqueue( - From, - Sync, - MsgOrBatch, - BatchBytes, - S0 = #s{n = N, n_bytes = NBytes0, queue = Q0, pending_replies = Replies} -) -> - NBytes = NBytes0 + BatchBytes, - NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), - S1 = - case MsgOrBatch of - {atomic, NumMsgs, Msgs} -> - Q = lists:foldl(fun queue:in/2, Q0, Msgs), - S0#s{n = N + NumMsgs, n_bytes = NBytes, queue = Q}; - Msg -> - S0#s{n = N + 1, n_bytes = NBytes, queue = queue:in(Msg, Q0)} - end, - %% TODO: later we may want to delay the reply until the message is - %% replicated, but it requies changes to the PUBACK/PUBREC flow to - %% allow for async replies. For now, we ack when the message is - %% _buffered_ rather than stored. - %% - %% Otherwise, the client would freeze for at least flush interval, - %% or until the buffer is filled. - S2 = - case Sync of - true -> - S1#s{pending_replies = [From | Replies]}; - false -> - gen_server:reply(From, ok), - S1 - end, - S = - case N >= NMax of - true -> - _ = erlang:cancel_timer(S2#s.tref), - do_flush(S2); - false -> - S2 - end, - %% TODO: add a backpressure mechanism for the server to avoid - %% building a long message queue. - {noreply, S}. +-spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> + [{emqx_ds_replication_layer:shard_id(), {NMessages, NBytes}}] +when + NMessages :: non_neg_integer(), + NBytes :: non_neg_integer(). +shards_of_batch(DB, Messages) -> + maps:to_list( + lists:foldl( + fun(Message, Acc) -> + %% TODO: sharding strategy must be part of the DS DB schema: + Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), + Size = payload_size(Message), + maps:update_with( + Shard, + fun({N, S}) -> + {N + 1, S + Size} + end, + {1, Size}, + Acc + ) + end, + #{}, + Messages + ) + ). 
-start_timer() -> +repackage_messages(DB, Messages, Sync, Atomic) -> + Batches = lists:foldl( + fun(Message, Acc) -> + Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), + Size = payload_size(Message), + maps:update_with( + Shard, + fun({N, S, Msgs}) -> + {N + 1, S + Size, [Message | Msgs]} + end, + {1, Size, [Message]}, + Acc + ) + end, + #{}, + Messages + ), + maps:foreach( + fun(Shard, {NMsgs, ByteSize, RevMessages}) -> + gen_server:call( + ?via(DB, Shard), + #enqueue_req{ + messages = lists:reverse(RevMessages), + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = ByteSize + }, + infinity + ) + end, + Batches + ). + +start_timer(S) -> Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), - start_timer(Interval). + Tref = erlang:send_after(Interval, self(), ?flush), + S#s{tref = Tref}. -start_timer(Interval) -> - erlang:send_after(Interval, self(), ?flush). +cancel_timer(#s{tref = undefined}) -> + ok; +cancel_timer(#s{tref = TRef}) -> + _ = erlang:cancel_timer(TRef), + ok. %% @doc Return approximate size of the MQTT message (it doesn't take %% all things into account, for example headers and extras) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 6b85328b6..cbc0e9abf 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -254,6 +254,9 @@ drop_shard(Shard) -> store_batch(Shard, Messages = [{Time, _Msg} | _], Options) -> %% NOTE %% We assume that batches do not span generations. Callers should enforce this. 
+ ?tp(emqx_ds_storage_layer_store_batch, #{ + shard => Shard, messages => Messages, options => Options + }), #{module := Mod, data := GenData} = generation_at(Shard, Time), Mod:store_batch(Shard, GenData, Messages, Options); store_batch(_Shard, [], _Options) -> diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl index 3df16dc1c..33988a974 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl @@ -322,17 +322,10 @@ t_09_atomic_store_batch(_Config) -> sync => true }) ), - - ok + {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush}), + ?assertMatch(#{batch := [_, _, _]}, Flush) end, - fun(Trace) -> - %% Must contain exactly one flush with all messages. - ?assertMatch( - [#{batch := [_, _, _]}], - ?of_kind(emqx_ds_replication_layer_egress_flush, Trace) - ), - ok - end + [] ), ok. @@ -355,14 +348,15 @@ t_10_non_atomic_store_batch(_Config) -> sync => true }) ), - - ok + timer:sleep(1000) end, fun(Trace) -> %% Should contain one flush per message. + Batches = ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)), + ?assertMatch([_], Batches), ?assertMatch( - [#{batch := [_]}, #{batch := [_]}, #{batch := [_]}], - ?of_kind(emqx_ds_replication_layer_egress_flush, Trace) + [_, _, _], + lists:append(Batches) ), ok end @@ -681,10 +675,86 @@ t_error_mapping_replication_layer(_Config) -> length([error || {error, _, _} <- Results2]) > 0, Results2 ), - - snabbkaffe:stop(), meck:unload(). +%% This test suite verifies the behavior of `store_batch' operation +%% when the underlying code experiences recoverable or unrecoverable +%% problems. 
+t_store_batch_fail(_Config) -> + ?check_trace( + #{timetrap => 15_000}, + try + meck:new(emqx_ds_replication_layer, [passthrough, no_history]), + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})), + %% Success: + Batch1 = [ + message(<<"C1">>, <<"foo/bar">>, <<"1">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})), + %% Inject unrecoverable error: + meck:expect(emqx_ds_replication_layer, ra_store_batch, fun(_DB, _Shard, _Messages) -> + {error, unrecoverable, mock} + end), + Batch2 = [ + message(<<"C1">>, <<"foo/bar">>, <<"3">>, 1), + message(<<"C1">>, <<"foo/bar">>, <<"4">>, 1) + ], + ?assertMatch( + {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true}) + ), + %% Inject a recoverable error: + Batch3 = [ + message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2), + message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2), + message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3), + message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3) + ], + meck:expect(emqx_ds_replication_layer, ra_store_batch, fun(DB, Shard, Messages) -> + try + ?tp(store_batch, #{messages => Messages}), + meck:passthrough([DB, Shard, Messages]) + catch + _:_ -> + {error, recoverable, mock} + end + end), + ?inject_crash(#{?snk_kind := store_batch}, snabbkaffe_nemesis:recover_after(3)), + ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), + lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) + after + meck:unload() + end, + [ + {"number of successfull flushes after retry", fun(Trace) -> + ?assertMatch([_, _], ?of_kind(store_batch, Trace)) + end}, + {"number of retries", fun(Trace) -> + ?assertMatch([_, _, _], ?of_kind(snabbkaffe_crash, Trace)) + end}, + {"message ordering", fun(StoredMessages, _Trace) -> + [{_, Stream1}, {_, Stream2}] = StoredMessages, + ?assertMatch( + [ + #message{payload = <<"1">>}, + #message{payload = <<"2">>}, + #message{payload = <<"5">>}, + 
#message{payload = <<"7">>} + ], + Stream1 + ), + ?assertMatch( + [ + #message{payload = <<"6">>}, + #message{payload = <<"8">>} + ], + Stream2 + ) + end} + ] + ). + update_data_set() -> [ [ @@ -748,6 +818,7 @@ init_per_testcase(_TC, Config) -> Config. end_per_testcase(_TC, _Config) -> + snabbkaffe:stop(), ok = application:stop(emqx_durable_storage), mria:stop(), _ = mnesia:delete_schema([node()]), diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 44c45248b..4af1e9791 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -63,9 +63,16 @@ consume(DB, TopicFilter) -> consume(DB, TopicFilter, 0). consume(DB, TopicFilter, StartTime) -> - Streams = emqx_ds:get_streams(DB, TopicFilter, StartTime), lists:flatmap( - fun({_Rank, Stream}) -> consume_stream(DB, Stream, TopicFilter, StartTime) end, + fun({_Stream, Msgs}) -> + Msgs + end, + consume_per_stream(DB, TopicFilter, StartTime)). + +consume_per_stream(DB, TopicFilter, StartTime) -> + Streams = emqx_ds:get_streams(DB, TopicFilter, StartTime), + lists:map( + fun({_Rank, Stream}) -> {Stream, consume_stream(DB, Stream, TopicFilter, StartTime)} end, Streams ). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 813f91dbd..044e701e3 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -212,7 +212,9 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> ok = add_collect_family(Callback, cert_metric_meta(), ?MG(cert_data, RawData)), ok = add_collect_family(Callback, mria_metric_meta(), ?MG(mria_data, RawData)), - ok = add_collect_family(Callback, ds_metric_meta(), ?MG(ds_data, RawData)), + ok = add_collect_family( + Callback, emqx_ds_builtin_metrics:prometheus_meta(), ?MG(ds_data, RawData) + ), ok = maybe_license_add_collect_family(Callback, RawData), ok; collect_mf(_Registry, _Callback) -> @@ -265,6 +267,7 @@ fetch_from_local_node(Mode) -> emqx_olp_data => emqx_metric_data(olp_metric_meta(), Mode), emqx_acl_data => emqx_metric_data(acl_metric_meta(), Mode), emqx_authn_data => emqx_metric_data(authn_metric_meta(), Mode), + ds_data => emqx_ds_builtin_metrics:prometheus_collect(Mode), mria_data => mria_data(Mode) }}. @@ -481,7 +484,14 @@ emqx_collect(K = emqx_mria_lag, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = emqx_mria_bootstrap_time, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = emqx_mria_bootstrap_num_keys, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = emqx_mria_message_queue_len, D) -> gauge_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_mria_replayq_len, D) -> gauge_metrics(?MG(K, D, [])). 
+emqx_collect(K = emqx_mria_replayq_len, D) -> gauge_metrics(?MG(K, D, [])); +%% DS +emqx_collect(K = emqx_ds_egress_batches, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_egress_batches_retry, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_egress_batches_failed, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_egress_messages, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_egress_bytes, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_egress_flush_time, D) -> gauge_metrics(?MG(K, D, [])). %%-------------------------------------------------------------------- %% Indicators @@ -1012,18 +1022,6 @@ catch_all(DataFun) -> _:_ -> undefined end. -%%======================================== -%% Durable storge -%%======================================== - -ds_metric_meta() -> - [ - {emqx_ds_egress_batches, counter, 'egress.batches'}, - {emqx_ds_egress_batches_retry, counter, 'egress.batches.retry'}, - {emqx_ds_egress_messages, counter, 'egress.messages'}, - {emqx_ds_egress_bytes, counter, 'egress.bytes'} - ]. 
- %%-------------------------------------------------------------------- %% Collect functions %%-------------------------------------------------------------------- From b9ad241658f5cc283d35a39670f253433e27bc9b Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 31 Mar 2024 15:44:31 +0200 Subject: [PATCH 048/234] feat(sessds): Add metrics for the number of persisted messages --- apps/emqx/src/emqx_metrics.erl | 5 ++++- apps/emqx/src/emqx_persistent_message.erl | 1 + apps/emqx_dashboard/include/emqx_dashboard.hrl | 6 ++++-- apps/emqx_dashboard/src/emqx_dashboard_monitor.erl | 3 ++- apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl | 4 ++++ apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl | 4 +++- 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_metrics.erl b/apps/emqx/src/emqx_metrics.erl index 6b8b60209..13ac40c68 100644 --- a/apps/emqx/src/emqx_metrics.erl +++ b/apps/emqx/src/emqx_metrics.erl @@ -222,7 +222,9 @@ % Messages delivered {counter, 'messages.delivered'}, % Messages acked - {counter, 'messages.acked'} + {counter, 'messages.acked'}, + % Messages persistently stored + {counter, 'messages.persisted'} ] ). @@ -718,4 +720,5 @@ reserved_idx('overload_protection.gc') -> 403; reserved_idx('overload_protection.new_conn') -> 404; reserved_idx('messages.validation_succeeded') -> 405; reserved_idx('messages.validation_failed') -> 406; +reserved_idx('messages.persisted') -> 407; reserved_idx(_) -> undefined. diff --git a/apps/emqx/src/emqx_persistent_message.erl b/apps/emqx/src/emqx_persistent_message.erl index e3fa23296..10497216d 100644 --- a/apps/emqx/src/emqx_persistent_message.erl +++ b/apps/emqx/src/emqx_persistent_message.erl @@ -114,6 +114,7 @@ needs_persistence(Msg) -> -spec store_message(emqx_types:message()) -> emqx_ds:store_batch_result(). store_message(Msg) -> + emqx_metrics:inc('messages.persisted'), emqx_ds:store_batch(?PERSISTENT_MESSAGE_DB, [Msg], #{sync => false}). 
has_subscribers(#message{topic = Topic}) -> diff --git a/apps/emqx_dashboard/include/emqx_dashboard.hrl b/apps/emqx_dashboard/include/emqx_dashboard.hrl index 13458b4b4..40f2ba2b3 100644 --- a/apps/emqx_dashboard/include/emqx_dashboard.hrl +++ b/apps/emqx_dashboard/include/emqx_dashboard.hrl @@ -67,7 +67,8 @@ %, sent_bytes validation_succeeded, validation_failed, - dropped + dropped, + persisted ]). -define(GAUGE_SAMPLER_LIST, [ @@ -87,7 +88,8 @@ sent => sent_msg_rate, validation_succeeded => validation_succeeded_rate, validation_failed => validation_failed_rate, - dropped => dropped_msg_rate + dropped => dropped_msg_rate, + persisted => persisted_rate }). -define(CURRENT_SAMPLE_NON_RATE, diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index 6a9e868dd..fe0476e6d 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -428,7 +428,8 @@ stats(sent) -> emqx_metrics:val('messages.sent'); stats(sent_bytes) -> emqx_metrics:val('bytes.sent'); stats(validation_succeeded) -> emqx_metrics:val('messages.validation_succeeded'); stats(validation_failed) -> emqx_metrics:val('messages.validation_failed'); -stats(dropped) -> emqx_metrics:val('messages.dropped'). +stats(dropped) -> emqx_metrics:val('messages.dropped'); +stats(persisted) -> emqx_metrics:val('messages.persisted'). 
%% ------------------------------------------------------------------------------------------------- %% Retained && License Quota diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index 3ffadc1b2..1b6773b87 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -192,6 +192,8 @@ swagger_desc(validation_succeeded) -> swagger_desc_format("Message validations succeeded "); swagger_desc(validation_failed) -> swagger_desc_format("Message validations failed "); +swagger_desc(persisted) -> + swagger_desc_format("Messages saved to the durable storage "); swagger_desc(subscriptions) -> <<"Subscriptions at the time of sampling.", ?APPROXIMATE_DESC>>; swagger_desc(topics) -> @@ -218,6 +220,8 @@ swagger_desc(validation_succeeded_rate) -> swagger_desc_format("Message validations succeeded ", per); swagger_desc(validation_failed_rate) -> swagger_desc_format("Message validations failed ", per); +swagger_desc(persisted_rate) -> + swagger_desc_format("Messages saved to the durable storage ", per); swagger_desc(retained_msg_count) -> <<"Retained messages count at the time of sampling.", ?APPROXIMATE_DESC>>; swagger_desc(shared_subscriptions) -> diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index a2c2b96c2..841610985 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -89,7 +89,9 @@ init_per_group(new_config, Config) -> Apps = emqx_cth_suite:start( [ %% coverage olp metrics - {emqx, "overload_protection.enable = true"}, + {emqx, + "overload_protection.enable = true\n" + "session_persistence.enable = true"}, {emqx_license, "license.key = default"}, {emqx_prometheus, #{config => config(default)}} ], From f41e538526be8357cf73afc2a16be28670b54dd3 Mon Sep 17 00:00:00 2001 From: ieQu1 
<99872536+ieQu1@users.noreply.github.com> Date: Sun, 31 Mar 2024 16:58:51 +0200 Subject: [PATCH 049/234] feat(sessds): Observe next time --- .../src/emqx_ds_builtin_metrics.erl | 20 +++++++++++++++++-- .../src/emqx_ds_replication_layer.erl | 6 +++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index f0eac9652..fc6de2861 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -27,7 +27,9 @@ inc_egress_batches_failed/1, inc_egress_messages/2, inc_egress_bytes/2, - observe_egress_flush_time/2 + + observe_egress_flush_time/2, + observe_next_time/3 ]). %% behavior callbacks: @@ -46,7 +48,7 @@ -define(DB_METRICS, []). --define(SHARD_METRICS, [ +-define(EGRESS_METRICS, [ {counter, 'emqx_ds_egress_batches'}, {counter, 'emqx_ds_egress_batches_retry'}, {counter, 'emqx_ds_egress_batches_failed'}, @@ -55,6 +57,12 @@ {slide, 'emqx_ds_egress_flush_time'} ]). +-define(INGRESS_METRICS, [ + {slide, 'emqx_ds_builtin_next_time'} +]). + +-define(SHARD_METRICS, ?EGRESS_METRICS ++ ?INGRESS_METRICS). + -type shard_metrics_id() :: binary(). %%================================================================================ @@ -112,6 +120,14 @@ inc_egress_bytes(Id, NMessages) -> observe_egress_flush_time(Id, FlushTime) -> emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_egress_flush_time', FlushTime). +%% @doc Add a sample of elapsed time spent waiting for a +%% `emqx_ds_replication_layer:next' +-spec observe_next_time(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), non_neg_integer()) -> + ok. +observe_next_time(DB, Shard, NextTime) -> + Id = shard_metric_id(DB, Shard), + emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_builtin_next_time', NextTime). 
+ prometheus_meta() -> lists:map( fun diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index 14c2268b8..2d4982af3 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -329,7 +329,11 @@ next(DB, Iter0, BatchSize) -> %% %% This kind of trickery should be probably done here in the %% replication layer. Or, perhaps, in the logic layer. - case ra_next(DB, Shard, StorageIter0, BatchSize) of + T0 = erlang:monotonic_time(microsecond), + Result = ra_next(DB, Shard, StorageIter0, BatchSize), + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_next_time(DB, Shard, T1 - T0), + case Result of {ok, StorageIter, Batch} -> Iter = Iter0#{?enc := StorageIter}, {ok, Iter, Batch}; From b379f331de614d53b1e809cf3bff73c5f4bd1321 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 31 Mar 2024 18:00:48 +0200 Subject: [PATCH 050/234] fix(sessds): Handle errors when storing messages --- apps/emqx/src/emqx_broker.erl | 8 +- apps/emqx/src/emqx_persistent_message.erl | 2 +- .../src/emqx_ds_builtin_metrics.erl | 102 ++++++++++++++---- .../src/emqx_ds_replication_layer.erl | 2 +- .../src/emqx_ds_storage_layer.erl | 6 +- apps/emqx_prometheus/src/emqx_prometheus.erl | 4 +- 6 files changed, 95 insertions(+), 29 deletions(-) diff --git a/apps/emqx/src/emqx_broker.erl b/apps/emqx/src/emqx_broker.erl index 1470b7d8b..ed29ea614 100644 --- a/apps/emqx/src/emqx_broker.erl +++ b/apps/emqx/src/emqx_broker.erl @@ -253,8 +253,12 @@ persist_publish(Msg) -> case emqx_persistent_message:persist(Msg) of ok -> [persisted]; - {_SkipOrError, _Reason} -> - % TODO: log errors? + {skipped, _} -> + []; + {error, Recoverable, Reason} -> + ?SLOG(debug, #{ + msg => "failed_to_persist_message", is_recoverable => Recoverable, reason => Reason + }), [] end. 
diff --git a/apps/emqx/src/emqx_persistent_message.erl b/apps/emqx/src/emqx_persistent_message.erl index 10497216d..c909c5c5f 100644 --- a/apps/emqx/src/emqx_persistent_message.erl +++ b/apps/emqx/src/emqx_persistent_message.erl @@ -98,7 +98,7 @@ pre_config_update(_Root, _NewConf, _OldConf) -> %%-------------------------------------------------------------------- -spec persist(emqx_types:message()) -> - ok | {skipped, _Reason} | {error, _TODO}. + emqx_ds:store_batch_result() | {skipped, needs_no_persistence}. persist(Msg) -> ?WHEN_ENABLED( case needs_persistence(Msg) andalso has_subscribers(Msg) of diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index fc6de2861..833e39211 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -29,7 +29,10 @@ inc_egress_bytes/2, observe_egress_flush_time/2, - observe_next_time/3 + + observe_store_batch_time/2, + + observe_next_time/2 ]). %% behavior callbacks: @@ -46,7 +49,15 @@ -define(WORKER, ?MODULE). --define(DB_METRICS, []). +-define(STORAGE_LAYER_METRICS, [ + {slide, 'emqx_ds_store_batch_time'} +]). + +-define(FETCH_METRICS, [ + {slide, 'emqx_ds_builtin_next_time'} +]). + +-define(DB_METRICS, ?STORAGE_LAYER_METRICS ++ ?FETCH_METRICS). -define(EGRESS_METRICS, [ {counter, 'emqx_ds_egress_batches'}, @@ -57,14 +68,12 @@ {slide, 'emqx_ds_egress_flush_time'} ]). --define(INGRESS_METRICS, [ - {slide, 'emqx_ds_builtin_next_time'} -]). - --define(SHARD_METRICS, ?EGRESS_METRICS ++ ?INGRESS_METRICS). +-define(SHARD_METRICS, ?EGRESS_METRICS). -type shard_metrics_id() :: binary(). +-elvis([{elvis_style, dont_repeat_yourself, disable}]). 
+ %%================================================================================ %% API functions %%================================================================================ @@ -75,8 +84,8 @@ child_spec() -> %% @doc Initialize metrics that are global for a DS database -spec init_for_db(emqx_ds:db()) -> ok. -init_for_db(_DB) -> - ok. +init_for_db(DB) -> + emqx_metrics_worker:create_metrics(?WORKER, DB, ?DB_METRICS, []). -spec shard_metric_id(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> shard_metrics_id(). shard_metric_id(DB, ShardId) -> @@ -90,43 +99,45 @@ init_for_shard(ShardId) -> %% @doc Increase the number of successfully flushed batches -spec inc_egress_batches(shard_metrics_id()) -> ok. inc_egress_batches(Id) -> - emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches'). + catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches'). %% @doc Increase the number of time the egress worker had to retry %% flushing the batch -spec inc_egress_batches_retry(shard_metrics_id()) -> ok. inc_egress_batches_retry(Id) -> - emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_retry'). + catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_retry'). %% @doc Increase the number of time the egress worker encountered an %% unrecoverable error while trying to flush the batch -spec inc_egress_batches_failed(shard_metrics_id()) -> ok. inc_egress_batches_failed(Id) -> - emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_failed'). + catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_failed'). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_messages(Id, NMessages) -> - emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_messages', NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_messages', NMessages). 
%% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_bytes(Id, NMessages) -> - emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_bytes', NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_bytes', NMessages). %% @doc Add a sample of elapsed time spent flushing the egress to the %% Raft log (in microseconds) -spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. observe_egress_flush_time(Id, FlushTime) -> - emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_egress_flush_time', FlushTime). + catch emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_egress_flush_time', FlushTime). -%% @doc Add a sample of elapsed time spent waiting for a +-spec observe_store_batch_time(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. +observe_store_batch_time({DB, _}, StoreTime) -> + catch emqx_metrics_worker:observe(?WORKER, DB, 'emqx_ds_store_batch_time', StoreTime). + +%% @doc Add a sample of elapsed time spent waiting for a batch %% `emqx_ds_replication_layer:next' --spec observe_next_time(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), non_neg_integer()) -> - ok. -observe_next_time(DB, Shard, NextTime) -> - Id = shard_metric_id(DB, Shard), - emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_builtin_next_time', NextTime). +-spec observe_next_time(emqx_ds:db(), non_neg_integer()) -> ok. +observe_next_time(DB, NextTime) -> + catch emqx_metrics_worker:observe(?WORKER, DB, 'emqx_ds_builtin_next_time', NextTime). prometheus_meta() -> lists:map( @@ -136,11 +147,56 @@ prometheus_meta() -> ({slide, A}) -> {A, counter, A} end, - ?SHARD_METRICS + ?DB_METRICS ++ ?SHARD_METRICS ). prometheus_collect(NodeOrAggr) -> - prometheus_per_shard(NodeOrAggr). + maps:merge(prometheus_per_db(NodeOrAggr), prometheus_per_shard(NodeOrAggr)). 
+ +prometheus_per_db(NodeOrAggr) -> + lists:foldl( + fun(DB, Acc) -> + prometheus_per_db(NodeOrAggr, DB, Acc) + end, + #{}, + emqx_ds_builtin_db_sup:which_dbs() + ). + +%% This function returns the data in the following format: +%% ``` +%% #{emqx_ds_store_batch_time => +%% [{[{db, emqx_persistent_message}], 42}], +%% ... +%% ''' +%% +%% If `NodeOrAggr' = `node' then node name is appended to the list of +%% labels. +prometheus_per_db(NodeOrAggr, DB, Acc0) -> + Labels = [ + {db, DB} + | case NodeOrAggr of + node -> []; + _ -> [{node, node()}] + end + ], + #{counters := CC, slides := SS} = emqx_metrics_worker:get_metrics(?WORKER, DB), + %% Collect counters: + Acc1 = maps:fold( + fun(MetricId, Value, Acc1) -> + append_to_key(MetricId, {Labels, Value}, Acc1) + end, + Acc0, + CC + ), + %% Collect slides: + maps:fold( + fun(MetricId, Value, Acc2) -> + Acc3 = append_to_key(MetricId, slide_value(current, Value, Labels), Acc2), + append_to_key(MetricId, slide_value(last5m, Value, Labels), Acc3) + end, + Acc1, + SS + ). 
%% This function returns the data in the following format: %% ``` diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index 2d4982af3..f8478bb72 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -332,7 +332,7 @@ next(DB, Iter0, BatchSize) -> T0 = erlang:monotonic_time(microsecond), Result = ra_next(DB, Shard, StorageIter0, BatchSize), T1 = erlang:monotonic_time(microsecond), - emqx_ds_builtin_metrics:observe_next_time(DB, Shard, T1 - T0), + emqx_ds_builtin_metrics:observe_next_time(DB, T1 - T0), case Result of {ok, StorageIter, Batch} -> Iter = Iter0#{?enc := StorageIter}, diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index cbc0e9abf..28ce1d943 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -258,7 +258,11 @@ store_batch(Shard, Messages = [{Time, _Msg} | _], Options) -> shard => Shard, messages => Messages, options => Options }), #{module := Mod, data := GenData} = generation_at(Shard, Time), - Mod:store_batch(Shard, GenData, Messages, Options); + T0 = erlang:monotonic_time(microsecond), + Result = Mod:store_batch(Shard, GenData, Messages, Options), + T1 = erlang:monotonic_time(microsecond), + emqx_ds_builtin_metrics:observe_store_batch_time(Shard, T1 - T0), + Result; store_batch(_Shard, [], _Options) -> ok. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 044e701e3..0ac032824 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -491,7 +491,9 @@ emqx_collect(K = emqx_ds_egress_batches_retry, D) -> counter_metrics(?MG(K, D, [ emqx_collect(K = emqx_ds_egress_batches_failed, D) -> counter_metrics(?MG(K, D, [])); emqx_collect(K = emqx_ds_egress_messages, D) -> counter_metrics(?MG(K, D, [])); emqx_collect(K = emqx_ds_egress_bytes, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_flush_time, D) -> gauge_metrics(?MG(K, D, [])). +emqx_collect(K = emqx_ds_egress_flush_time, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_store_batch_time, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = emqx_ds_builtin_next_time, D) -> gauge_metrics(?MG(K, D, [])). %%-------------------------------------------------------------------- %% Indicators From f14c253dea4b8af720b2fdb6fd0b02af617695d5 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 1 Apr 2024 23:41:29 +0200 Subject: [PATCH 051/234] fix(prometheus): Don't add DS metrics when feature is disabled --- apps/emqx_prometheus/src/emqx_prometheus.erl | 25 +++++++++++++++---- .../test/emqx_prometheus_SUITE.erl | 4 +-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 0ac032824..2327a7263 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -212,14 +212,30 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> ok = add_collect_family(Callback, cert_metric_meta(), ?MG(cert_data, RawData)), ok = add_collect_family(Callback, mria_metric_meta(), ?MG(mria_data, RawData)), - ok = add_collect_family( - Callback, emqx_ds_builtin_metrics:prometheus_meta(), ?MG(ds_data, RawData) - ), + 
ok = maybe_add_ds_collect_family(Callback, RawData), ok = maybe_license_add_collect_family(Callback, RawData), ok; collect_mf(_Registry, _Callback) -> ok. +maybe_add_ds_collect_family(Callback, RawData) -> + case emqx_persistent_message:is_persistence_enabled() of + true -> + add_collect_family( + Callback, emqx_ds_builtin_metrics:prometheus_meta(), ?MG(ds_data, RawData) + ); + false -> + ok + end. + +maybe_collect_ds_data(Mode) -> + case emqx_persistent_message:is_persistence_enabled() of + true -> + #{ds_data => emqx_ds_builtin_metrics:prometheus_collect(Mode)}; + false -> + #{} + end. + %% @private collect(<<"json">>) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), @@ -254,7 +270,7 @@ add_collect_family(Name, Data, Callback, Type) -> %% behaviour fetch_from_local_node(Mode) -> - {node(), #{ + {node(), (maybe_collect_ds_data(Mode))#{ stats_data => stats_data(Mode), vm_data => vm_data(Mode), cluster_data => cluster_data(Mode), @@ -267,7 +283,6 @@ fetch_from_local_node(Mode) -> emqx_olp_data => emqx_metric_data(olp_metric_meta(), Mode), emqx_acl_data => emqx_metric_data(acl_metric_meta(), Mode), emqx_authn_data => emqx_metric_data(authn_metric_meta(), Mode), - ds_data => emqx_ds_builtin_metrics:prometheus_collect(Mode), mria_data => mria_data(Mode) }}. 
diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index 841610985..a2c2b96c2 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -89,9 +89,7 @@ init_per_group(new_config, Config) -> Apps = emqx_cth_suite:start( [ %% coverage olp metrics - {emqx, - "overload_protection.enable = true\n" - "session_persistence.enable = true"}, + {emqx, "overload_protection.enable = true"}, {emqx_license, "license.key = default"}, {emqx_prometheus, #{config => config(default)}} ], From 94ca7ad0f86c6f924351f3852cf35bfb66b8bb02 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 2 Apr 2024 01:52:34 +0200 Subject: [PATCH 052/234] feat(ds): Report counters for LTS storage layout --- apps/emqx_durable_storage/include/emqx_ds.hrl | 4 +- .../include/emqx_ds_metrics.hrl | 49 ++++++++++++++++ .../src/emqx_ds_builtin_metrics.erl | 55 ++++++++++++------ .../src/emqx_ds_storage_bitfield_lts.erl | 57 ++++++++++++------- .../test/emqx_ds_SUITE.erl | 2 +- .../emqx_ds_storage_bitfield_lts_SUITE.erl | 22 ++++--- apps/emqx_prometheus/src/emqx_prometheus.erl | 20 ++++--- 7 files changed, 150 insertions(+), 59 deletions(-) create mode 100644 apps/emqx_durable_storage/include/emqx_ds_metrics.hrl diff --git a/apps/emqx_durable_storage/include/emqx_ds.hrl b/apps/emqx_durable_storage/include/emqx_ds.hrl index f24605175..cc7a7431f 100644 --- a/apps/emqx_durable_storage/include/emqx_ds.hrl +++ b/apps/emqx_durable_storage/include/emqx_ds.hrl @@ -13,7 +13,7 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --ifndef(EMQX_DS_HRL_HRL). --define(EMQX_DS_HRL_HRL, true). +-ifndef(EMQX_DS_HRL). +-define(EMQX_DS_HRL, true). -endif. 
diff --git a/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl new file mode 100644 index 000000000..0a82a6682 --- /dev/null +++ b/apps/emqx_durable_storage/include/emqx_ds_metrics.hrl @@ -0,0 +1,49 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-ifndef(EMQX_DS_METRICS_HRL). +-define(EMQX_DS_METRICS_HRL, true). + +%%%% Egress metrics: + +%% Number of successfully flushed batches: +-define(DS_EGRESS_BATCHES, emqx_ds_egress_batches). +%% Number of batch flush retries: +-define(DS_EGRESS_BATCHES_RETRY, emqx_ds_egress_batches_retry). +%% Number of batches that weren't flushed due to unrecoverable errors: +-define(DS_EGRESS_BATCHES_FAILED, emqx_ds_egress_batches_failed). +%% Total number of messages that were successfully committed to the storage: +-define(DS_EGRESS_MESSAGES, emqx_ds_egress_messages). +%% Total size of payloads that were successfully committed to the storage: +-define(DS_EGRESS_BYTES, emqx_ds_egress_bytes). +%% Sliding average of flush time (microseconds): +-define(DS_EGRESS_FLUSH_TIME, emqx_ds_egress_flush_time). + +%%%% Storage layer metrics: +-define(DS_STORE_BATCH_TIME, emqx_ds_store_batch_time). +-define(DS_BUILTIN_NEXT_TIME, emqx_ds_builtin_next_time). 
+ +%%% LTS Storage counters: + +%% This counter is incremented when the iterator seeks to the next interval: +-define(DS_LTS_SEEK_COUNTER, emqx_ds_storage_bitfield_lts_counter_seek). +%% This counter is incremented when the iterator proceeds to the next +%% key within the interval (this is is best case scenario): +-define(DS_LTS_NEXT_COUNTER, emqx_ds_storage_bitfield_lts_counter_next). +%% This counter is incremented when the key passes bitmask check, but +%% the value is rejected by the subsequent post-processing: +-define(DS_LTS_COLLISION_COUNTER, emqx_ds_storage_bitfield_lts_counter_collision). + +-endif. diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl index 833e39211..ce984db57 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_metrics.erl @@ -32,7 +32,11 @@ observe_store_batch_time/2, - observe_next_time/2 + observe_next_time/2, + + inc_lts_seek_counter/2, + inc_lts_next_counter/2, + inc_lts_collision_counter/2 ]). %% behavior callbacks: @@ -43,6 +47,8 @@ -export_type([shard_metrics_id/0]). +-include("emqx_ds_metrics.hrl"). + %%================================================================================ %% Type declarations %%================================================================================ @@ -50,22 +56,25 @@ -define(WORKER, ?MODULE). -define(STORAGE_LAYER_METRICS, [ - {slide, 'emqx_ds_store_batch_time'} + {slide, ?DS_STORE_BATCH_TIME}, + {counter, ?DS_LTS_SEEK_COUNTER}, + {counter, ?DS_LTS_NEXT_COUNTER}, + {counter, ?DS_LTS_COLLISION_COUNTER} ]). -define(FETCH_METRICS, [ - {slide, 'emqx_ds_builtin_next_time'} + {slide, ?DS_BUILTIN_NEXT_TIME} ]). -define(DB_METRICS, ?STORAGE_LAYER_METRICS ++ ?FETCH_METRICS). 
-define(EGRESS_METRICS, [ - {counter, 'emqx_ds_egress_batches'}, - {counter, 'emqx_ds_egress_batches_retry'}, - {counter, 'emqx_ds_egress_batches_failed'}, - {counter, 'emqx_ds_egress_messages'}, - {counter, 'emqx_ds_egress_bytes'}, - {slide, 'emqx_ds_egress_flush_time'} + {counter, ?DS_EGRESS_BATCHES}, + {counter, ?DS_EGRESS_BATCHES_RETRY}, + {counter, ?DS_EGRESS_BATCHES_FAILED}, + {counter, ?DS_EGRESS_MESSAGES}, + {counter, ?DS_EGRESS_BYTES}, + {slide, ?DS_EGRESS_FLUSH_TIME} ]). -define(SHARD_METRICS, ?EGRESS_METRICS). @@ -99,45 +108,57 @@ init_for_shard(ShardId) -> %% @doc Increase the number of successfully flushed batches -spec inc_egress_batches(shard_metrics_id()) -> ok. inc_egress_batches(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches'). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES). %% @doc Increase the number of time the egress worker had to retry %% flushing the batch -spec inc_egress_batches_retry(shard_metrics_id()) -> ok. inc_egress_batches_retry(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_retry'). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_RETRY). %% @doc Increase the number of time the egress worker encountered an %% unrecoverable error while trying to flush the batch -spec inc_egress_batches_failed(shard_metrics_id()) -> ok. inc_egress_batches_failed(Id) -> - catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_batches_failed'). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BATCHES_FAILED). %% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_messages(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_messages(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_messages', NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_MESSAGES, NMessages). 
%% @doc Increase the number of messages successfully saved to the shard -spec inc_egress_bytes(shard_metrics_id(), non_neg_integer()) -> ok. inc_egress_bytes(Id, NMessages) -> - catch emqx_metrics_worker:inc(?WORKER, Id, 'emqx_ds_egress_bytes', NMessages). + catch emqx_metrics_worker:inc(?WORKER, Id, ?DS_EGRESS_BYTES, NMessages). %% @doc Add a sample of elapsed time spent flushing the egress to the %% Raft log (in microseconds) -spec observe_egress_flush_time(shard_metrics_id(), non_neg_integer()) -> ok. observe_egress_flush_time(Id, FlushTime) -> - catch emqx_metrics_worker:observe(?WORKER, Id, 'emqx_ds_egress_flush_time', FlushTime). + catch emqx_metrics_worker:observe(?WORKER, Id, ?DS_EGRESS_FLUSH_TIME, FlushTime). -spec observe_store_batch_time(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. observe_store_batch_time({DB, _}, StoreTime) -> - catch emqx_metrics_worker:observe(?WORKER, DB, 'emqx_ds_store_batch_time', StoreTime). + catch emqx_metrics_worker:observe(?WORKER, DB, ?DS_STORE_BATCH_TIME, StoreTime). %% @doc Add a sample of elapsed time spent waiting for a batch %% `emqx_ds_replication_layer:next' -spec observe_next_time(emqx_ds:db(), non_neg_integer()) -> ok. observe_next_time(DB, NextTime) -> - catch emqx_metrics_worker:observe(?WORKER, DB, 'emqx_ds_builtin_next_time', NextTime). + catch emqx_metrics_worker:observe(?WORKER, DB, ?DS_BUILTIN_NEXT_TIME, NextTime). + +-spec inc_lts_seek_counter(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. +inc_lts_seek_counter({DB, _}, Inc) -> + catch emqx_metrics_worker:inc(?WORKER, DB, ?DS_LTS_SEEK_COUNTER, Inc). + +-spec inc_lts_next_counter(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. +inc_lts_next_counter({DB, _}, Inc) -> + catch emqx_metrics_worker:inc(?WORKER, DB, ?DS_LTS_NEXT_COUNTER, Inc). + +-spec inc_lts_collision_counter(emqx_ds_storage_layer:shard_id(), non_neg_integer()) -> ok. 
+inc_lts_collision_counter({DB, _}, Inc) -> + catch emqx_metrics_worker:inc(?WORKER, DB, ?DS_LTS_COLLISION_COUNTER, Inc). prometheus_meta() -> lists:map( diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index 594854d21..2ec6674b6 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -44,6 +44,7 @@ -export_type([options/0]). +-include("emqx_ds_metrics.hrl"). -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). @@ -115,8 +116,6 @@ ?last_seen_key := binary() }. --define(COUNTER, emqx_ds_storage_bitfield_lts_counter). - %% Limit on the number of wildcard levels in the learned topic trie: -define(WILDCARD_LIMIT, 10). @@ -140,6 +139,8 @@ -define(DIM_TOPIC, 1). -define(DIM_TS, 2). +-define(DS_LTS_COUNTERS, [?DS_LTS_SEEK_COUNTER, ?DS_LTS_NEXT_COUNTER, ?DS_LTS_COLLISION_COUNTER]). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -endif. @@ -347,13 +348,18 @@ update_iterator( ) -> {ok, OldIter#{?last_seen_key => DSKey}}. -next(_Shard, Schema = #s{ts_offset = TSOffset}, It, BatchSize) -> +next(Shard, Schema = #s{ts_offset = TSOffset}, It, BatchSize) -> %% Compute safe cutoff time. %% It's the point in time where the last complete epoch ends, so we need to know %% the current time to compute it. + init_counters(), Now = emqx_ds:timestamp_us(), SafeCutoffTime = (Now bsr TSOffset) bsl TSOffset, - next_until(Schema, It, SafeCutoffTime, BatchSize). + try + next_until(Schema, It, SafeCutoffTime, BatchSize) + after + report_counters(Shard) + end. 
next_until(_Schema, It = #{?tag := ?IT, ?start_time := StartTime}, SafeCutoffTime, _BatchSize) when StartTime >= SafeCutoffTime @@ -375,20 +381,23 @@ next_until(#s{db = DB, data = CF, keymappers = Keymappers}, It, SafeCutoffTime, filter := Filter } = prepare_loop_context(DB, CF, TopicIndex, StartTime, SafeCutoffTime, Varying, Keymappers), try - put(?COUNTER, 0), next_loop(ITHandle, Keymapper, Filter, SafeCutoffTime, It, [], BatchSize) after - rocksdb:iterator_close(ITHandle), - erase(?COUNTER) + rocksdb:iterator_close(ITHandle) end. -delete_next(_Shard, Schema = #s{ts_offset = TSOffset}, It, Selector, BatchSize) -> +delete_next(Shard, Schema = #s{ts_offset = TSOffset}, It, Selector, BatchSize) -> %% Compute safe cutoff time. %% It's the point in time where the last complete epoch ends, so we need to know %% the current time to compute it. + init_counters(), Now = emqx_message:timestamp_now(), SafeCutoffTime = (Now bsr TSOffset) bsl TSOffset, - delete_next_until(Schema, It, SafeCutoffTime, Selector, BatchSize). + try + delete_next_until(Schema, It, SafeCutoffTime, Selector, BatchSize) + after + report_counters(Shard) + end. delete_next_until( _Schema, @@ -417,7 +426,6 @@ delete_next_until( DB, CF, TopicIndex, StartTime, SafeCutoffTime, Varying, Keymappers ), try - put(?COUNTER, 0), LoopContext = LoopContext0#{ db => DB, cf => CF, @@ -430,8 +438,7 @@ delete_next_until( }, delete_next_loop(LoopContext) after - rocksdb:iterator_close(ITHandle), - erase(?COUNTER) + rocksdb:iterator_close(ITHandle) end. 
%%================================================================================ @@ -477,7 +484,6 @@ prepare_loop_context(DB, CF, TopicIndex, StartTime, SafeCutoffTime, Varying, Key next_loop(_ITHandle, _KeyMapper, _Filter, _Cutoff, It, Acc, 0) -> {ok, It, lists:reverse(Acc)}; next_loop(ITHandle, KeyMapper, Filter, Cutoff, It0, Acc0, N0) -> - inc_counter(), #{?tag := ?IT, ?last_seen_key := Key0} = It0, case emqx_ds_bitmask_keymapper:bin_increment(Filter, Key0) of overflow -> @@ -485,6 +491,7 @@ next_loop(ITHandle, KeyMapper, Filter, Cutoff, It0, Acc0, N0) -> Key1 -> %% assert true = Key1 > Key0, + inc_counter(?DS_LTS_SEEK_COUNTER), case rocksdb:iterator_move(ITHandle, {seek, Key1}) of {ok, Key, Val} -> {N, It, Acc} = traverse_interval( @@ -510,6 +517,7 @@ traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, Key, Val, It0, Acc0, N) - Acc = [{Key, Msg} | Acc0], traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, It, Acc, N - 1); false -> + inc_counter(?DS_LTS_COLLISION_COUNTER), traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, It, Acc0, N) end; overflow -> @@ -521,7 +529,7 @@ traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, Key, Val, It0, Acc0, N) - traverse_interval(_ITHandle, _KeyMapper, _Filter, _Cutoff, It, Acc, 0) -> {0, It, Acc}; traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, It, Acc, N) -> - inc_counter(), + inc_counter(?DS_LTS_NEXT_COUNTER), case rocksdb:iterator_move(ITHandle, next) of {ok, Key, Val} -> traverse_interval(ITHandle, KeyMapper, Filter, Cutoff, Key, Val, It, Acc, N); @@ -541,7 +549,7 @@ delete_next_loop(LoopContext0) -> iterated_over := AccIter0, it_handle := ITHandle } = LoopContext0, - inc_counter(), + inc_counter(?DS_LTS_SEEK_COUNTER), #{?tag := ?DELETE_IT, ?last_seen_key := Key0} = It0, case emqx_ds_bitmask_keymapper:bin_increment(Filter, Key0) of overflow -> @@ -623,7 +631,7 @@ delete_traverse_interval1(LoopContext0) -> iterated_over := AccIter, storage_iter := It } = LoopContext0, - inc_counter(), + 
inc_counter(?DS_LTS_NEXT_COUNTER), case rocksdb:iterator_move(ITHandle, next) of {ok, Key, Val} -> delete_traverse_interval(LoopContext0#{ @@ -767,9 +775,20 @@ read_persisted_trie(IT, {ok, KeyB, ValB}) -> read_persisted_trie(_IT, {error, invalid_iterator}) -> []. -inc_counter() -> - N = get(?COUNTER), - put(?COUNTER, N + 1). +inc_counter(Counter) -> + N = get(Counter), + put(Counter, N + 1). + +init_counters() -> + _ = [put(I, 0) || I <- ?DS_LTS_COUNTERS], + ok. + +report_counters(Shard) -> + emqx_ds_builtin_metrics:inc_lts_seek_counter(Shard, get(?DS_LTS_SEEK_COUNTER)), + emqx_ds_builtin_metrics:inc_lts_next_counter(Shard, get(?DS_LTS_NEXT_COUNTER)), + emqx_ds_builtin_metrics:inc_lts_collision_counter(Shard, get(?DS_LTS_COLLISION_COUNTER)), + _ = [erase(I) || I <- ?DS_LTS_COUNTERS], + ok. %% @doc Generate a column family ID for the MQTT messages -spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()]. diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl index 33988a974..727f424b8 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl @@ -677,7 +677,7 @@ t_error_mapping_replication_layer(_Config) -> ), meck:unload(). -%% This test suite verifies the behavior of `store_batch' operation +%% This testcase verifies the behavior of `store_batch' operation %% when the underlying code experiences recoverable or unrecoverable %% problems. 
t_store_batch_fail(_Config) -> diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index 636b57b89..78838e675 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -261,8 +261,7 @@ t_atomic_store_batch(_Config) -> sync => true }) ), - - ok + timer:sleep(1000) end, fun(Trace) -> %% Must contain exactly one flush with all messages. @@ -293,19 +292,18 @@ t_non_atomic_store_batch(_Config) -> sync => true }) ), - - ok + Msgs end, - fun(Trace) -> - %% Should contain one flush per message. - ?assertMatch( - [#{batch := [_]}, #{batch := [_]}, #{batch := [_]}], - ?of_kind(emqx_ds_replication_layer_egress_flush, Trace) + fun(ExpectedMsgs, Trace) -> + ProcessedMsgs = lists:append( + ?projection(batch, ?of_kind(emqx_ds_replication_layer_egress_flush, Trace)) ), - ok + ?assertEqual( + ExpectedMsgs, + ProcessedMsgs + ) end - ), - ok. + ). check(Shard, TopicFilter, StartTime, ExpectedMessages) -> ExpectedFiltered = lists:filter( diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 2327a7263..450033f18 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -37,6 +37,7 @@ -include_lib("public_key/include/public_key.hrl"). -include_lib("prometheus/include/prometheus_model.hrl"). -include_lib("emqx/include/logger.hrl"). +-include_lib("emqx_durable_storage/include/emqx_ds_metrics.hrl"). 
-import( prometheus_model_helpers, @@ -501,14 +502,17 @@ emqx_collect(K = emqx_mria_bootstrap_num_keys, D) -> gauge_metrics(?MG(K, D, []) emqx_collect(K = emqx_mria_message_queue_len, D) -> gauge_metrics(?MG(K, D, [])); emqx_collect(K = emqx_mria_replayq_len, D) -> gauge_metrics(?MG(K, D, [])); %% DS -emqx_collect(K = emqx_ds_egress_batches, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_batches_retry, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_batches_failed, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_messages, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_bytes, D) -> counter_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_egress_flush_time, D) -> gauge_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_store_batch_time, D) -> gauge_metrics(?MG(K, D, [])); -emqx_collect(K = emqx_ds_builtin_next_time, D) -> gauge_metrics(?MG(K, D, [])). +emqx_collect(K = ?DS_EGRESS_BATCHES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_EGRESS_BATCHES_RETRY, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_EGRESS_BATCHES_FAILED, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_EGRESS_MESSAGES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_EGRESS_BYTES, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_EGRESS_FLUSH_TIME, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_STORE_BATCH_TIME, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_BUILTIN_NEXT_TIME, D) -> gauge_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_LTS_SEEK_COUNTER, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_LTS_NEXT_COUNTER, D) -> counter_metrics(?MG(K, D, [])); +emqx_collect(K = ?DS_LTS_COLLISION_COUNTER, D) -> counter_metrics(?MG(K, D, [])). 
%%-------------------------------------------------------------------- %% Indicators From 4382971443548ed5f92bfb77eb644bf2805eb25c Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:45:13 +0200 Subject: [PATCH 053/234] fix(ds): Preserve errors in the egress --- .../src/emqx_ds_replication_layer_egress.erl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 667e1daa4..72b0a468b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -302,9 +302,9 @@ repackage_messages(DB, Messages, Sync, Atomic) -> #{}, Messages ), - maps:foreach( - fun(Shard, {NMsgs, ByteSize, RevMessages}) -> - gen_server:call( + maps:fold( + fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> + Err = gen_server:call( ?via(DB, Shard), #enqueue_req{ messages = lists:reverse(RevMessages), @@ -314,11 +314,22 @@ repackage_messages(DB, Messages, Sync, Atomic) -> payload_bytes = ByteSize }, infinity - ) + ), + compose_errors(ErrAcc, Err) end, + ok, Batches ). +compose_errors(ErrAcc, ok) -> + ErrAcc; +compose_errors(ok, Err) -> + Err; +compose_errors({error, recoverable, _}, {error, unrecoverable, Err}) -> + {error, unrecoverable, Err}; +compose_errors(ErrAcc, _Err) -> + ErrAcc. 
+ start_timer(S) -> Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), Tref = erlang:send_after(Interval, self(), ?flush), From ae5935e7f76eca57fab15459cd95537e3584cf6c Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 2 Apr 2024 17:41:38 +0200 Subject: [PATCH 054/234] test(ds): Attempt to stabilize metrics_worker tests in CI --- .../src/emqx_ds_replication_layer_egress.erl | 1 - .../test/emqx_ds_test_helpers.erl | 9 +++--- .../test/emqx_metrics_worker_SUITE.erl | 5 ++-- changes/ce/feat-12781.en.md | 29 +++++++++++++++++++ 4 files changed, 36 insertions(+), 8 deletions(-) create mode 100644 changes/ce/feat-12781.en.md diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 72b0a468b..bc0765f27 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -221,7 +221,6 @@ do_flush( emqx_ds_builtin_metrics:inc_egress_batches(Metrics), emqx_ds_builtin_metrics:inc_egress_messages(Metrics, S#s.n), emqx_ds_builtin_metrics:inc_egress_bytes(Metrics, S#s.n_bytes), - lists:foreach(fun(From) -> gen_server:reply(From, ok) end, Replies), ?tp( emqx_ds_replication_layer_egress_flush, #{db => DB, shard => Shard, batch => Messages} diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 4af1e9791..be4f7bcdf 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -64,10 +64,11 @@ consume(DB, TopicFilter) -> consume(DB, TopicFilter, StartTime) -> lists:flatmap( - fun({_Stream, Msgs}) -> - Msgs - end, - consume_per_stream(DB, TopicFilter, StartTime)). + fun({_Stream, Msgs}) -> + Msgs + end, + consume_per_stream(DB, TopicFilter, StartTime) + ). 
consume_per_stream(DB, TopicFilter, StartTime) -> Streams = emqx_ds:get_streams(DB, TopicFilter, StartTime), diff --git a/apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl b/apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl index 387e069cf..15866feb0 100644 --- a/apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl +++ b/apps/emqx_utils/test/emqx_metrics_worker_SUITE.erl @@ -31,18 +31,17 @@ suite() -> -define(NAME, ?MODULE). init_per_suite(Config) -> - {ok, _} = emqx_metrics_worker:start_link(?NAME), Config. end_per_suite(_Config) -> - ok = emqx_metrics_worker:stop(?NAME). + ok. init_per_testcase(_, Config) -> - ok = emqx_metrics_worker:stop(?NAME), {ok, _} = emqx_metrics_worker:start_link(?NAME), Config. end_per_testcase(_, _Config) -> + ok = emqx_metrics_worker:stop(?NAME), ok. t_get_metrics(_) -> diff --git a/changes/ce/feat-12781.en.md b/changes/ce/feat-12781.en.md new file mode 100644 index 000000000..c884ccbc4 --- /dev/null +++ b/changes/ce/feat-12781.en.md @@ -0,0 +1,29 @@ +Added metrics related to EMQX durable storage to Prometheus. + +New metrics: + +- `emqx_ds_egress_batches` + +- `emqx_ds_egress_batches_retry` + +- `emqx_ds_egress_batches_failed` + +- `emqx_ds_egress_messages` + +- `emqx_ds_egress_bytes` + +- `emqx_ds_egress_flush_time` + +- `emqx_ds_store_batch_time` + +- `emqx_ds_builtin_next_time` + +- `emqx_ds_storage_bitfield_lts_counter_seek` + +- `emqx_ds_storage_bitfield_lts_counter_next` + +- `emqx_ds_storage_bitfield_lts_counter_collision` + +Note: these metrics are only visible when session persistence is enabled. + +Number of persisted messages has been also added to the dashboard. 
From 92ca90c0ca54c4c4f42d36d1e1a6bbdffaf32641 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 2 Apr 2024 22:29:10 +0200 Subject: [PATCH 055/234] fix(ds): Improve egress logging --- .../src/emqx_ds_replication_layer_egress.erl | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index bc0765f27..eb4b1fc70 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -114,6 +114,7 @@ store_batch(DB, Messages, Opts) -> init([DB, Shard]) -> process_flag(trap_exit, true), process_flag(message_queue_data, off_heap), + logger:update_process_metadata(#{domain => [emqx, ds, egress, DB]}), MetricsId = emqx_ds_builtin_metrics:shard_metric_id(DB, Shard), ok = emqx_ds_builtin_metrics:init_for_shard(MetricsId), S = #s{ @@ -234,19 +235,29 @@ do_flush( pending_replies = [] }; {error, recoverable, Reason} -> + %% Note: this is a hot loop, so we report error messages + %% with `debug' level to avoid wiping the logs. Instead, + %% error the detection must rely on the metrics. Debug + %% logging can be enabled for the particular egress server + %% via logger domain. + ?tp( + debug, + emqx_ds_replication_layer_egress_flush_failed, + #{db => DB, shard => Shard, reason => Reason, recoverable => true} + ), %% Retry sending the batch: emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), erlang:garbage_collect(), %% We block the gen_server until the next retry. 
BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), timer:sleep(BlockTime), - ?tp( - warning, - emqx_ds_replication_layer_egress_flush_failed, - #{db => DB, shard => Shard, reason => Reason} - ), S; - Err = {error, unrecoverable, _} -> + Err = {error, unrecoverable, Reason} -> + ?tp( + debug, + emqx_ds_replication_layer_egress_flush_failed, + #{db => DB, shard => Shard, reason => Reason, recoverable => false} + ), emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), lists:foreach(fun(From) -> gen_server:reply(From, Err) end, Replies), erlang:garbage_collect(), From 2bbfada7af8a2fa6b2253adb1bdaf7c182b8f309 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:02:47 +0200 Subject: [PATCH 056/234] fix(ds): Make async batches truly async --- .../src/emqx_ds_replication_layer_egress.erl | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index eb4b1fc70..f328c7a99 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -76,7 +76,7 @@ store_batch(DB, Messages, Opts) -> case shards_of_batch(DB, Messages) of [{Shard, {NMsgs, NBytes}}] -> %% Happy case: - gen_server:call( + enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ messages = Messages, @@ -84,8 +84,7 @@ store_batch(DB, Messages, Opts) -> atomic = Atomic, n_messages = NMsgs, payload_bytes = NBytes - }, - infinity + } ); [_, _ | _] when Atomic -> %% It's impossible to commit a batch to multiple shards @@ -93,7 +92,7 @@ store_batch(DB, Messages, Opts) -> {error, unrecoverable, atomic_commit_to_multiple_shards}; _Shards -> %% Use a slower implementation for the unlikely case: - repackage_messages(DB, Messages, Sync, Atomic) + repackage_messages(DB, Messages, Sync) end. 
%%================================================================================ @@ -127,15 +126,31 @@ init([DB, Shard]) -> handle_call( #enqueue_req{ - messages = Msgs, sync = Sync, atomic = Atomic, n_messages = NMsgs, payload_bytes = NBytes + messages = Msgs, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes }, From, - S + S0 = #s{pending_replies = Replies0} ) -> - {noreply, enqueue(From, Sync, Atomic, Msgs, NMsgs, NBytes, S)}; + S = S0#s{pending_replies = [From | Replies0]}, + {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. +handle_cast( + #enqueue_req{ + messages = Msgs, + sync = Sync, + atomic = Atomic, + n_messages = NMsgs, + payload_bytes = NBytes + }, + S +) -> + {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; handle_cast(_Cast, S) -> {noreply, S}. @@ -156,13 +171,12 @@ terminate(_Reason, _S) -> %%================================================================================ enqueue( - From, Sync, Atomic, Msgs, BatchSize, BatchBytes, - S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0, pending_replies = Replies0} + S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0} ) -> %% At this point we don't split the batches, even when they aren't %% atomic. 
It wouldn't win us anything in terms of memory, and @@ -178,20 +192,12 @@ enqueue( %% it now, and retry: cancel_timer(S0), S1 = flush(S0), - enqueue(From, Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); + enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); false -> %% The buffer is empty, we enqueue the atomic batch in its %% entirety: Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), - Replies = - case Sync of - true -> - [From | Replies0]; - false -> - gen_server:reply(From, ok), - Replies0 - end, - S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1, pending_replies = Replies}, + S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, case NMsgs >= NMax orelse NBytes >= NBytes of true -> cancel_timer(S1), @@ -295,7 +301,7 @@ shards_of_batch(DB, Messages) -> ) ). -repackage_messages(DB, Messages, Sync, Atomic) -> +repackage_messages(DB, Messages, Sync) -> Batches = lists:foldl( fun(Message, Acc) -> Shard = emqx_ds_replication_layer:shard_of_message(DB, Message, clientid), @@ -314,16 +320,15 @@ repackage_messages(DB, Messages, Sync, Atomic) -> ), maps:fold( fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> - Err = gen_server:call( + Err = enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ messages = lists:reverse(RevMessages), sync = Sync, - atomic = Atomic, + atomic = false, n_messages = NMsgs, payload_bytes = ByteSize - }, - infinity + } ), compose_errors(ErrAcc, Err) end, @@ -331,6 +336,11 @@ repackage_messages(DB, Messages, Sync, Atomic) -> Batches ). +enqueue_call_or_cast(To, Req = #enqueue_req{sync = true}) -> + gen_server:call(To, Req, infinity); +enqueue_call_or_cast(To, Req = #enqueue_req{sync = false}) -> + gen_server:cast(To, Req). 
+ compose_errors(ErrAcc, ok) -> ErrAcc; compose_errors(ok, Err) -> From 319ec50c0d394f87b97dce0e6a5e1f609da4dc71 Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Wed, 3 Apr 2024 11:57:01 +0800 Subject: [PATCH 057/234] fix: source bridges missing after restore the backup files --- apps/emqx/src/bhvrs/emqx_config_backup.erl | 15 ++++-- apps/emqx/src/emqx.app.src | 2 +- apps/emqx_bridge/src/emqx_bridge.app.src | 2 +- apps/emqx_bridge/src/emqx_bridge_v2.erl | 21 ++++++++- .../src/emqx_mgmt_data_backup.erl | 47 +++++++++++++------ changes/ce/fix-12826.en.md | 18 +++++++ 6 files changed, 83 insertions(+), 22 deletions(-) create mode 100644 changes/ce/fix-12826.en.md diff --git a/apps/emqx/src/bhvrs/emqx_config_backup.erl b/apps/emqx/src/bhvrs/emqx_config_backup.erl index e4818a871..1ec08c23b 100644 --- a/apps/emqx/src/bhvrs/emqx_config_backup.erl +++ b/apps/emqx/src/bhvrs/emqx_config_backup.erl @@ -16,9 +16,14 @@ -module(emqx_config_backup). +-type ok_result() :: #{ + root_key => emqx_utils_maps:config_key(), + changed => [emqx_utils_maps:config_key_path()] +}. + +-type error_result() :: #{root_key => emqx_utils_maps:config_key(), reason => term()}. + -callback import_config(RawConf :: map()) -> - {ok, #{ - root_key => emqx_utils_maps:config_key(), - changed => [emqx_utils_maps:config_key_path()] - }} - | {error, #{root_key => emqx_utils_maps:config_key(), reason => term()}}. + {ok, ok_result()} + | {error, error_result()} + | {results, {[ok_result()], [error_result()]}}. 
diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 1d8c55fe9..462b7e74b 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -2,7 +2,7 @@ {application, emqx, [ {id, "emqx"}, {description, "EMQX Core"}, - {vsn, "5.2.0"}, + {vsn, "5.2.1"}, {modules, []}, {registered, []}, {applications, [ diff --git a/apps/emqx_bridge/src/emqx_bridge.app.src b/apps/emqx_bridge/src/emqx_bridge.app.src index 9ef567f23..57dbc26ba 100644 --- a/apps/emqx_bridge/src/emqx_bridge.app.src +++ b/apps/emqx_bridge/src/emqx_bridge.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_bridge, [ {description, "EMQX bridges"}, - {vsn, "0.1.34"}, + {vsn, "0.1.35"}, {registered, [emqx_bridge_sup]}, {mod, {emqx_bridge_app, []}}, {applications, [ diff --git a/apps/emqx_bridge/src/emqx_bridge_v2.erl b/apps/emqx_bridge/src/emqx_bridge_v2.erl index e834dc42e..10d597d36 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2.erl @@ -1030,7 +1030,26 @@ bridge_v2_type_to_connector_type(Type) -> import_config(RawConf) -> %% actions structure - emqx_bridge:import_config(RawConf, <<"actions">>, ?ROOT_KEY_ACTIONS, config_key_path()). + ActionRes = emqx_bridge:import_config( + RawConf, <<"actions">>, ?ROOT_KEY_ACTIONS, config_key_path() + ), + SourceRes = emqx_bridge:import_config( + RawConf, <<"sources">>, ?ROOT_KEY_SOURCES, config_key_path_sources() + ), + combine_import_results([ActionRes, SourceRes]). + +combine_import_results(Results0) -> + Results = lists:foldr( + fun + ({ok, OkRes}, {OkAcc, ErrAcc}) -> + {[OkRes | OkAcc], ErrAcc}; + ({error, ErrRes}, {OkAcc, ErrAcc}) -> + {OkAcc, [ErrRes | ErrAcc]} + end, + {[], []}, + Results0 + ), + {results, Results}. 
%%==================================================================== %% Config Update Handler API diff --git a/apps/emqx_management/src/emqx_mgmt_data_backup.erl b/apps/emqx_management/src/emqx_mgmt_data_backup.erl index 2aaa014a8..03eb7ac06 100644 --- a/apps/emqx_management/src/emqx_mgmt_data_backup.erl +++ b/apps/emqx_management/src/emqx_mgmt_data_backup.erl @@ -773,23 +773,42 @@ validate_cluster_hocon(RawConf) -> do_import_conf(RawConf, Opts) -> GenConfErrs = filter_errors(maps:from_list(import_generic_conf(RawConf))), maybe_print_conf_errors(GenConfErrs, Opts), - Errors = - lists:foldl( - fun(Module, ErrorsAcc) -> - case Module:import_config(RawConf) of - {ok, #{changed := Changed}} -> - maybe_print_changed(Changed, Opts), - ErrorsAcc; - {error, #{root_key := RootKey, reason := Reason}} -> - ErrorsAcc#{[RootKey] => Reason} - end - end, - GenConfErrs, - sort_importer_modules(find_behaviours(emqx_config_backup)) - ), + Modules = sort_importer_modules(find_behaviours(emqx_config_backup)), + Errors = lists:foldl(print_ok_results_collect_errors(RawConf, Opts), GenConfErrs, Modules), maybe_print_conf_errors(Errors, Opts), Errors. +print_ok_results_collect_errors(RawConf, Opts) -> + fun(Module, Errors) -> + case Module:import_config(RawConf) of + {results, {OkResults, ErrResults}} -> + print_ok_results(OkResults, Opts), + collect_errors(ErrResults, Errors); + {ok, OkResult} -> + print_ok_results([OkResult], Opts), + Errors; + {error, ErrResult} -> + collect_errors([ErrResult], Errors) + end + end. + +print_ok_results(Results, Opts) -> + lists:foreach( + fun(#{changed := Changed}) -> + maybe_print_changed(Changed, Opts) + end, + Results + ). + +collect_errors(Results, Errors) -> + lists:foldr( + fun(#{root_key := RootKey, reason := Reason}, Acc) -> + Acc#{[RootKey] => Reason} + end, + Errors, + Results + ). 
+ sort_importer_modules(Modules) -> lists:sort( fun(M1, M2) -> order(M1, ?IMPORT_ORDER) =< order(M2, ?IMPORT_ORDER) end, diff --git a/changes/ce/fix-12826.en.md b/changes/ce/fix-12826.en.md new file mode 100644 index 000000000..51255059d --- /dev/null +++ b/changes/ce/fix-12826.en.md @@ -0,0 +1,18 @@ +Cannot import `sources` from backup files. + +Before the fix, the following configs in backup files cannot be imported: + +``` +sources { + mqtt { + source_c384b174 { + connector = source_connector_c8287217 + enable = true + parameters { + qos = 0 + topic = "t/#" + } + } + } +} +``` From f37ed3a40a93896d3be2e1e58b8e073b214ec7b9 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 3 Apr 2024 13:57:16 +0200 Subject: [PATCH 058/234] fix(ds): Limit the number of retries in egress to 0 --- .../src/emqx_ds_replication_layer.erl | 2 + .../src/emqx_ds_replication_layer_egress.erl | 40 ++++++++++++++----- .../test/emqx_ds_SUITE.erl | 35 ++++++++-------- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index f8478bb72..ecc6a492e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -551,6 +551,8 @@ list_nodes() -> end ). +-spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), [emqx_types:message()]) -> + ok | {timeout, _} | {error, recoverable | unrecoverable, _Err} | _Err. 
ra_store_batch(DB, Shard, Messages) -> Command = #{ ?tag => ?BATCH, diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index f328c7a99..4122d937d 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -103,6 +103,11 @@ store_batch(DB, Messages, Opts) -> db :: emqx_ds:db(), shard :: emqx_ds_replication_layer:shard_id(), metrics_id :: emqx_ds_builtin_metrics:shard_metrics_id(), + n_retries = 0 :: non_neg_integer(), + %% FIXME: Currently max_retries is always 0, because replication + %% layer doesn't guarantee idempotency. Retrying would create + %% duplicate messages. + max_retries = 0 :: non_neg_integer(), n = 0 :: non_neg_integer(), n_bytes = 0 :: non_neg_integer(), tref :: undefined | reference(), @@ -216,7 +221,15 @@ flush(S) -> do_flush(S0 = #s{n = 0}) -> S0; do_flush( - S = #s{queue = Q, pending_replies = Replies, db = DB, shard = Shard, metrics_id = Metrics} + S = #s{ + queue = Q, + pending_replies = Replies, + db = DB, + shard = Shard, + metrics_id = Metrics, + n_retries = Retries, + max_retries = MaxRetries + } ) -> Messages = queue:to_list(Q), T0 = erlang:monotonic_time(microsecond), @@ -240,7 +253,7 @@ do_flush( queue = queue:new(), pending_replies = [] }; - {error, recoverable, Reason} -> + {timeout, ServerId} when Retries < MaxRetries -> %% Note: this is a hot loop, so we report error messages %% with `debug' level to avoid wiping the logs. Instead, %% error the detection must rely on the metrics. Debug @@ -248,8 +261,8 @@ do_flush( %% via logger domain. 
?tp( debug, - emqx_ds_replication_layer_egress_flush_failed, - #{db => DB, shard => Shard, reason => Reason, recoverable => true} + emqx_ds_replication_layer_egress_flush_retry, + #{db => DB, shard => Shard, reason => timeout, server_id => ServerId} ), %% Retry sending the batch: emqx_ds_builtin_metrics:inc_egress_batches_retry(Metrics), @@ -257,21 +270,30 @@ do_flush( %% We block the gen_server until the next retry. BlockTime = ?COOLDOWN_MIN + rand:uniform(?COOLDOWN_MAX - ?COOLDOWN_MIN), timer:sleep(BlockTime), - S; - Err = {error, unrecoverable, Reason} -> + S#s{n_retries = Retries + 1}; + Err -> ?tp( debug, emqx_ds_replication_layer_egress_flush_failed, - #{db => DB, shard => Shard, reason => Reason, recoverable => false} + #{db => DB, shard => Shard, error => Err} ), emqx_ds_builtin_metrics:inc_egress_batches_failed(Metrics), - lists:foreach(fun(From) -> gen_server:reply(From, Err) end, Replies), + Reply = + case Err of + {error, _, _} -> Err; + {timeout, ServerId} -> {error, recoverable, {timeout, ServerId}}; + _ -> {error, unrecoverable, Err} + end, + lists:foreach( + fun(From) -> gen_server:reply(From, Reply) end, Replies + ), erlang:garbage_collect(), S#s{ n = 0, n_bytes = 0, queue = queue:new(), - pending_replies = [] + pending_replies = [], + n_retries = 0 } end. 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl
index 727f424b8..1d2daacbb 100644
--- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl
+++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl
@@ -684,7 +684,7 @@ t_store_batch_fail(_Config) ->
     ?check_trace(
         #{timetrap => 15_000},
         try
-            meck:new(emqx_ds_replication_layer, [passthrough, no_history]),
+            meck:new(emqx_ds_storage_layer, [passthrough, no_history]),
             DB = ?FUNCTION_NAME,
             ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})),
             %% Success:
@@ -694,7 +694,7 @@ t_store_batch_fail(_Config) ->
             ],
             ?assertMatch(ok, emqx_ds:store_batch(DB, Batch1, #{sync => true})),
             %% Inject unrecoverable error:
-            meck:expect(emqx_ds_replication_layer, ra_store_batch, fun(_DB, _Shard, _Messages) ->
+            meck:expect(emqx_ds_storage_layer, store_batch, fun(_DB, _Shard, _Messages) ->
                 {error, unrecoverable, mock}
             end),
             Batch2 = [
@@ -704,35 +704,32 @@ t_store_batch_fail(_Config) ->
             ?assertMatch(
                 {error, unrecoverable, mock}, emqx_ds:store_batch(DB, Batch2, #{sync => true})
             ),
-            %% Inject a recoverable error:
+            meck:unload(emqx_ds_storage_layer),
+            %% Inject a recoverable error:
+            meck:new(ra, [passthrough, no_history]),
+            meck:expect(ra, process_command, fun(Servers, Shard, Command) ->
+                ?tp(ra_command, #{servers => Servers, shard => Shard, command => Command}),
+                {timeout, mock}
+            end),
             Batch3 = [
                 message(<<"C1">>, <<"foo/bar">>, <<"5">>, 2),
                 message(<<"C2">>, <<"foo/bar">>, <<"6">>, 2),
                 message(<<"C1">>, <<"foo/bar">>, <<"7">>, 3),
                 message(<<"C2">>, <<"foo/bar">>, <<"8">>, 3)
             ],
-            meck:expect(emqx_ds_replication_layer, ra_store_batch, fun(DB, Shard, Messages) ->
-                try
-                    ?tp(store_batch, #{messages => Messages}),
-                    meck:passthrough([DB, Shard, Messages])
-                catch
-                    _:_ ->
-                        {error, recoverable, mock}
-                end
-            end),
-            ?inject_crash(#{?snk_kind := store_batch}, snabbkaffe_nemesis:recover_after(3)),
+            %% Note: due to idempotency issues the number of retries
+            %% is
currently set to 0: + ?assertMatch( + {error, recoverable, {timeout, mock}}, + emqx_ds:store_batch(DB, Batch3, #{sync => true}) + ), + meck:unload(ra), ?assertMatch(ok, emqx_ds:store_batch(DB, Batch3, #{sync => true})), lists:sort(emqx_ds_test_helpers:consume_per_stream(DB, ['#'], 1)) after meck:unload() end, [ - {"number of successfull flushes after retry", fun(Trace) -> - ?assertMatch([_, _], ?of_kind(store_batch, Trace)) - end}, - {"number of retries", fun(Trace) -> - ?assertMatch([_, _, _], ?of_kind(snabbkaffe_crash, Trace)) - end}, {"message ordering", fun(StoredMessages, _Trace) -> [{_, Stream1}, {_, Stream2}] = StoredMessages, ?assertMatch( From 0e79b543cf51fcc83da27e2dd0ff4fcb7e69def4 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 4 Apr 2024 11:10:56 +0200 Subject: [PATCH 059/234] refactor: move variform to emqx_utils --- .gitignore | 4 ++-- apps/{emqx/src/variform => emqx_utils/src}/emqx_variform.erl | 0 .../src/variform => emqx_utils/src}/emqx_variform_parser.yrl | 0 .../src/variform => emqx_utils/src}/emqx_variform_scan.xrl | 0 .../src/variform => emqx_utils/src}/emqx_variform_str.erl | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform.erl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_parser.yrl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_scan.xrl (100%) rename apps/{emqx/src/variform => emqx_utils/src}/emqx_variform_str.erl (100%) diff --git a/.gitignore b/.gitignore index a2c8b7e65..d5338d5c4 100644 --- a/.gitignore +++ b/.gitignore @@ -76,5 +76,5 @@ rebar-git-cache.tar .docker_image_tag .emqx_docker_image_tags .git/ -apps/emqx/src/emqx_variform_parser.erl -apps/emqx/src/variform/emqx_variform_scan.erl +apps/emqx_utils/src/emqx_variform_parser.erl +apps/emqx_utils/src/emqx_variform_scan.erl diff --git a/apps/emqx/src/variform/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl similarity index 100% rename from 
apps/emqx/src/variform/emqx_variform.erl rename to apps/emqx_utils/src/emqx_variform.erl diff --git a/apps/emqx/src/variform/emqx_variform_parser.yrl b/apps/emqx_utils/src/emqx_variform_parser.yrl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_parser.yrl rename to apps/emqx_utils/src/emqx_variform_parser.yrl diff --git a/apps/emqx/src/variform/emqx_variform_scan.xrl b/apps/emqx_utils/src/emqx_variform_scan.xrl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_scan.xrl rename to apps/emqx_utils/src/emqx_variform_scan.xrl diff --git a/apps/emqx/src/variform/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_str.erl similarity index 100% rename from apps/emqx/src/variform/emqx_variform_str.erl rename to apps/emqx_utils/src/emqx_variform_str.erl From 59a442cdb56899cde22cf57ab11615d90d472427 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Wed, 27 Mar 2024 11:11:16 +0100 Subject: [PATCH 060/234] feat(rule trace): add support for ruleid as a trace type --- apps/emqx/include/emqx_trace.hrl | 9 +- apps/emqx/src/emqx_trace/emqx_trace.erl | 6 ++ .../src/emqx_trace/emqx_trace_handler.erl | 14 ++- apps/emqx/test/emqx_trace_handler_SUITE.erl | 85 +++++++++++++++++-- .../src/emqx_mgmt_api_trace.erl | 11 ++- .../test/emqx_mgmt_api_trace_SUITE.erl | 67 ++++++++++++++- .../src/emqx_rule_runtime.erl | 3 + rel/i18n/emqx_mgmt_api_trace.hocon | 5 ++ 8 files changed, 188 insertions(+), 12 deletions(-) diff --git a/apps/emqx/include/emqx_trace.hrl b/apps/emqx/include/emqx_trace.hrl index 5c50fa706..476794223 100644 --- a/apps/emqx/include/emqx_trace.hrl +++ b/apps/emqx/include/emqx_trace.hrl @@ -20,9 +20,14 @@ -record(?TRACE, { name :: binary() | undefined | '_', - type :: clientid | topic | ip_address | undefined | '_', + type :: clientid | topic | ip_address | ruleid | undefined | '_', filter :: - emqx_types:topic() | emqx_types:clientid() | emqx_trace:ip_address() | undefined | '_', + emqx_types:topic() + | emqx_types:clientid() + 
| emqx_trace:ip_address() + | emqx_trace:ruleid() + | undefined + | '_', enable = true :: boolean() | '_', payload_encode = text :: hex | text | hidden | '_', extra = #{} :: map() | '_', diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index f3a5be084..8151c19b5 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -66,6 +66,9 @@ -export_type([ip_address/0]). -type ip_address() :: string(). +-export_type([ruleid/0]). +-type ruleid() :: binary(). + publish(#message{topic = <<"$SYS/", _/binary>>}) -> ignore; publish(#message{from = From, topic = Topic, payload = Payload}) when @@ -517,6 +520,9 @@ to_trace(#{type := ip_address, ip_address := Filter} = Trace, Rec) -> Error -> Error end; +to_trace(#{type := ruleid, ruleid := Filter} = Trace, Rec) -> + Trace0 = maps:without([type, ruleid], Trace), + to_trace(Trace0, Rec#?TRACE{type = ruleid, filter = Filter}); to_trace(#{type := Type}, _Rec) -> {error, io_lib:format("required ~s field", [Type])}; to_trace(#{payload_encode := PayloadEncode} = Trace, Rec) -> diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index 313826cde..3af543013 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -33,6 +33,7 @@ %% For logger handler filters callbacks -export([ + filter_ruleid/2, filter_clientid/2, filter_topic/2, filter_ip_address/2 @@ -133,6 +134,12 @@ uninstall(HandlerId) -> running() -> lists:foldl(fun filter_traces/2, [], emqx_logger:get_log_handlers(started)). +-spec filter_ruleid(logger:log_event(), {binary(), atom()}) -> logger:log_event() | stop. +filter_ruleid(#{meta := Meta = #{ruleid := RuleId}} = Log, {MatchId, _Name}) -> + filter_ret(RuleId =:= MatchId andalso is_trace(Meta), Log); +filter_ruleid(_Log, _ExpectId) -> + stop. 
+ -spec filter_clientid(logger:log_event(), {binary(), atom()}) -> logger:log_event() | stop. filter_clientid(#{meta := Meta = #{clientid := ClientId}} = Log, {MatchId, _Name}) -> filter_ret(ClientId =:= MatchId andalso is_trace(Meta), Log); @@ -164,7 +171,9 @@ filters(#{type := clientid, filter := Filter, name := Name}) -> filters(#{type := topic, filter := Filter, name := Name}) -> [{topic, {fun ?MODULE:filter_topic/2, {ensure_bin(Filter), Name}}}]; filters(#{type := ip_address, filter := Filter, name := Name}) -> - [{ip_address, {fun ?MODULE:filter_ip_address/2, {ensure_list(Filter), Name}}}]. + [{ip_address, {fun ?MODULE:filter_ip_address/2, {ensure_list(Filter), Name}}}]; +filters(#{type := ruleid, filter := Filter, name := Name}) -> + [{ruleid, {fun ?MODULE:filter_ruleid/2, {ensure_bin(Filter), Name}}}]. formatter(#{type := _Type, payload_encode := PayloadEncode}) -> {emqx_trace_formatter, #{ @@ -184,7 +193,8 @@ filter_traces(#{id := Id, level := Level, dst := Dst, filters := Filters}, Acc) [{Type, {FilterFun, {Filter, Name}}}] when Type =:= topic orelse Type =:= clientid orelse - Type =:= ip_address + Type =:= ip_address orelse + Type =:= ruleid -> [Init#{type => Type, filter => Filter, name => Name, filter_fun => FilterFun} | Acc]; _ -> diff --git a/apps/emqx/test/emqx_trace_handler_SUITE.erl b/apps/emqx/test/emqx_trace_handler_SUITE.erl index 59a472f3e..85a9c056b 100644 --- a/apps/emqx/test/emqx_trace_handler_SUITE.erl +++ b/apps/emqx/test/emqx_trace_handler_SUITE.erl @@ -20,6 +20,7 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). +-include_lib("snabbkaffe/include/test_macros.hrl"). -include_lib("common_test/include/ct.hrl"). -define(CLIENT, [ @@ -29,11 +30,12 @@ {password, <<"pass">>} ]). -all() -> [t_trace_clientid, t_trace_topic, t_trace_ip_address, t_trace_clientid_utf8]. +all() -> + [t_trace_clientid, t_trace_topic, t_trace_ip_address, t_trace_clientid_utf8, t_trace_rule_id]. 
init_per_suite(Config) -> Apps = emqx_cth_suite:start( - [emqx], + [emqx, emqx_rule_engine], #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{apps, Apps} | Config]. @@ -205,6 +207,79 @@ t_trace_topic(_Config) -> ?assertEqual([], emqx_trace_handler:running()), emqtt:disconnect(T). +create_rule(Name, SQL) -> + Rule = emqx_rule_engine_SUITE:make_simple_rule(Name, SQL), + {ok, _} = emqx_rule_engine:create_rule(Rule). + +t_trace_rule_id(_Config) -> + %% Start MQTT Client + {ok, T} = emqtt:start_link(?CLIENT), + emqtt:connect(T), + %% Create rules + create_rule( + <<"test_rule_id_1">>, + <<"select 1 as rule_number from \"rule_1_topic\"">> + ), + create_rule( + <<"test_rule_id_2">>, + <<"select 2 as rule_number from \"rule_2_topic\"">> + ), + %% Start tracing + ok = emqx_trace_handler:install( + "CLI-RULE-1", ruleid, <<"test_rule_id_1">>, all, "tmp/rule_trace_1.log" + ), + ok = emqx_trace_handler:install( + "CLI-RULE-2", ruleid, <<"test_rule_id_2">>, all, "tmp/rule_trace_2.log" + ), + emqx_trace:check(), + ok = filesync("CLI-RULE-1", ruleid), + ok = filesync("CLI-RULE-2", ruleid), + + %% Verify the tracing file exits + ?assert(filelib:is_regular("tmp/rule_trace_1.log")), + ?assert(filelib:is_regular("tmp/rule_trace_2.log")), + + %% Get current traces + ?assertMatch( + [ + #{ + type := ruleid, + filter := <<"test_rule_id_1">>, + level := debug, + dst := "tmp/rule_trace_1.log", + name := <<"CLI-RULE-1">> + }, + #{ + type := ruleid, + filter := <<"test_rule_id_2">>, + name := <<"CLI-RULE-2">>, + level := debug, + dst := "tmp/rule_trace_2.log" + } + ], + emqx_trace_handler:running() + ), + + %% Trigger rule + emqtt:publish(T, <<"rule_1_topic">>, <<"my_traced_message">>), + ?retry( + 100, + 5, + begin + ok = filesync("CLI-RULE-1", ruleid), + {ok, Bin} = file:read_file("tmp/rule_trace_1.log"), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"my_traced_message">>])) + end + ), + ok = filesync("CLI-RULE-2", ruleid), + ?assert(filelib:file_size("tmp/rule_trace_2.log") =:= 0), + 
+ %% Stop tracing + ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-1">>), + ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-2">>), + ?assertEqual([], emqx_trace_handler:running()), + emqtt:disconnect(T). + t_trace_ip_address(_Config) -> {ok, T} = emqtt:start_link(?CLIENT), emqtt:connect(T), @@ -272,11 +347,11 @@ t_trace_ip_address(_Config) -> filesync(Name, Type) -> ct:sleep(50), - filesync(Name, Type, 3). + filesync(Name, Type, 5). %% sometime the handler process is not started yet. -filesync(_Name, _Type, 0) -> - ok; +filesync(Name, Type, 0) -> + ct:fail("Handler process not started ~p ~p", [Name, Type]); filesync(Name0, Type, Retry) -> Name = case is_binary(Name0) of diff --git a/apps/emqx_management/src/emqx_mgmt_api_trace.erl b/apps/emqx_management/src/emqx_mgmt_api_trace.erl index 5cdbc65ff..19edc229d 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_trace.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_trace.erl @@ -222,7 +222,7 @@ fields(trace) -> )}, {type, hoconsc:mk( - hoconsc:enum([clientid, topic, ip_address]), + hoconsc:enum([clientid, topic, ip_address, ruleid]), #{ description => ?DESC(filter_type), required => true, @@ -257,6 +257,15 @@ fields(trace) -> example => <<"127.0.0.1">> } )}, + {ruleid, + hoconsc:mk( + binary(), + #{ + description => ?DESC(ruleid_field), + required => false, + example => <<"my_rule">> + } + )}, {status, hoconsc:mk( hoconsc:enum([running, stopped, waiting]), diff --git a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl index cb93bc9d6..ef7b5a191 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl @@ -122,6 +122,56 @@ t_http_test(_Config) -> unload(), ok. 
+t_http_test_rule_trace(_Config) -> + emqx_trace:clear(), + load(), + %% create + Name = atom_to_binary(?FUNCTION_NAME), + Trace = [ + {<<"name">>, Name}, + {<<"type">>, <<"ruleid">>}, + {<<"ruleid">>, Name} + ], + + {ok, Create} = request_api(post, api_path("trace"), Trace), + ?assertMatch(#{<<"name">> := Name}, json(Create)), + + {ok, List} = request_api(get, api_path("trace")), + [Data] = json(List), + ?assertEqual(Name, maps:get(<<"name">>, Data)), + + %% update + {ok, Update} = request_api(put, api_path(iolist_to_binary(["trace/", Name, "/stop"])), #{}), + ?assertEqual( + #{ + <<"enable">> => false, + <<"name">> => Name + }, + json(Update) + ), + {ok, List1} = request_api(get, api_path("trace")), + [Data1] = json(List1), + Node = atom_to_binary(node()), + ?assertMatch( + #{ + <<"status">> := <<"stopped">>, + <<"name">> := Name, + <<"log_size">> := #{Node := _}, + <<"start_at">> := _, + <<"end_at">> := _, + <<"type">> := <<"ruleid">>, + <<"ruleid">> := Name + }, + Data1 + ), + + %% delete + {ok, Delete} = request_api(delete, api_path(["trace/", Name])), + ?assertEqual(<<>>, Delete), + + unload(), + ok. + t_create_failed(_Config) -> load(), Trace = [{<<"type">>, <<"topic">>}, {<<"topic">>, <<"/x/y/z">>}], @@ -252,13 +302,16 @@ t_log_file(_Config) -> ok. create_trace(Name, ClientId, Start) -> + create_trace(Name, clientid, ClientId, Start). + +create_trace(Name, Type, TypeValue, Start) -> ?check_trace( #{timetrap => 900}, begin {ok, _} = emqx_trace:create([ {<<"name">>, Name}, - {<<"type">>, clientid}, - {<<"clientid">>, ClientId}, + {<<"type">>, Type}, + {atom_to_binary(Type), TypeValue}, {<<"start_at">>, Start} ]), ?block_until(#{?snk_kind := update_trace_done}) @@ -268,6 +321,16 @@ create_trace(Name, ClientId, Start) -> end ). +create_rule_trace(RuleId) -> + Now = erlang:system_time(second), + emqx_mgmt_api_trace_SUITE:create_trace(atom_to_binary(?FUNCTION_NAME), ruleid, RuleId, Now - 2). 
+ +t_create_rule_trace(_Config) -> + load(), + create_rule_trace(atom_to_binary(?FUNCTION_NAME)), + unload(), + ok. + t_stream_log(_Config) -> emqx_trace:clear(), load(), diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index f51908772..9a307e2c3 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -69,6 +69,9 @@ apply_rule_discard_result(Rule, Columns, Envs) -> ok. apply_rule(Rule = #{id := RuleID}, Columns, Envs) -> + ?TRACE("APPLY_RULE", "rule_activated", #{ + ruleid => RuleID, input => Columns, environment => Envs + }), ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'matched'), clear_rule_payload(), try diff --git a/rel/i18n/emqx_mgmt_api_trace.hocon b/rel/i18n/emqx_mgmt_api_trace.hocon index 67462ab43..ba07d7d53 100644 --- a/rel/i18n/emqx_mgmt_api_trace.hocon +++ b/rel/i18n/emqx_mgmt_api_trace.hocon @@ -80,6 +80,11 @@ client_ip_addess.desc: client_ip_addess.label: """Client IP Address""" +ruleid.desc: +"""Specify the Rule ID if the trace type is 'ruleid'.""" +ruleid.label: +"""Rule ID""" + trace_status.desc: """trace status""" trace_status.label: From c57c36adb222d90b3c46cf66280be8e8565a6d6c Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 4 Apr 2024 14:05:12 -0300 Subject: [PATCH 061/234] feat(ds): clear all checkpoints when (re)starting storage layer Fixes https://emqx.atlassian.net/browse/EMQX-12143 --- .../src/emqx_ds_storage_layer.erl | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 28ce1d943..4981c3fc1 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -500,6 +500,7 @@ init({ShardId, Options}) -> process_flag(trap_exit, true), logger:set_process_metadata(#{shard_id => 
ShardId, domain => [ds, storage_layer, shard]}), erase_schema_runtime(ShardId), + clear_all_checkpoints(ShardId), {ok, DB, CFRefs0} = rocksdb_open(ShardId, Options), {Schema, CFRefs} = case get_schema_persistent(DB) of @@ -567,6 +568,23 @@ terminate(_Reason, #s{db = DB, shard_id = ShardId}) -> %% Internal functions %%================================================================================ +-spec clear_all_checkpoints(shard_id()) -> ok. +clear_all_checkpoints(ShardId) -> + CheckpointBaseDir = checkpoints_dir(ShardId), + ok = filelib:ensure_path(CheckpointBaseDir), + {ok, AllFiles} = file:list_dir(CheckpointBaseDir), + CheckpointDirs = [Dir || Dir <- AllFiles, filelib:is_dir(Dir)], + lists:foreach( + fun(Dir) -> + logger:debug(#{ + msg => "ds_storage_deleting_previous_checkpoint", + dir => Dir + }), + ok = file:del_dir_r(Dir) + end, + CheckpointDirs + ). + -spec open_shard(shard_id(), rocksdb:db_handle(), cf_refs(), shard_schema()) -> shard(). open_shard(ShardId, DB, CFRefs, ShardSchema) -> @@ -777,9 +795,13 @@ rocksdb_open(Shard, Options) -> db_dir({DB, ShardId}) -> filename:join([emqx_ds:base_dir(), DB, binary_to_list(ShardId)]). +-spec checkpoints_dir(shard_id()) -> file:filename(). +checkpoints_dir({DB, ShardId}) -> + filename:join([emqx_ds:base_dir(), DB, checkpoints, binary_to_list(ShardId)]). + -spec checkpoint_dir(shard_id(), _Name :: file:name()) -> file:filename(). -checkpoint_dir({DB, ShardId}, Name) -> - filename:join([emqx_ds:base_dir(), DB, checkpoints, binary_to_list(ShardId), Name]). +checkpoint_dir(ShardId, Name) -> + filename:join([checkpoints_dir(ShardId), Name]). 
-spec update_last_until(Schema, emqx_ds:time()) -> Schema | {error, exists | overlaps_existing_generations} From 60cad74286dce5a0440f87125fc8ef2877fc4059 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 4 Apr 2024 13:38:50 -0300 Subject: [PATCH 062/234] feat(resource): non-blocking channel health checks Fixes https://emqx.atlassian.net/browse/EMQX-12015 Continuation of https://github.com/emqx/emqx/pull/12812 --- .../test/emqx_bridge_api_SUITE.erl | 6 + .../src/emqx_resource_manager.erl | 220 ++++++++++++++---- .../test/emqx_connector_demo.erl | 45 +++- .../test/emqx_resource_SUITE.erl | 49 ++++ changes/ce/fix-12830.en.md | 1 + 5 files changed, 274 insertions(+), 47 deletions(-) create mode 100644 changes/ce/fix-12830.en.md diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 1971ad697..08b3270ea 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -858,6 +858,12 @@ do_start_stop_bridges(Type, Config) -> <<"status_reason">> := <<"connack_timeout">> } -> ok; + #{ + <<"node_status">> := [_, _ | _], + <<"status">> := <<"disconnected">>, + <<"status_reason">> := <<"connack_timeout">> + } -> + ok; #{ <<"node_status">> := [_], <<"status">> := <<"connecting">> diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 06123a935..fcdf56202 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -61,7 +61,7 @@ -export([init/1, callback_mode/0, handle_event/4, terminate/3]). %% Internal exports. --export([worker_resource_health_check/1]). +-export([worker_resource_health_check/1, worker_channel_health_check/2]). % State record -record(data, { @@ -78,12 +78,24 @@ pid, added_channels = #{}, %% Reference to process performing resource health check. 
- hc_workers = #{resource => #{}, channel => #{}} :: #{ - resource | channel := #{{pid(), reference()} => true} + hc_workers = #{ + resource => #{}, + channel => #{ + pending => [], + previous_status => #{} + } + } :: #{ + resource := #{{pid(), reference()} => true}, + channel := #{ + {pid(), reference()} => channel_id(), + pending := [channel_id()], + previous_status := #{channel_id() => channel_status_map()} + } }, %% Callers waiting on health check - hc_pending_callers = #{resource => [], channel => []} :: #{ - resource | channel := [gen_server:from()] + hc_pending_callers = #{resource => [], channel => #{}} :: #{ + resource := [gen_server:from()], + channel := #{channel_id() => [gen_server:from()]} }, extra }). @@ -107,6 +119,12 @@ -define(state_disconnected, disconnected). -define(state_stopped, stopped). +-type state() :: + ?state_stopped + | ?state_disconnected + | ?state_connecting + | ?state_connected. + -define(IS_STATUS(ST), ST =:= ?status_connecting; ST =:= ?status_connected; ST =:= ?status_disconnected ). @@ -339,6 +357,7 @@ add_channel(ResId, ChannelId, Config) -> Result = safe_call(ResId, {add_channel, ChannelId, Config}, ?T_OPERATION), %% Wait for health_check to finish _ = health_check(ResId), + _ = channel_health_check(ResId, ChannelId), Result. 
remove_channel(ResId, ChannelId) -> @@ -538,11 +557,20 @@ handle_event( info, {'DOWN', Ref, process, Pid, Res}, State0, - Data0 = #data{hc_workers = #{resource := HCWorkers}} + Data0 = #data{hc_workers = #{resource := RHCWorkers}} ) when - is_map_key({Pid, Ref}, HCWorkers) + is_map_key({Pid, Ref}, RHCWorkers) -> handle_resource_health_check_worker_down(State0, Data0, {Pid, Ref}, Res); +handle_event( + info, + {'DOWN', Ref, process, Pid, Res}, + _State, + Data0 = #data{hc_workers = #{channel := CHCWorkers}} +) when + is_map_key({Pid, Ref}, CHCWorkers) +-> + handle_channel_health_check_worker_down(Data0, {Pid, Ref}, Res); % Ignore all other events handle_event(EventType, EventData, State, Data) -> ?SLOG( @@ -558,7 +586,7 @@ handle_event(EventType, EventData, State, Data) -> keep_state_and_data. log_status_consistency(Status, #data{status = Status} = Data) -> - log_cache_consistency(read_cache(Data#data.id), Data); + log_cache_consistency(read_cache(Data#data.id), remove_runtime_data(Data)); log_status_consistency(Status, Data) -> ?tp(warning, "inconsistent_status", #{ status => Status, @@ -869,7 +897,7 @@ handle_manual_resource_health_check(From, Data0) -> Data = Data0#data{hc_pending_callers = Pending}, start_resource_health_check(Data). 
-reply_pending_health_check_callers(Status, resource, Data0 = #data{hc_pending_callers = Pending0}) -> +reply_pending_resource_health_check_callers(Status, Data0 = #data{hc_pending_callers = Pending0}) -> #{resource := RPending} = Pending0, Actions = [{reply, From, {ok, Status}} || From <- RPending], Data = Data0#data{hc_pending_callers = Pending0#{resource := []}}, @@ -888,13 +916,13 @@ start_resource_health_check(#data{hc_workers = #{resource := HCWorkers}}) when keep_state_and_data; start_resource_health_check(#data{} = Data0) -> #data{hc_workers = HCWorkers0 = #{resource := RHCWorkers0}} = Data0, - WorkerRef = {_Pid, _Ref} = spawn_health_check_worker(Data0), + WorkerRef = {_Pid, _Ref} = spawn_resource_health_check_worker(Data0), HCWorkers = HCWorkers0#{resource := RHCWorkers0#{WorkerRef => true}}, Data = Data0#data{hc_workers = HCWorkers}, {keep_state, Data}. --spec spawn_health_check_worker(data()) -> {pid(), reference()}. -spawn_health_check_worker(#data{} = Data) -> +-spec spawn_resource_health_check_worker(data()) -> {pid(), reference()}. +spawn_resource_health_check_worker(#data{} = Data) -> spawn_monitor(?MODULE, worker_resource_health_check, [Data]). %% separated so it can be spec'ed and placate dialyzer tantrums... @@ -939,7 +967,7 @@ continue_with_health_check(#data{} = Data0, CurrentState, HCRes) -> continue_resource_health_check_connected(NewStatus, Data0) -> case NewStatus of ?status_connected -> - {Replies, Data1} = reply_pending_health_check_callers(NewStatus, resource, Data0), + {Replies, Data1} = reply_pending_resource_health_check_callers(NewStatus, Data0), Data2 = channels_health_check(?status_connected, Data1), Data = update_state(Data2, Data0), Actions = Replies ++ health_check_actions(Data), @@ -954,13 +982,13 @@ continue_resource_health_check_connected(NewStatus, Data0) -> %% subset of resource manager state... 
But there should be a conversion %% between the two here, as resource manager also has `stopped', which is %% not a valid status at the time of writing. - {Replies, Data} = reply_pending_health_check_callers(NewStatus, resource, Data0), + {Replies, Data} = reply_pending_resource_health_check_callers(NewStatus, Data0), {next_state, NewStatus, channels_health_check(NewStatus, Data), Replies} end. %% Continuation to be used when the current resource state is not `?state_connected'. continue_resource_health_check_not_connected(NewStatus, Data0) -> - {Replies, Data} = reply_pending_health_check_callers(NewStatus, resource, Data0), + {Replies, Data} = reply_pending_resource_health_check_callers(NewStatus, Data0), case NewStatus of ?status_connected -> {next_state, ?state_connected, channels_health_check(?status_connected, Data), Replies}; @@ -975,6 +1003,30 @@ continue_resource_health_check_not_connected(NewStatus, Data0) -> handle_manual_channel_health_check(From, #data{state = undefined}, _ChannelId) -> {keep_state_and_data, [{reply, From, channel_status({error, resource_disconnected})}]}; +handle_manual_channel_health_check( + From, + #data{ + added_channels = Channels, + hc_pending_callers = #{channel := CPending0} = Pending0, + hc_workers = #{channel := #{previous_status := PreviousStatus}} + } = Data0, + ChannelId +) when + is_map_key(ChannelId, Channels), + is_map_key(ChannelId, PreviousStatus) +-> + %% Ongoing health check. + CPending = maps:update_with( + ChannelId, + fun(OtherCallers) -> + [From | OtherCallers] + end, + [From], + CPending0 + ), + Pending = Pending0#{channel := CPending}, + Data = Data0#data{hc_pending_callers = Pending}, + {keep_state, Data}; handle_manual_channel_health_check( From, #data{added_channels = Channels} = _Data, @@ -982,6 +1034,7 @@ handle_manual_channel_health_check( ) when is_map_key(ChannelId, Channels) -> + %% No ongoing health check: reply with current status. 
{keep_state_and_data, [{reply, From, maps:get(ChannelId, Channels)}]}; handle_manual_channel_health_check( From, @@ -990,10 +1043,6 @@ handle_manual_channel_health_check( ) -> {keep_state_and_data, [{reply, From, channel_status({error, channel_not_found})}]}. -get_channel_status_channel_added(#data{id = ResId, mod = Mod, state = State}, ChannelId) -> - RawStatus = emqx_resource:call_channel_health_check(ResId, ChannelId, Mod, State), - channel_status(RawStatus). - -spec channels_health_check(resource_status(), data()) -> data(). channels_health_check(?status_connected = _ConnectorStatus, Data0) -> Channels = maps:to_list(Data0#data.added_channels), @@ -1009,8 +1058,7 @@ channels_health_check(?status_connected = _ConnectorStatus, Data0) -> get_config_for_channels(Data0, ChannelsNotAdded), Data1 = add_channels_in_list(ChannelsNotAddedWithConfigs, Data0), %% Now that we have done the adding, we can get the status of all channels - Data2 = channel_status_for_all_channels(Data1), - update_state(Data2, Data0); + trigger_health_check_for_added_channels(Data1); channels_health_check(?status_connecting = _ConnectorStatus, Data0) -> %% Whenever the resource is connecting: %% 1. Change the status of all added channels to connecting @@ -1105,41 +1153,117 @@ resource_not_connected_channel_error_msg(ResourceStatus, ChannelId, Data1) -> ) ). -channel_status_for_all_channels(Data) -> - Channels = maps:to_list(Data#data.added_channels), - AddedChannelsWithOldAndNewStatus = [ - {ChannelId, OldStatus, get_channel_status_channel_added(Data, ChannelId)} - || {ChannelId, OldStatus} <- Channels, +%% Currently, we only call resource channel health checks when the underlying resource is +%% `?status_connected'. +-spec trigger_health_check_for_added_channels(data()) -> data(). 
+trigger_health_check_for_added_channels(Data0 = #data{hc_workers = HCWorkers0}) -> + #{channel := CHCWorkers0} = HCWorkers0, + PreviousStatus = maps:from_list([ + {ChannelId, OldStatus} + || {ChannelId, OldStatus} <- maps:to_list(Data0#data.added_channels), channel_status_is_channel_added(OldStatus) - ], + ]), + ChannelsToCheck = maps:keys(PreviousStatus), + case ChannelsToCheck of + [] -> + %% Nothing to do. + Data0; + [ChannelId | Rest] -> + %% Shooting one check at a time. We could increase concurrency in the future. + CHCWorkers = CHCWorkers0#{pending := Rest, previous_status := PreviousStatus}, + Data1 = Data0#data{hc_workers = HCWorkers0#{channel := CHCWorkers}}, + start_channel_health_check(Data1, ChannelId) + end. + +-spec continue_channel_health_check_connected(data()) -> data(). +continue_channel_health_check_connected(Data0) -> + #data{hc_workers = HCWorkers0} = Data0, + #{channel := #{previous_status := PreviousStatus} = CHCWorkers0} = HCWorkers0, + CHCWorkers = CHCWorkers0#{previous_status := #{}}, + Data1 = Data0#data{hc_workers = HCWorkers0#{channel := CHCWorkers}}, %% Remove the added channels with a a status different from connected or connecting + CheckedChannels = [ + {ChannelId, NewStatus} + || {ChannelId, NewStatus} <- maps:to_list(Data0#data.added_channels), + is_map_key(ChannelId, PreviousStatus) + ], ChannelsToRemove = [ ChannelId - || {ChannelId, _, NewStatus} <- AddedChannelsWithOldAndNewStatus, + || {ChannelId, NewStatus} <- CheckedChannels, not channel_status_is_channel_added(NewStatus) ], - Data1 = remove_channels_in_list(ChannelsToRemove, Data, true), + Data = remove_channels_in_list(ChannelsToRemove, Data1, true), %% Raise/clear alarms lists:foreach( fun - ({ID, _OldStatus, #{status := ?status_connected}}) -> + ({ID, #{status := ?status_connected}}) -> _ = maybe_clear_alarm(ID); - ({ID, OldStatus, NewStatus}) -> + ({ID, NewStatus}) -> + OldStatus = maps:get(ID, PreviousStatus), _ = maybe_alarm(NewStatus, ID, NewStatus, OldStatus) 
end, - AddedChannelsWithOldAndNewStatus + CheckedChannels ), - %% Update the ChannelsMap - ChannelsMap = Data1#data.added_channels, - NewChannelsMap = - lists:foldl( - fun({ChannelId, _, NewStatus}, Acc) -> - maps:put(ChannelId, NewStatus, Acc) - end, - ChannelsMap, - AddedChannelsWithOldAndNewStatus - ), - Data1#data{added_channels = NewChannelsMap}. + Data. + +-spec start_channel_health_check(data(), channel_id()) -> data(). +start_channel_health_check(#data{} = Data0, ChannelId) -> + #data{hc_workers = HCWorkers0 = #{channel := CHCWorkers0}} = Data0, + WorkerRef = {_Pid, _Ref} = spawn_channel_health_check_worker(Data0, ChannelId), + HCWorkers = HCWorkers0#{channel := CHCWorkers0#{WorkerRef => ChannelId}}, + Data0#data{hc_workers = HCWorkers}. + +-spec spawn_channel_health_check_worker(data(), channel_id()) -> {pid(), reference()}. +spawn_channel_health_check_worker(#data{} = Data, ChannelId) -> + spawn_monitor(?MODULE, worker_channel_health_check, [Data, ChannelId]). + +%% separated so it can be spec'ed and placate dialyzer tantrums... +-spec worker_channel_health_check(data(), channel_id()) -> no_return(). +worker_channel_health_check(Data, ChannelId) -> + #data{id = ResId, mod = Mod, state = State} = Data, + RawStatus = emqx_resource:call_channel_health_check(ResId, ChannelId, Mod, State), + exit({ok, channel_status(RawStatus)}). + +-spec handle_channel_health_check_worker_down( + data(), {pid(), reference()}, {ok, channel_status_map()} +) -> + gen_statem:event_handler_result(state(), data()). +handle_channel_health_check_worker_down(Data0, WorkerRef, ExitResult) -> + #data{ + hc_workers = HCWorkers0 = #{channel := CHCWorkers0}, + added_channels = AddedChannels0 + } = Data0, + {ChannelId, CHCWorkers1} = maps:take(WorkerRef, CHCWorkers0), + case ExitResult of + {ok, NewStatus} -> + %% `emqx_resource:call_channel_health_check' catches all exceptions. 
+ AddedChannels = maps:put(ChannelId, NewStatus, AddedChannels0) + end, + Data1 = Data0#data{added_channels = AddedChannels}, + {Replies, Data2} = reply_pending_channel_health_check_callers(ChannelId, NewStatus, Data1), + case CHCWorkers1 of + #{pending := [NextChannelId | Rest]} -> + CHCWorkers = CHCWorkers1#{pending := Rest}, + HCWorkers = HCWorkers0#{channel := CHCWorkers}, + Data3 = Data2#data{hc_workers = HCWorkers}, + Data = start_channel_health_check(Data3, NextChannelId), + {keep_state, update_state(Data, Data0), Replies}; + #{pending := []} -> + HCWorkers = HCWorkers0#{channel := CHCWorkers1}, + Data3 = Data2#data{hc_workers = HCWorkers}, + Data = continue_channel_health_check_connected(Data3), + {keep_state, update_state(Data, Data0), Replies} + end. + +reply_pending_channel_health_check_callers( + ChannelId, Status, Data0 = #data{hc_pending_callers = Pending0} +) -> + #{channel := CPending0} = Pending0, + Pending = maps:get(ChannelId, CPending0, []), + Actions = [{reply, From, Status} || From <- Pending], + CPending = maps:remove(ChannelId, CPending0), + Data = Data0#data{hc_pending_callers = Pending0#{channel := CPending}}, + {Actions, Data}. get_config_for_channels(Data0, ChannelsWithoutConfig) -> ResId = Data0#data.id, @@ -1181,8 +1305,14 @@ update_state(Data, _DataWas) -> remove_runtime_data(#data{} = Data0) -> Data0#data{ - hc_workers = #{resource => #{}, channel => #{}}, - hc_pending_callers = #{resource => [], channel => []} + hc_workers = #{ + resource => #{}, + channel => #{pending => [], previous_status => #{}} + }, + hc_pending_callers = #{ + resource => [], + channel => #{} + } }. 
health_check_interval(Opts) -> diff --git a/apps/emqx_resource/test/emqx_connector_demo.erl b/apps/emqx_resource/test/emqx_connector_demo.erl index d1ac5c2e6..754727e8c 100644 --- a/apps/emqx_resource/test/emqx_connector_demo.erl +++ b/apps/emqx_resource/test/emqx_connector_demo.erl @@ -31,7 +31,12 @@ on_query_async/4, on_batch_query/3, on_batch_query_async/4, - on_get_status/2 + on_get_status/2, + + on_add_channel/4, + on_remove_channel/3, + on_get_channels/1, + on_get_channel_status/3 ]). -export([counter_loop/0, set_callback_mode/1]). @@ -40,6 +45,7 @@ -export([roots/0]). -define(CM_KEY, {?MODULE, callback_mode}). +-define(PT_CHAN_KEY(CONN_RES_ID), {?MODULE, chans, CONN_RES_ID}). roots() -> [ @@ -71,12 +77,14 @@ on_start(InstId, #{name := Name} = Opts) -> {ok, Opts#{ id => InstId, stop_error => StopError, + channels => #{}, pid => spawn_counter_process(Name, Register) }}. on_stop(_InstId, #{stop_error := true}) -> {error, stop_error}; -on_stop(_InstId, #{pid := Pid}) -> +on_stop(InstId, #{pid := Pid}) -> + persistent_term:erase(?PT_CHAN_KEY(InstId)), stop_counter_process(Pid). on_query(_InstId, get_state, State) -> @@ -295,6 +303,31 @@ on_get_status(_InstId, #{pid := Pid}) -> false -> ?status_disconnected end. +on_add_channel(ConnResId, ConnSt0, ChanId, ChanCfg) -> + ConnSt = emqx_utils_maps:deep_put([channels, ChanId], ConnSt0, ChanCfg), + do_add_channel(ConnResId, ChanId, ChanCfg), + {ok, ConnSt}. + +on_remove_channel(ConnResId, ConnSt0, ChanId) -> + ConnSt = emqx_utils_maps:deep_remove([channels, ChanId], ConnSt0), + do_remove_channel(ConnResId, ChanId), + {ok, ConnSt}. + +on_get_channels(ConnResId) -> + persistent_term:get(?PT_CHAN_KEY(ConnResId), []). 
+ +on_get_channel_status(_ConnResId, ChanId, #{channels := Chans}) -> + case Chans of + #{ChanId := #{health_check_delay := Delay}} -> + ?tp(connector_demo_channel_health_check_delay, #{}), + timer:sleep(Delay), + ?status_connected; + #{ChanId := _ChanCfg} -> + ?status_connected; + #{} -> + ?status_disconnected + end. + spawn_counter_process(Name, Register) -> Pid = spawn_link(?MODULE, counter_loop, []), true = maybe_register(Name, Pid, Register), @@ -455,3 +488,11 @@ make_random_reply(N) -> 3 -> {error, {unrecoverable_error, N}} end. + +do_add_channel(ConnResId, ChanId, ChanCfg) -> + Chans = persistent_term:get(?PT_CHAN_KEY(ConnResId), []), + persistent_term:put(?PT_CHAN_KEY(ConnResId), [{ChanId, ChanCfg} | Chans]). + +do_remove_channel(ConnResId, ChanId) -> + Chans = persistent_term:get(?PT_CHAN_KEY(ConnResId), []), + persistent_term:put(?PT_CHAN_KEY(ConnResId), proplists:delete(ChanId, Chans)). diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index a6cdaedb2..99e85424d 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -3141,6 +3141,55 @@ t_non_blocking_resource_health_check(_Config) -> ), ok. 
+t_non_blocking_channel_health_check(_Config) -> + ?check_trace( + begin + {ok, _} = + create( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource, health_check_error => {delay, 500}}, + #{health_check_interval => 100} + ), + ChanId = <<"chan">>, + ok = + emqx_resource_manager:add_channel( + ?ID, + ChanId, + #{health_check_delay => 500} + ), + + %% concurrently attempt to health check the resource; should do it only once + %% for all callers + NumCallers = 20, + Expected = lists:duplicate( + NumCallers, + #{error => undefined, status => connected} + ), + ?assertEqual( + Expected, + emqx_utils:pmap( + fun(_) -> emqx_resource_manager:channel_health_check(?ID, ChanId) end, + lists:seq(1, NumCallers) + ) + ), + + NumCallers + end, + [ + log_consistency_prop(), + fun(NumCallers, Trace) -> + %% shouldn't have one health check per caller + SubTrace = ?of_kind(connector_demo_channel_health_check_delay, Trace), + ?assertMatch([_ | _], SubTrace), + ?assert(length(SubTrace) < (NumCallers div 2), #{trace => Trace}), + ok + end + ] + ), + ok. + %%------------------------------------------------------------------------------ %% Helpers %%------------------------------------------------------------------------------ diff --git a/changes/ce/fix-12830.en.md b/changes/ce/fix-12830.en.md new file mode 100644 index 000000000..5800a9bd3 --- /dev/null +++ b/changes/ce/fix-12830.en.md @@ -0,0 +1 @@ +Made channel (action/source) health checks non-blocking operations. This means that operations such as updating or removing an action/source data integration won't be blocked by a lengthy running health check. 
From ad52f7838e8de178842823f5a13ce4b527aea97d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 2 Apr 2024 16:53:24 +0200 Subject: [PATCH 063/234] feat(dsrepl): add APIs to manage DB replication sites --- .../test/emqx_persistent_messages_SUITE.erl | 2 +- .../src/emqx_ds_builtin_db_sup.erl | 8 +- .../src/emqx_ds_replication_layer.erl | 3 +- .../src/emqx_ds_replication_layer_meta.erl | 290 ++++++++++++------ .../src/emqx_ds_replication_layer_shard.erl | 2 +- .../emqx_ds_replication_shard_allocator.erl | 56 ++-- .../test/emqx_ds_SUITE.erl | 2 +- 7 files changed, 230 insertions(+), 133 deletions(-) diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 1a150d5b1..492fcaa6b 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -476,7 +476,7 @@ t_replication_options(_Config) -> resend_window := 60 } }, - emqx_ds_replication_layer_meta:get_options(?PERSISTENT_MESSAGE_DB) + emqx_ds_replication_layer_meta:db_config(?PERSISTENT_MESSAGE_DB) ), ?assertMatch( #{ diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index 79e2f6120..b230e4b89 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -116,7 +116,7 @@ init({#?db_sup{db = DB}, DefaultOpts}) -> Children = [ sup_spec(#?shards_sup{db = DB}, []), sup_spec(#?egress_sup{db = DB}, []), - shard_allocator_spec(DB, Opts) + shard_allocator_spec(DB) ], SupFlags = #{ strategy => one_for_all, @@ -148,7 +148,7 @@ init({#?shard_sup{db = DB, shard = Shard}, _}) -> intensity => 10, period => 100 }, - Opts = emqx_ds_replication_layer_meta:get_options(DB), + Opts = emqx_ds_replication_layer_meta:db_config(DB), Children = [ shard_storage_spec(DB, Shard, Opts), shard_replication_spec(DB, Shard, Opts) @@ -228,10 +228,10 @@ 
shard_replication_spec(DB, Shard, Opts) -> type => worker }. -shard_allocator_spec(DB, Opts) -> +shard_allocator_spec(DB) -> #{ id => shard_allocator, - start => {emqx_ds_replication_shard_allocator, start_link, [DB, Opts]}, + start => {emqx_ds_replication_shard_allocator, start_link, [DB]}, restart => permanent, type => worker }. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index 14c2268b8..6ce86e9cc 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -189,8 +189,7 @@ add_generation(DB) -> -spec update_db_config(emqx_ds:db(), builtin_db_opts()) -> ok | {error, _}. update_db_config(DB, CreateOpts) -> - ok = emqx_ds_replication_layer_meta:update_db_config(DB, CreateOpts), - Opts = emqx_ds_replication_layer_meta:get_options(DB), + Opts = #{} = emqx_ds_replication_layer_meta:update_db_config(DB, CreateOpts), foreach_shard( DB, fun(Shard) -> ok = ra_update_config(DB, Shard, Opts) end diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index f84863c03..723675699 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -29,25 +29,39 @@ -export([ shards/1, my_shards/1, - allocate_shards/2, - replica_set/2, + allocate_shards/1, sites/0, node/1, - open_db/2, - get_options/1, - update_db_config/2, - drop_db/1, this_site/0, print_status/0 ]). +%% DB API: +-export([ + open_db/2, + db_config/1, + update_db_config/2, + drop_db/1 +]). + +%% Site / shard allocation: +-export([ + assign_db_sites/2, + replica_set_transitions/2, + update_replica_set/3, + replica_set/2, + target_set/2 +]). + %% gen_server -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). 
%% internal exports: -export([ open_db_trans/2, - allocate_shards_trans/2, + allocate_shards_trans/1, + assign_db_sites_trans/2, + update_replica_set_trans/3, update_db_config_trans/2, drop_db_trans/1, claim_site/2, @@ -86,15 +100,20 @@ -record(?SHARD_TAB, { shard :: {emqx_ds:db(), emqx_ds_replication_layer:shard_id()}, + %% Sites that currently contain the data: + replica_set :: [site()], %% Sites that should contain the data when the cluster is in the %% stable state (no nodes are being added or removed from it): - replica_set :: [site()], + target_set :: [site()] | undefined, misc = #{} :: map() }). %% Persistent ID of the node (independent from the IP/FQDN): -type site() :: binary(). +%% Membership transition of shard's replica set: +-type transition() :: {add | del, site()}. + %% Peristent term key: -define(emqx_ds_builtin_site, emqx_ds_builtin_site). @@ -156,17 +175,17 @@ start_link() -> -spec shards(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()]. shards(DB) -> - filter_shards(DB). + Recs = mnesia:dirty_match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'})), + [Shard || #?SHARD_TAB{shard = {_, Shard}} <- Recs]. -spec my_shards(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()]. my_shards(DB) -> Site = this_site(), - filter_shards(DB, fun(#?SHARD_TAB{replica_set = ReplicaSet}) -> - lists:member(Site, ReplicaSet) - end). + Recs = mnesia:dirty_match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'})), + [Shard || #?SHARD_TAB{shard = {_, Shard}, replica_set = RS} <- Recs, lists:member(Site, RS)]. -allocate_shards(DB, Opts) -> - case mria:transaction(?SHARD, fun ?MODULE:allocate_shards_trans/2, [DB, Opts]) of +allocate_shards(DB) -> + case mria:transaction(?SHARD, fun ?MODULE:allocate_shards_trans/1, [DB]) of {atomic, Shards} -> {ok, Shards}; {aborted, {shards_already_allocated, Shards}} -> @@ -175,16 +194,6 @@ allocate_shards(DB, Opts) -> {error, #{reason => insufficient_sites_online, needed => Needed, sites => Sites}} end. 
--spec replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> - {ok, [site()]} | {error, _}. -replica_set(DB, Shard) -> - case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of - [#?SHARD_TAB{replica_set = ReplicaSet}] -> - {ok, ReplicaSet}; - [] -> - {error, no_shard} - end. - -spec sites() -> [site()]. sites() -> eval_qlc(qlc:q([Site || #?NODE_TAB{site = Site} <- mnesia:table(?NODE_TAB)])). @@ -198,8 +207,12 @@ node(Site) -> undefined end. --spec get_options(emqx_ds:db()) -> emqx_ds_replication_layer:builtin_db_opts(). -get_options(DB) -> +%%=============================================================================== +%% DB API +%%=============================================================================== + +-spec db_config(emqx_ds:db()) -> emqx_ds_replication_layer:builtin_db_opts(). +db_config(DB) -> case mnesia:dirty_read(?META_TAB, DB) of [#?META_TAB{db_props = Opts}] -> Opts; @@ -210,21 +223,64 @@ get_options(DB) -> -spec open_db(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> emqx_ds_replication_layer:builtin_db_opts(). open_db(DB, DefaultOpts) -> - {atomic, Opts} = mria:transaction(?SHARD, fun ?MODULE:open_db_trans/2, [DB, DefaultOpts]), - Opts. + transaction(fun ?MODULE:open_db_trans/2, [DB, DefaultOpts]). -spec update_db_config(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> - ok | {error, _}. + emqx_ds_replication_layer:builtin_db_opts() | {error, _}. update_db_config(DB, DefaultOpts) -> - {atomic, Opts} = mria:transaction(?SHARD, fun ?MODULE:update_db_config_trans/2, [ - DB, DefaultOpts - ]), - Opts. + transaction(fun ?MODULE:update_db_config_trans/2, [DB, DefaultOpts]). -spec drop_db(emqx_ds:db()) -> ok. drop_db(DB) -> - _ = mria:transaction(?SHARD, fun ?MODULE:drop_db_trans/1, [DB]), - ok. + transaction(fun ?MODULE:drop_db_trans/1, [DB]). + +-spec assign_db_sites(emqx_ds:db(), [site()]) -> ok. 
+assign_db_sites(DB, Sites) -> + case mria:transaction(?SHARD, fun ?MODULE:assign_db_sites_trans/2, [DB, Sites]) of + {atomic, ok} -> + ok; + {aborted, Reason} -> + {error, Reason} + end. + +-spec replica_set_transitions(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + [transition()] | undefined. +replica_set_transitions(DB, Shard) -> + case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of + [#?SHARD_TAB{target_set = TargetSet, replica_set = ReplicaSet}] -> + compute_transitions(TargetSet, ReplicaSet); + [] -> + undefined + end. + +-spec update_replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), transition()) -> ok. +update_replica_set(DB, Shard, Trans) -> + case mria:transaction(?SHARD, fun ?MODULE:update_replica_set_trans/3, [DB, Shard, Trans]) of + {atomic, ok} -> + ok; + {aborted, Reason} -> + {error, Reason} + end. + +-spec replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + [site()] | undefined. +replica_set(DB, Shard) -> + case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of + [#?SHARD_TAB{replica_set = ReplicaSet}] -> + ReplicaSet; + [] -> + undefined + end. + +-spec target_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + [site()] | undefined. +target_set(DB, Shard) -> + case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of + [#?SHARD_TAB{target_set = TargetSet}] -> + TargetSet; + [] -> + undefined + end. %%================================================================================ %% behavior callbacks @@ -268,19 +324,15 @@ open_db_trans(DB, CreateOpts) -> Opts end. --spec allocate_shards_trans(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> [_Shard]. 
-allocate_shards_trans(DB, Opts) -> - NShards = maps:get(n_shards, Opts), - NSites = maps:get(n_sites, Opts), - ReplicationFactor = maps:get(replication_factor, Opts), - NReplicas = min(NSites, ReplicationFactor), - Shards = [integer_to_binary(I) || I <- lists:seq(0, NShards - 1)], - AllSites = mnesia:match_object(?NODE_TAB, ?NODE_PAT(), read), - case length(AllSites) of +-spec allocate_shards_trans(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()]. +allocate_shards_trans(DB) -> + Opts = #{n_shards := NShards, n_sites := NSites} = db_config_trans(DB), + Nodes = mnesia:match_object(?NODE_TAB, ?NODE_PAT(), read), + case length(Nodes) of N when N >= NSites -> ok; _ -> - mnesia:abort({insufficient_sites_online, NSites, AllSites}) + mnesia:abort({insufficient_sites_online, NSites, Nodes}) end, case mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write) of [] -> @@ -289,18 +341,11 @@ allocate_shards_trans(DB, Opts) -> ShardsAllocated = [Shard || #?SHARD_TAB{shard = {_DB, Shard}} <- Records], mnesia:abort({shards_already_allocated, ShardsAllocated}) end, - {Allocation, _} = lists:mapfoldl( - fun(Shard, SSites) -> - {Sites, _} = emqx_utils_stream:consume(NReplicas, SSites), - {_, SRest} = emqx_utils_stream:consume(1, SSites), - {{Shard, Sites}, SRest} - end, - emqx_utils_stream:repeat(emqx_utils_stream:list(AllSites)), - Shards - ), + Shards = gen_shards(NShards), + Sites = [S || #?NODE_TAB{site = S} <- Nodes], + Allocation = compute_allocation(Shards, Sites, Opts), lists:map( - fun({Shard, Sites}) -> - ReplicaSet = [Site || #?NODE_TAB{site = Site} <- Sites], + fun({Shard, ReplicaSet}) -> Record = #?SHARD_TAB{ shard = {DB, Shard}, replica_set = ReplicaSet @@ -311,31 +356,71 @@ allocate_shards_trans(DB, Opts) -> Allocation ). +-spec assign_db_sites_trans(emqx_ds:db(), [site()]) -> ok. 
+assign_db_sites_trans(DB, Sites) -> + Opts = db_config_trans(DB), + case [S || S <- Sites, mnesia:read(?NODE_TAB, S, read) == []] of + [] -> + ok; + NonexistentSites -> + mnesia:abort({nonexistent_sites, NonexistentSites}) + end, + %% TODO + %% Optimize reallocation. The goals are: + %% 1. Minimize the number of membership transitions. + %% 2. Ensure that sites are responsible for roughly the same number of shards. + Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write), + Reallocation = compute_allocation(Shards, Sites, Opts), + lists:foreach( + fun({Record, ReplicaSet}) -> + ok = mnesia:write(Record#?SHARD_TAB{target_set = ReplicaSet}) + end, + Reallocation + ). + +update_replica_set_trans(DB, Shard, Trans) -> + case mnesia:read(?SHARD_TAB, {DB, Shard}, write) of + [Record = #?SHARD_TAB{replica_set = ReplicaSet0, target_set = TargetSet0}] -> + ReplicaSet = apply_transition(Trans, ReplicaSet0), + case lists:usort(TargetSet0) of + ReplicaSet -> + TargetSet = undefined; + TS -> + TargetSet = TS + end, + mnesia:write(Record#?SHARD_TAB{replica_set = ReplicaSet, target_set = TargetSet}); + [] -> + mnesia:abort({nonexistent_shard, {DB, Shard}}) + end. + -spec update_db_config_trans(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> - ok | {error, database}. -update_db_config_trans(DB, CreateOpts) -> + ok | {error, _}. 
+update_db_config_trans(DB, UpdateOpts) -> case mnesia:wread({?META_TAB, DB}) of [#?META_TAB{db_props = Opts}] -> %% Since this is an update and not a reopen, %% we should keep the shard number and replication factor %% and not create a new shard server - #{ - n_shards := NShards, - replication_factor := ReplicationFactor - } = Opts, - + ChangeableOpts = maps:without([n_shards, n_sites, replication_factor], UpdateOpts), + EffectiveOpts = maps:merge(Opts, ChangeableOpts), mnesia:write(#?META_TAB{ db = DB, - db_props = CreateOpts#{ - n_shards := NShards, - replication_factor := ReplicationFactor - } + db_props = EffectiveOpts }), - ok; + EffectiveOpts; [] -> {error, no_database} end. +-spec db_config_trans(emqx_ds:db()) -> emqx_ds_replication_layer:builtin_db_opts(). +db_config_trans(DB) -> + case mnesia:read(?META_TAB, DB, read) of + [#?META_TAB{db_props = Config}] -> + Config; + [] -> + mnesia:abort({nonexistent_db, DB}) + end. + -spec drop_db_trans(emqx_ds:db()) -> ok. drop_db_trans(DB) -> mnesia:delete({?META_TAB, DB}), @@ -391,6 +476,38 @@ ensure_site() -> persistent_term:put(?emqx_ds_builtin_site, Site), ok. +compute_allocation(Shards, Sites, Opts) -> + NSites = length(Sites), + ReplicationFactor = maps:get(replication_factor, Opts), + NReplicas = min(NSites, ReplicationFactor), + ShardsSorted = lists:sort(Shards), + SitesSorted = lists:sort(Sites), + {Allocation, _} = lists:mapfoldl( + fun(Shard, SSites) -> + {ReplicaSet, _} = emqx_utils_stream:consume(NReplicas, SSites), + {_, SRest} = emqx_utils_stream:consume(1, SSites), + {{Shard, ReplicaSet}, SRest} + end, + emqx_utils_stream:repeat(emqx_utils_stream:list(SitesSorted)), + ShardsSorted + ), + Allocation. + +compute_transitions(undefined, _ReplicaSet) -> + []; +compute_transitions(TargetSet, ReplicaSet) -> + Additions = TargetSet -- ReplicaSet, + Deletions = ReplicaSet -- TargetSet, + intersperse([{add, S} || S <- Additions], [{del, S} || S <- Deletions]). 
+ +apply_transition({add, S}, Sites) -> + lists:usort([S | Sites]); +apply_transition({del, S}, Sites) -> + lists:delete(S, Sites). + +gen_shards(NShards) -> + [integer_to_binary(I) || I <- lists:seq(0, NShards - 1)]. + eval_qlc(Q) -> case mnesia:is_transaction() of true -> @@ -400,29 +517,16 @@ eval_qlc(Q) -> Result end. -filter_shards(DB) -> - filter_shards(DB, const(true)). +transaction(Fun, Args) -> + {atomic, Result} = mria:transaction(?SHARD, Fun, Args), + Result. --spec filter_shards(emqx_ds:db(), fun((_) -> boolean())) -> - [emqx_ds_replication_layer:shard_id()]. -filter_shards(DB, Predicte) -> - filter_shards(DB, Predicte, fun(#?SHARD_TAB{shard = {_, ShardId}}) -> - ShardId - end). - -filter_shards(DB, Predicate, Mapper) -> - eval_qlc( - qlc:q([ - Mapper(Shard) - || #?SHARD_TAB{shard = {D, _}} = Shard <- mnesia:table( - ?SHARD_TAB - ), - D =:= DB, - Predicate(Shard) - ]) - ). - -const(Result) -> - fun(_) -> - Result - end. +%% @doc Intersperse elements of two lists. +%% Example: intersperse([1, 2], [3, 4, 5]) -> [1, 3, 2, 4, 5]. +-spec intersperse([X], [Y]) -> [X | Y]. +intersperse(L1, []) -> + L1; +intersperse([], L2) -> + L2; +intersperse([H1 | T1], L2) -> + [H1 | intersperse(L2, T1)]. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 62a6edab2..45739fbe3 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -44,7 +44,7 @@ start_link(DB, Shard, Opts) -> gen_server:start_link(?MODULE, {DB, Shard, Opts}, []). 
shard_servers(DB, Shard) -> - {ok, ReplicaSet} = emqx_ds_replication_layer_meta:replica_set(DB, Shard), + ReplicaSet = emqx_ds_replication_layer_meta:replica_set(DB, Shard), [ {server_name(DB, Shard, Site), emqx_ds_replication_layer_meta:node(Site)} || Site <- ReplicaSet diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index 6da33f09f..7393da692 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -16,7 +16,7 @@ -module(emqx_ds_replication_shard_allocator). --export([start_link/2]). +-export([start_link/1]). -export([n_shards/1]). -export([shard_meta/2]). @@ -35,8 +35,8 @@ %% -start_link(DB, Opts) -> - gen_server:start_link(?MODULE, {DB, Opts}, []). +start_link(DB) -> + gen_server:start_link(?MODULE, DB, []). n_shards(DB) -> Meta = persistent_term:get(?db_meta(DB)), @@ -49,22 +49,11 @@ shard_meta(DB, Shard) -> -define(ALLOCATE_RETRY_TIMEOUT, 1_000). -init({DB, Opts}) -> +init(DB) -> _ = erlang:process_flag(trap_exit, true), _ = logger:set_process_metadata(#{db => DB, domain => [ds, db, shard_allocator]}), - State = #{db => DB, opts => Opts, status => allocating}, - case allocate_shards(State) of - {ok, NState} -> - {ok, NState}; - {error, Data} -> - _ = logger:notice( - Data#{ - msg => "Shard allocation still in progress", - retry_in => ?ALLOCATE_RETRY_TIMEOUT - } - ), - {ok, State, ?ALLOCATE_RETRY_TIMEOUT} - end. + State = #{db => DB, status => allocating}, + handle_allocate_shards(State, ok). handle_call(_Call, _From, State) -> {reply, ignored, State}. @@ -73,18 +62,7 @@ handle_cast(_Cast, State) -> {noreply, State}. 
handle_info(timeout, State) -> - case allocate_shards(State) of - {ok, NState} -> - {noreply, NState}; - {error, Data} -> - _ = logger:notice( - Data#{ - msg => "Shard allocation still in progress", - retry_in => ?ALLOCATE_RETRY_TIMEOUT - } - ), - {noreply, State, ?ALLOCATE_RETRY_TIMEOUT} - end; + handle_allocate_shards(State, noreply); handle_info(_Info, State) -> {noreply, State}. @@ -96,8 +74,24 @@ terminate(_Reason, #{}) -> %% -allocate_shards(State = #{db := DB, opts := Opts}) -> - case emqx_ds_replication_layer_meta:allocate_shards(DB, Opts) of +handle_allocate_shards(State, Ret) -> + case allocate_shards(State) of + {ok, NState} -> + {Ret, NState}; + {error, Data} -> + _ = logger:notice( + Data#{ + msg => "Shard allocation still in progress", + retry_in => ?ALLOCATE_RETRY_TIMEOUT + } + ), + {Ret, State, ?ALLOCATE_RETRY_TIMEOUT} + end. + +%% + +allocate_shards(State = #{db := DB}) -> + case emqx_ds_replication_layer_meta:allocate_shards(DB) of {ok, Shards} -> logger:notice(#{msg => "Shards allocated", shards => Shards}), ok = start_shards(DB, emqx_ds_replication_layer_meta:my_shards(DB)), diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl index 3df16dc1c..18053ee7d 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl @@ -53,7 +53,7 @@ t_00_smoke_open_drop(_Config) -> lists:foreach( fun(Shard) -> ?assertEqual( - {ok, [Site]}, emqx_ds_replication_layer_meta:replica_set(DB, Shard) + [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard) ) end, Shards From bb8ffee18c65e08f33f588570619da75b9f558db Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 2 Apr 2024 17:13:47 +0200 Subject: [PATCH 064/234] feat(dsrepl): add API to get current DB replication sites --- .../src/emqx_ds_replication_layer_meta.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl 
b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 723675699..5e0a2798b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -49,6 +49,7 @@ assign_db_sites/2, replica_set_transitions/2, update_replica_set/3, + db_sites/1, replica_set/2, target_set/2 ]). @@ -243,6 +244,17 @@ assign_db_sites(DB, Sites) -> {error, Reason} end. +-spec db_sites(emqx_ds:db()) -> [site()]. +db_sites(DB) -> + Recs = mnesia:dirty_match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'})), + lists:foldl( + fun(#?SHARD_TAB{replica_set = RS}, Acc) -> + ordsets:union(ordsets:from_list(RS), Acc) + end, + ordsets:new(), + Recs + ). + -spec replica_set_transitions(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> [transition()] | undefined. replica_set_transitions(DB, Shard) -> From df6c5b35fead5d9f778efc3b510aa6ac423fd019 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 3 Apr 2024 15:02:32 +0200 Subject: [PATCH 065/234] feat(dsrepl): add more primitive operations to modify DB sites --- .../src/emqx_ds_replication_layer_meta.erl | 138 +++++++++++++----- 1 file changed, 103 insertions(+), 35 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 5e0a2798b..b81b21d61 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -46,6 +46,8 @@ %% Site / shard allocation: -export([ + join_db_site/2, + leave_db_site/2, assign_db_sites/2, replica_set_transitions/2, update_replica_set/3, @@ -62,6 +64,7 @@ open_db_trans/2, allocate_shards_trans/1, assign_db_sites_trans/2, + modify_db_sites_trans/2, update_replica_set_trans/3, update_db_config_trans/2, drop_db_trans/1, @@ -227,7 +230,7 @@ open_db(DB, DefaultOpts) -> transaction(fun ?MODULE:open_db_trans/2, [DB, DefaultOpts]). 
-spec update_db_config(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> - emqx_ds_replication_layer:builtin_db_opts() | {error, _}. + emqx_ds_replication_layer:builtin_db_opts() | {error, nonexistent_db}. update_db_config(DB, DefaultOpts) -> transaction(fun ?MODULE:update_db_config_trans/2, [DB, DefaultOpts]). @@ -235,26 +238,36 @@ update_db_config(DB, DefaultOpts) -> drop_db(DB) -> transaction(fun ?MODULE:drop_db_trans/1, [DB]). --spec assign_db_sites(emqx_ds:db(), [site()]) -> ok. -assign_db_sites(DB, Sites) -> - case mria:transaction(?SHARD, fun ?MODULE:assign_db_sites_trans/2, [DB, Sites]) of - {atomic, ok} -> - ok; - {aborted, Reason} -> - {error, Reason} - end. +%%=============================================================================== +%% Site / shard allocation API +%%=============================================================================== +%% @doc Join a site to the set of sites the DB is replicated across. +-spec join_db_site(emqx_ds:db(), site()) -> + ok | {error, nonexistent_db | nonexistent_sites}. +join_db_site(DB, Site) -> + transaction(fun ?MODULE:modify_db_sites_trans/2, [DB, [{add, Site}]]). + +%% @doc Make a site leave the set of sites the DB is replicated across. +-spec leave_db_site(emqx_ds:db(), site()) -> + ok | {error, nonexistent_db | nonexistent_sites}. +leave_db_site(DB, Site) -> + transaction(fun ?MODULE:modify_db_sites_trans/2, [DB, [{del, Site}]]). + +%% @doc Assign a set of sites to the DB for replication. +-spec assign_db_sites(emqx_ds:db(), [site()]) -> + ok | {error, nonexistent_db | nonexistent_sites}. +assign_db_sites(DB, Sites) -> + transaction(fun ?MODULE:assign_db_sites_trans/2, [DB, Sites]). + +%% @doc List the sites the DB is replicated across. -spec db_sites(emqx_ds:db()) -> [site()]. 
db_sites(DB) -> Recs = mnesia:dirty_match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'})), - lists:foldl( - fun(#?SHARD_TAB{replica_set = RS}, Acc) -> - ordsets:union(ordsets:from_list(RS), Acc) - end, - ordsets:new(), - Recs - ). + list_db_sites(Recs). +%% @doc List the sequence of transitions that should be conducted in order to +%% bring the set of replicas for a DB shard in line with the target set. -spec replica_set_transitions(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> [transition()] | undefined. replica_set_transitions(DB, Shard) -> @@ -265,6 +278,8 @@ replica_set_transitions(DB, Shard) -> undefined end. +%% @doc Update the set of replication sites for a shard. +%% To be called after a `transition()` has been conducted successfully. -spec update_replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), transition()) -> ok. update_replica_set(DB, Shard, Trans) -> case mria:transaction(?SHARD, fun ?MODULE:update_replica_set_trans/3, [DB, Shard, Trans]) of @@ -274,6 +289,7 @@ update_replica_set(DB, Shard, Trans) -> {error, Reason} end. +%% @doc Get the current set of replication sites for a shard. -spec replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> [site()] | undefined. replica_set(DB, Shard) -> @@ -284,6 +300,9 @@ replica_set(DB, Shard) -> undefined end. +%% @doc Get the target set of replication sites for a DB shard. +%% Target set is updated every time the set of replication sites for the DB changes. +%% See `join_db_site/2`, `leave_db_site/2`, `assign_db_sites/2`. -spec target_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> [site()] | undefined. target_set(DB, Shard) -> @@ -390,6 +409,18 @@ assign_db_sites_trans(DB, Sites) -> Reallocation ). +-spec modify_db_sites_trans(emqx_ds:db(), [transition()]) -> ok. 
+modify_db_sites_trans(DB, Modifications) -> + Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write), + Sites0 = list_db_target_sites(Shards), + Sites = lists:foldl(fun apply_transition/2, Sites0, Modifications), + case Sites of + Sites0 -> + ok; + _Chagned -> + assign_db_sites_trans(DB, Sites) + end. + update_replica_set_trans(DB, Shard, Trans) -> case mnesia:read(?SHARD_TAB, {DB, Shard}, write) of [Record = #?SHARD_TAB{replica_set = ReplicaSet0, target_set = TargetSet0}] -> @@ -406,27 +437,26 @@ update_replica_set_trans(DB, Shard, Trans) -> end. -spec update_db_config_trans(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) -> - ok | {error, _}. + emqx_ds_replication_layer:builtin_db_opts(). update_db_config_trans(DB, UpdateOpts) -> - case mnesia:wread({?META_TAB, DB}) of - [#?META_TAB{db_props = Opts}] -> - %% Since this is an update and not a reopen, - %% we should keep the shard number and replication factor - %% and not create a new shard server - ChangeableOpts = maps:without([n_shards, n_sites, replication_factor], UpdateOpts), - EffectiveOpts = maps:merge(Opts, ChangeableOpts), - mnesia:write(#?META_TAB{ - db = DB, - db_props = EffectiveOpts - }), - EffectiveOpts; - [] -> - {error, no_database} - end. + Opts = db_config_trans(DB, write), + %% Since this is an update and not a reopen, + %% we should keep the shard number and replication factor + %% and not create a new shard server + ChangeableOpts = maps:without([n_shards, n_sites, replication_factor], UpdateOpts), + EffectiveOpts = maps:merge(Opts, ChangeableOpts), + ok = mnesia:write(#?META_TAB{ + db = DB, + db_props = EffectiveOpts + }), + EffectiveOpts. -spec db_config_trans(emqx_ds:db()) -> emqx_ds_replication_layer:builtin_db_opts(). db_config_trans(DB) -> - case mnesia:read(?META_TAB, DB, read) of + db_config_trans(DB, read). 
+ +db_config_trans(DB, LockType) -> + case mnesia:read(?META_TAB, DB, LockType) of [#?META_TAB{db_props = Config}] -> Config; [] -> @@ -488,6 +518,27 @@ ensure_site() -> persistent_term:put(?emqx_ds_builtin_site, Site), ok. +%% @doc Returns sorted list of sites shards are replicated across. +-spec list_db_sites([_Shard]) -> [site()]. +list_db_sites(Shards) -> + flatmap_sorted_set(fun get_shard_sites/1, Shards). + +-spec list_db_target_sites([_Shard]) -> [site()]. +list_db_target_sites(Shards) -> + flatmap_sorted_set(fun get_shard_target_sites/1, Shards). + +-spec get_shard_sites(_Shard) -> [site()]. +get_shard_sites(#?SHARD_TAB{replica_set = ReplicaSet}) -> + ReplicaSet. + +-spec get_shard_target_sites(_Shard) -> [site()]. +get_shard_target_sites(#?SHARD_TAB{target_set = Sites}) when is_list(Sites) -> + Sites; +get_shard_target_sites(#?SHARD_TAB{target_set = undefined} = Shard) -> + get_shard_sites(Shard). + +-spec compute_allocation([Shard], [Site], emqx_ds_replication_layer:builtin_db_opts()) -> + [{Shard, [Site, ...]}]. compute_allocation(Shards, Sites, Opts) -> NSites = length(Sites), ReplicationFactor = maps:get(replication_factor, Opts), @@ -512,6 +563,8 @@ compute_transitions(TargetSet, ReplicaSet) -> Deletions = ReplicaSet -- TargetSet, intersperse([{add, S} || S <- Additions], [{del, S} || S <- Deletions]). +%% @doc Apply a transition to a list of sites, preserving sort order. +-spec apply_transition(transition(), [site()]) -> [site()]. apply_transition({add, S}, Sites) -> lists:usort([S | Sites]); apply_transition({del, S}, Sites) -> @@ -530,8 +583,12 @@ eval_qlc(Q) -> end. transaction(Fun, Args) -> - {atomic, Result} = mria:transaction(?SHARD, Fun, Args), - Result. + case mria:transaction(?SHARD, Fun, Args) of + {atomic, Result} -> + Result; + {aborted, Reason} -> + {error, Reason} + end. %% @doc Intersperse elements of two lists. %% Example: intersperse([1, 2], [3, 4, 5]) -> [1, 3, 2, 4, 5]. 
@@ -542,3 +599,14 @@ intersperse([], L2) -> L2; intersperse([H1 | T1], L2) -> [H1 | intersperse(L2, T1)]. + +%% @doc Map list into a list of sets and return union, as a sorted list. +-spec flatmap_sorted_set(fun((X) -> [Y]), [X]) -> [Y]. +flatmap_sorted_set(Fun, L) -> + ordsets:to_list( + lists:foldl( + fun(X, Acc) -> ordsets:union(ordsets:from_list(Fun(X)), Acc) end, + ordsets:new(), + L + ) + ). From ca0eecb9d0e7824b28a1742710fa2421b50ce1c3 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Fri, 5 Apr 2024 11:09:02 +0200 Subject: [PATCH 066/234] ci: stop running codeql on release-55 and enable it for release-57 --- .github/workflows/codeql.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index 1b32e1174..7b9c14d5f 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -24,8 +24,8 @@ jobs: matrix: branch: - master - - release-55 - release-56 + - release-57 language: - cpp - python From be47fe49ad75d304aa32b5ed77fc799c64460b32 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Fri, 5 Apr 2024 13:31:33 +0200 Subject: [PATCH 067/234] chore: bump ecql version to 0.7.0 PR: https://github.com/emqx/ecql/pull/13 No functional changes, just switch gen_fsm to gen_statem. --- apps/emqx_bridge_cassandra/rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge_cassandra/rebar.config b/apps/emqx_bridge_cassandra/rebar.config index e98146d78..13f95139c 100644 --- a/apps/emqx_bridge_cassandra/rebar.config +++ b/apps/emqx_bridge_cassandra/rebar.config @@ -2,7 +2,7 @@ {erl_opts, [debug_info]}. 
{deps, [ - {ecql, {git, "https://github.com/emqx/ecql.git", {tag, "v0.6.1"}}}, + {ecql, {git, "https://github.com/emqx/ecql.git", {tag, "v0.7.0"}}}, {emqx_connector, {path, "../../apps/emqx_connector"}}, {emqx_resource, {path, "../../apps/emqx_resource"}}, {emqx_bridge, {path, "../../apps/emqx_bridge"}} From d09787d1a63c4d85b979cec90b5e16b3029788e7 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 5 Apr 2024 13:33:12 +0200 Subject: [PATCH 068/234] fix(ds): Fix return types in replication_layer_meta --- .../src/emqx_ds_replication_layer_meta.erl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index b81b21d61..2fdd1c39d 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -72,7 +72,7 @@ n_shards/1 ]). --export_type([site/0]). +-export_type([site/0, update_cluster_result/0]). -include_lib("stdlib/include/qlc.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). @@ -118,6 +118,12 @@ %% Membership transition of shard's replica set: -type transition() :: {add | del, site()}. +-type update_cluster_result() :: + ok + | {error, {nonexistent_db, emqx_ds:db()}} + | {error, {nonexistent_sites, [site()]}} + | {error, _}. + %% Peristent term key: -define(emqx_ds_builtin_site, emqx_ds_builtin_site). @@ -243,20 +249,17 @@ drop_db(DB) -> %%=============================================================================== %% @doc Join a site to the set of sites the DB is replicated across. --spec join_db_site(emqx_ds:db(), site()) -> - ok | {error, nonexistent_db | nonexistent_sites}. +-spec join_db_site(emqx_ds:db(), site()) -> update_cluster_result(). join_db_site(DB, Site) -> transaction(fun ?MODULE:modify_db_sites_trans/2, [DB, [{add, Site}]]). 
%% @doc Make a site leave the set of sites the DB is replicated across. --spec leave_db_site(emqx_ds:db(), site()) -> - ok | {error, nonexistent_db | nonexistent_sites}. +-spec leave_db_site(emqx_ds:db(), site()) -> update_cluster_result(). leave_db_site(DB, Site) -> transaction(fun ?MODULE:modify_db_sites_trans/2, [DB, [{del, Site}]]). %% @doc Assign a set of sites to the DB for replication. --spec assign_db_sites(emqx_ds:db(), [site()]) -> - ok | {error, nonexistent_db | nonexistent_sites}. +-spec assign_db_sites(emqx_ds:db(), [site()]) -> update_cluster_result(). assign_db_sites(DB, Sites) -> transaction(fun ?MODULE:assign_db_sites_trans/2, [DB, Sites]). From a62db08676a17a52688a9968a37c52cb0c595f95 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:17:14 +0200 Subject: [PATCH 069/234] feat(ds): Add REST API for durable storage --- .../src/emqx_ds_replication_layer_meta.erl | 22 +- apps/emqx_management/src/emqx_mgmt_api_ds.erl | 490 ++++++++++++++++++ .../test/emqx_mgmt_api_ds_SUITE.erl | 180 +++++++ 3 files changed, 691 insertions(+), 1 deletion(-) create mode 100644 apps/emqx_management/src/emqx_mgmt_api_ds.erl create mode 100644 apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 2fdd1c39d..66029d4ca 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -29,7 +29,9 @@ -export([ shards/1, my_shards/1, + shard_info/2, allocate_shards/1, + replica_set/2, sites/0, node/1, this_site/0, @@ -52,7 +54,6 @@ replica_set_transitions/2, update_replica_set/3, db_sites/1, - replica_set/2, target_set/2 ]). @@ -188,6 +189,25 @@ shards(DB) -> Recs = mnesia:dirty_match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'})), [Shard || #?SHARD_TAB{shard = {_, Shard}} <- Recs]. 
+-spec shard_info(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + #{replica_set := #{site() => #{status => up | joining}}} + | undefined. +shard_info(DB, Shard) -> + case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of + [] -> + undefined; + [#?SHARD_TAB{replica_set = Replicas}] -> + ReplicaSet = maps:from_list([ + begin + %% TODO: + ReplInfo = #{status => up}, + {I, ReplInfo} + end + || I <- Replicas + ]), + #{replica_set => ReplicaSet} + end. + -spec my_shards(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()]. my_shards(DB) -> Site = this_site(), diff --git a/apps/emqx_management/src/emqx_mgmt_api_ds.erl b/apps/emqx_management/src/emqx_mgmt_api_ds.erl new file mode 100644 index 000000000..acd6cf462 --- /dev/null +++ b/apps/emqx_management/src/emqx_mgmt_api_ds.erl @@ -0,0 +1,490 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_api_ds). + +-behaviour(minirest_api). + +-include_lib("emqx/include/logger.hrl"). +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx_utils/include/emqx_utils_api.hrl"). + +-import(hoconsc, [mk/2, ref/1, enum/1, array/1]). 
+ +%% API: +-export([ + list_sites/2, + get_site/2, + list_dbs/2, + get_db/2, + db_replicas/2, + db_replica/2, + + update_db_sites/3, + join/3, + leave/3 +]). + +%% behavior callbacks: +-export([ + namespace/0, + api_spec/0, + schema/1, + paths/0, + fields/1 +]). + +%% internal exports: +-export([]). + +-export_type([]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(TAGS, [<<"Durable storage">>]). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +namespace() -> + undefined. + +api_spec() -> + emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). + +paths() -> + [ + "/ds/sites", + "/ds/sites/:site", + "/ds/storages", + "/ds/storages/:ds", + "/ds/storages/:ds/replicas", + "/ds/storages/:ds/replicas/:site" + ]. 
+ +schema("/ds/sites") -> + #{ + 'operationId' => list_sites, + get => + #{ + description => <<"List sites">>, + tags => ?TAGS, + responses => + #{ + 200 => mk(array(binary()), #{desc => <<"List sites">>}) + } + } + }; +schema("/ds/sites/:site") -> + #{ + 'operationId' => get_site, + get => + #{ + description => <<"Get sites">>, + parameters => [param_site_id()], + tags => ?TAGS, + responses => + #{ + 200 => mk(ref(site), #{desc => <<"Get information about the site">>}), + 404 => not_found(<<"Site">>) + } + } + }; +schema("/ds/storages") -> + #{ + 'operationId' => list_dbs, + get => + #{ + description => <<"List durable storages">>, + tags => ?TAGS, + responses => + #{ + 200 => mk(array(atom()), #{desc => <<"List durable storages">>}) + } + } + }; +schema("/ds/storages/:ds") -> + #{ + 'operationId' => get_db, + get => + #{ + description => <<"Get durable storage">>, + tags => ?TAGS, + parameters => [param_storage_id()], + responses => + #{ + 200 => mk(ref(db), #{desc => <<"Get information about a durable storage">>}), + 400 => not_found(<<"Durable storage">>) + } + } + }; +schema("/ds/storages/:ds/replicas") -> + Parameters = [param_storage_id()], + #{ + 'operationId' => db_replicas, + get => + #{ + description => <<"List replicas of the durable storage">>, + tags => ?TAGS, + parameters => Parameters, + responses => + #{ + 200 => mk(array(binary()), #{ + desc => <<"List sites that contain replicas of the durable storage">> + }), + 400 => not_found(<<"Durable storage">>) + } + }, + put => + #{ + description => <<"Update replicas of the durable storage">>, + tags => ?TAGS, + parameters => Parameters, + responses => + #{ + 202 => mk(array(binary()), #{}), + 400 => bad_request() + }, + 'requestBody' => mk(array(binary()), #{desc => <<"New list of sites">>}) + } + }; +schema("/ds/storages/:ds/replicas/:site") -> + Parameters = [param_storage_id(), param_site_id()], + #{ + 'operationId' => db_replica, + put => + #{ + description => <<"Add site as a replica for the 
durable storage">>, + tags => ?TAGS, + parameters => Parameters, + responses => + #{ + 202 => <<"OK">>, + 400 => bad_request(), + 404 => not_found(<<"Object">>) + } + }, + delete => + #{ + description => <<"Remove site as a replica for the durable storage">>, + tags => ?TAGS, + parameters => Parameters, + responses => + #{ + 202 => <<"OK">>, + 400 => bad_request(), + 404 => not_found(<<"Object">>) + } + } + }. + +fields(site) -> + [ + {node, + mk( + atom(), + #{ + desc => <<"Name of the EMQX handling the site">>, + example => <<"'emqx@example.com'">> + } + )}, + {up, + mk( + boolean(), + #{desc => <<"Site is up and running">>} + )}, + {shards, + mk( + array(ref(sites_shard)), + #{desc => <<"Durable storages that have replicas at the site">>} + )} + ]; +fields(sites_shard) -> + [ + {storage, + mk( + atom(), + #{ + desc => <<"Durable storage ID">>, + example => 'emqx_persistent_message' + } + )}, + {id, + mk( + binary(), + #{ + desc => <<"Shard ID">>, + example => <<"1">> + } + )}, + {status, + mk( + atom(), + #{ + desc => <<"Shard status">>, + example => up + } + )} + ]; +fields(db) -> + [ + {name, + mk( + atom(), + #{ + desc => <<"Name of the durable storage">>, + example => 'emqx_persistent_message' + } + )}, + {shards, + mk( + array(ref(db_shard)), + #{desc => <<"List of storage shards">>} + )} + ]; +fields(db_shard) -> + [ + {id, + mk( + binary(), + #{ + desc => <<"Shard ID">>, + example => <<"1">> + } + )}, + {replicas, + mk( + hoconsc:array(ref(db_site)), + #{desc => <<"List of sites containing replicas of the storage">>} + )} + ]; +fields(db_site) -> + [ + {site, + mk( + binary(), + #{ + desc => <<"Site ID">>, + example => example_site() + } + )}, + {status, + mk( + enum([up, joining]), + #{desc => <<"Status of the replica">>} + )} + ]. 
+ +%%================================================================================ +%% Internal exports +%%================================================================================ + +list_sites(get, _Params) -> + {200, emqx_ds_replication_layer_meta:sites()}. + +get_site(get, #{bindings := #{site := Site}}) -> + case lists:member(Site, emqx_ds_replication_layer_meta:sites()) of + false -> + ?NOT_FOUND(<<"Site not found: ", Site/binary>>); + true -> + Node = emqx_ds_replication_layer_meta:node(Site), + IsUp = lists:member(Node, [node() | nodes()]), + Shards = shards_of_site(Site), + ?OK(#{ + node => Node, + up => IsUp, + shards => Shards + }) + end. + +list_dbs(get, _Params) -> + ?OK(dbs()). + +get_db(get, #{bindings := #{ds := DB}}) -> + ?OK(#{ + name => DB, + shards => list_shards(DB) + }). + +db_replicas(get, #{bindings := #{ds := DB}}) -> + Replicas = lists:flatmap( + fun(Shard) -> + #{replica_set := RS} = emqx_ds_replication_layer_meta:shard_info(DB, Shard), + maps:keys(RS) + end, + emqx_ds_replication_layer_meta:shards(DB) + ), + ?OK(lists:usort(Replicas)); +db_replicas(put, #{bindings := #{ds := DB}, body := Sites}) -> + case update_db_sites(DB, Sites, rest) of + ok -> + {202, <<"OK">>}; + {error, Description} -> + ?BAD_REQUEST(400, Description) + end. + +db_replica(put, #{bindings := #{ds := DB, site := Site}}) -> + case join(DB, Site, rest) of + ok -> + {202, <<"OK">>}; + {error, Description} -> + ?BAD_REQUEST(400, Description) + end; +db_replica(delete, #{bindings := #{ds := DB, site := Site}}) -> + case leave(DB, Site, rest) of + ok -> + {202, <<"OK">>} + %% {error, Description} -> + %% ?BAD_REQUEST(400, Description) + end. + +-spec update_db_sites(emqx_ds:db(), [emqx_ds_replication_layer_meta:site()], rest | cli) -> + ok | {error, binary()}. 
+update_db_sites(DB, Sites, Via) when is_list(Sites) ->
+    UnknownSites = Sites -- emqx_ds_replication_layer_meta:sites(),
+    case {UnknownSites, Sites} of
+        {[], [_ | _]} ->
+            ?SLOG(warning, #{
+                msg => "durable_storage_rebalance_request", ds => DB, sites => Sites, via => Via
+            }),
+            %% TODO: Do stuff
+            ok;
+        {_, []} ->
+            {error, <<"Empty replica list">>};
+        {UnknownSites, _} ->
+            Message = io_lib:format(
+                "Unknown sites: ~p",
+                [lists:map(fun binary_to_list/1, UnknownSites)]
+            ),
+            {error, iolist_to_binary(Message)}
+    end;
+update_db_sites(_, _, _) ->
+    {error, <<"Bad type">>}.
+
+-spec join(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}.
+join(DB, Site, Via) ->
+    case lists:member(Site, emqx_ds_replication_layer_meta:sites()) of
+        true ->
+            ?SLOG(warning, #{
+                msg => "durable_storage_join_request", ds => DB, site => Site, via => Via
+            }),
+            %% TODO: Do stuff
+            ok;
+        false ->
+            Message = io_lib:format("Unknown site: ~s", [Site]),
+            {error, iolist_to_binary(Message)}
+    end.
+
+-spec leave(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}.
+leave(DB, Site, Via) ->
+    %% TODO: Do stuff
+    ?SLOG(warning, #{
+        msg => "durable_storage_leave_request", ds => DB, site => Site, via => Via
+    }),
+    ok.
+
+%%================================================================================
+%% Internal functions
+%%================================================================================
+
+%% site_info(Site) ->
+%%     #{}.
+
+not_found(What) ->
+    emqx_dashboard_swagger:error_codes(['NOT_FOUND'], <<What/binary, " not found">>).
+
+bad_request() ->
+    emqx_dashboard_swagger:error_codes(['BAD_REQUEST'], <<"Bad request">>).
+
+param_site_id() ->
+    Info = #{
+        required => true,
+        in => path,
+        desc => <<"Site ID">>,
+        example => example_site()
+    },
+    {site, mk(binary(), Info)}.
+ +param_storage_id() -> + Info = #{ + required => true, + in => path, + desc => <<"Durable storage ID">>, + example => emqx_persistent_message + }, + {ds, mk(enum(dbs()), Info)}. + +example_site() -> + try + emqx_ds_replication_layer_meta:this_site() + catch + _:_ -> + <<"AFA18CB1C22F0157">> + end. + +dbs() -> + [emqx_persistent_message]. + +shards_of_site(Site) -> + lists:flatmap( + fun({DB, Shard}) -> + case emqx_ds_replication_layer_meta:shard_info(DB, Shard) of + #{replica_set := #{Site := Info}} -> + [ + #{ + storage => DB, + id => Shard, + status => maps:get(status, Info) + } + ]; + _ -> + [] + end + end, + [ + {DB, Shard} + || DB <- dbs(), + Shard <- emqx_ds_replication_layer_meta:shards(DB) + ] + ). + +list_shards(DB) -> + [ + begin + #{replica_set := RS} = emqx_ds_replication_layer_meta:shard_info(DB, Shard), + Replicas = maps:fold( + fun(Site, #{status := Status}, Acc) -> + [ + #{ + site => Site, + status => Status + } + | Acc + ] + end, + [], + RS + ), + #{ + id => Shard, + replicas => Replicas + } + end + || Shard <- emqx_ds_replication_layer_meta:shards(DB) + ]. diff --git a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl new file mode 100644 index 000000000..ee0544730 --- /dev/null +++ b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl @@ -0,0 +1,180 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_api_ds_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +-import(emqx_mgmt_api_test_util, [api_path/1, request_api/2, request_api_with_body/3]). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + Apps = emqx_cth_suite:start( + [ + {emqx, "session_persistence.enable = true"}, + emqx_management, + {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"} + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + {ok, _} = emqx_common_test_http:create_default_app(), + [{suite_apps, Apps} | Config]. + +end_per_suite(Config) -> + ok = emqx_cth_suite:stop(?config(suite_apps, Config)). + +init_per_testcase(_, Config) -> + Config. + +end_per_testcase(_, Config) -> + Config. + +t_get_sites(_) -> + Path = api_path(["ds", "sites"]), + {ok, Response} = request_api(get, Path), + ?assertEqual( + [emqx_ds_replication_layer_meta:this_site()], + emqx_utils_json:decode(Response, [return_maps]) + ). + +t_get_storages(_) -> + Path = api_path(["ds", "storages"]), + {ok, Response} = request_api(get, Path), + ?assertEqual( + [<<"emqx_persistent_message">>], + emqx_utils_json:decode(Response, [return_maps]) + ). 
+ +t_get_site(_) -> + %% Unknown sites must result in error 404: + Path404 = api_path(["ds", "sites", "unknown_site"]), + ?assertMatch( + {error, {_, 404, _}}, + request_api(get, Path404) + ), + %% Valid path: + Path = api_path(["ds", "sites", emqx_ds_replication_layer_meta:this_site()]), + {ok, Response} = request_api(get, Path), + ThisNode = atom_to_binary(node()), + ?assertMatch( + #{ + <<"node">> := ThisNode, + <<"up">> := true, + <<"shards">> := + [ + #{ + <<"storage">> := <<"emqx_persistent_message">>, + <<"id">> := _, + <<"status">> := <<"up">> + } + | _ + ] + }, + emqx_utils_json:decode(Response, [return_maps]) + ). + +t_get_db(_) -> + %% Unknown DBs must result in error 400 (since the DS parameter is an enum): + Path400 = api_path(["ds", "storages", "unknown_ds"]), + ?assertMatch( + {error, {_, 400, _}}, + request_api(get, Path400) + ), + %% Valid path: + Path = api_path(["ds", "storages", "emqx_persistent_message"]), + {ok, Response} = request_api(get, Path), + ThisSite = emqx_ds_replication_layer_meta:this_site(), + ?assertMatch( + #{ + <<"name">> := <<"emqx_persistent_message">>, + <<"shards">> := + [ + #{ + <<"id">> := _, + <<"replicas">> := + [ + #{ + <<"site">> := ThisSite, + <<"status">> := <<"up">> + } + | _ + ] + } + | _ + ] + }, + emqx_utils_json:decode(Response) + ). + +t_get_replicas(_) -> + %% Unknown DBs must result in error 400 (since the DS parameter is an enum): + Path400 = api_path(["ds", "storages", "unknown_ds", "replicas"]), + ?assertMatch( + {error, {_, 400, _}}, + request_api(get, Path400) + ), + %% Valid path: + Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas"]), + {ok, Response} = request_api(get, Path), + ThisSite = emqx_ds_replication_layer_meta:this_site(), + ?assertEqual( + [ThisSite], + emqx_utils_json:decode(Response) + ). 
+ +t_put_replicas(_) -> + Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas"]), + %% Error cases: + ?assertMatch( + {ok, 400, #{<<"message">> := <<"Unknown sites: [\"invalid_site\"]">>}}, + parse_error(request_api_with_body(put, Path, [<<"invalid_site">>])) + ), + %% Success case: + ?assertMatch( + {ok, 202, <<"OK">>}, + request_api_with_body(put, Path, [emqx_ds_replication_layer_meta:this_site()]) + ). + +t_join(_) -> + Path400 = api_path(["ds", "storages", "emqx_persistent_message", "replicas", "unknown_site"]), + ?assertMatch( + {error, {_, 400, _}}, + parse_error(request_api(put, Path400)) + ), + ThisSite = emqx_ds_replication_layer_meta:this_site(), + Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas", ThisSite]), + ?assertMatch( + {ok, "OK"}, + request_api(put, Path) + ). + +t_leave(_) -> + ThisSite = emqx_ds_replication_layer_meta:this_site(), + Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas", ThisSite]), + ?assertMatch( + {ok, "OK"}, + request_api(delete, Path) + ). + +parse_error({ok, Code, JSON}) -> + {ok, Code, emqx_utils_json:decode(JSON)}; +parse_error(Err) -> + Err. 
From 46261440cbc6d39023bd66843f2ba56e2415253e Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 4 Apr 2024 22:12:59 +0200 Subject: [PATCH 070/234] feat(ds): Add a CLI for managing DB replicas --- apps/emqx_management/src/emqx_mgmt_api_ds.erl | 2 +- apps/emqx_management/src/emqx_mgmt_cli.erl | 43 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_ds.erl b/apps/emqx_management/src/emqx_mgmt_api_ds.erl index acd6cf462..8e64a7de5 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_ds.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_ds.erl @@ -357,7 +357,7 @@ db_replica(delete, #{bindings := #{ds := DB, site := Site}}) -> -spec update_db_sites(emqx_ds:db(), [emqx_ds_replication_layer_meta:site()], rest | cli) -> ok | {error, binary()}. update_db_sites(DB, Sites, Via) when is_list(Sites) -> - UnknownSites = Sites -- emqx_ds_replication_layer_meta:sites(), + UnknownSites = lists:usort(Sites) -- emqx_ds_replication_layer_meta:sites(), case {UnknownSites, Sites} of {[], [_ | _]} -> ?SLOG(warning, #{ diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index ddbc60d5c..12dd23d77 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -810,9 +810,50 @@ ds(CMD) -> do_ds(["info"]) -> emqx_ds_replication_layer_meta:print_status(); +do_ds(["set_replicas", DBStr | SitesStr]) -> + case emqx_utils:safe_to_existing_atom(DBStr) of + {ok, DB} -> + Sites = lists:map(fun list_to_binary/1, SitesStr), + case emqx_mgmt_api_ds:update_db_sites(DB, Sites, cli) of + ok -> + emqx_ctl:print("ok~n"); + {error, Description} -> + emqx_ctl:print("Unable to update replicas: ~s~n", [Description]) + end; + {error, _} -> + emqx_ctl:print("Unknown durable storage") + end; +do_ds(["join", DBStr, Site]) -> + case emqx_utils:safe_to_existing_atom(DBStr) of + {ok, DB} -> + case 
emqx_mgmt_api_ds:join(DB, list_to_binary(Site), cli) of
+                ok ->
+                    emqx_ctl:print("ok~n");
+                {error, Description} ->
+                    emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
+            end;
+        {error, _} ->
+            emqx_ctl:print("Unknown durable storage~n")
+    end;
+do_ds(["leave", DBStr, Site]) ->
+    case emqx_utils:safe_to_existing_atom(DBStr) of
+        {ok, DB} ->
+            case emqx_mgmt_api_ds:leave(DB, list_to_binary(Site), cli) of
+                ok ->
+                    emqx_ctl:print("ok~n");
+                {error, Description} ->
+                    emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
+            end;
+        {error, _} ->
+            emqx_ctl:print("Unknown durable storage~n")
+    end;
 do_ds(_) ->
     emqx_ctl:usage([
-        {"ds info", "Show overview of the embedded durable storage state"}
+        {"ds info", "Show overview of the embedded durable storage state"},
+        {"ds set_replicas <storage> <site1> <site2> ...",
+            "Change the replica set of the durable storage"},
+        {"ds join <storage> <site>", "Add site to the replica set of the storage"},
+        {"ds leave <storage> <site>", "Remove site from the replica set of the storage"}
     ]).
%%-------------------------------------------------------------------- From 2504b8126b5cb551173bdb722e017f9bbff03db0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 5 Apr 2024 13:34:32 +0200 Subject: [PATCH 071/234] feat(ds): Pass mgmt_ds REST API calls to the application --- apps/emqx_management/src/emqx_mgmt_api_ds.erl | 57 ++++++++----------- .../test/emqx_mgmt_api_ds_SUITE.erl | 4 +- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_ds.erl b/apps/emqx_management/src/emqx_mgmt_api_ds.erl index 8e64a7de5..c1a03feb4 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_ds.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_ds.erl @@ -349,55 +349,34 @@ db_replica(put, #{bindings := #{ds := DB, site := Site}}) -> db_replica(delete, #{bindings := #{ds := DB, site := Site}}) -> case leave(DB, Site, rest) of ok -> - {202, <<"OK">>} - %% {error, Description} -> - %% ?BAD_REQUEST(400, Description) + {202, <<"OK">>}; + {error, Description} -> + ?BAD_REQUEST(400, Description) end. -spec update_db_sites(emqx_ds:db(), [emqx_ds_replication_layer_meta:site()], rest | cli) -> ok | {error, binary()}. update_db_sites(DB, Sites, Via) when is_list(Sites) -> - UnknownSites = lists:usort(Sites) -- emqx_ds_replication_layer_meta:sites(), - case {UnknownSites, Sites} of - {[], [_ | _]} -> - ?SLOG(warning, #{ - msg => "durable_storage_rebalance_request", ds => DB, sites => Sites, via => Via - }), - %% TODO: Do stuff - ok; - {_, []} -> - {error, <<"Empty replica list">>}; - {UnknownSites, _} -> - Message = io_lib:format( - "Unknown sites: ~p", - [lists:map(fun binary_to_list/1, UnknownSites)] - ), - {error, iolist_to_binary(Message)} - end; + ?SLOG(warning, #{ + msg => "durable_storage_rebalance_request", ds => DB, sites => Sites, via => Via + }), + meta_result_to_binary(emqx_ds_replication_layer_meta:assign_db_sites(DB, Sites)); update_db_sites(_, _, _) -> {error, <<"Bad type">>}. 
-spec join(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}. join(DB, Site, Via) -> - case lists:member(Site, emqx_ds_replication_layer_meta:sites()) of - true -> - ?SLOG(warning, #{ - msg => "durable_storage_join_request", ds => DB, site => Site, via => Via - }), - %% TODO: Do stuff - ok; - false -> - Message = io_lib:format("Unknown site: ~s", [Site]), - {error, iolist_to_binary(Message)} - end. + ?SLOG(warning, #{ + msg => "durable_storage_join_request", ds => DB, site => Site, via => Via + }), + meta_result_to_binary(emqx_ds_replication_layer_meta:join_db_site(DB, Site)). -spec leave(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}. leave(DB, Site, Via) -> - %% TODO: Do stuff ?SLOG(warning, #{ msg => "durable_storage_leave_request", ds => DB, site => Site, via => Via }), - ok. + meta_result_to_binary(emqx_ds_replication_layer_meta:leave_db_site(DB, Site)). %%================================================================================ %% Internal functions @@ -488,3 +467,15 @@ list_shards(DB) -> end || Shard <- emqx_ds_replication_layer_meta:shards(DB) ]. + +meta_result_to_binary(ok) -> + ok; +meta_result_to_binary({error, {nonexistent_sites, UnknownSites}}) -> + Msg = ["Unknown sites: " | lists:join(", ", UnknownSites)], + {error, iolist_to_binary(Msg)}; +meta_result_to_binary({error, {nonexistent_db, DB}}) -> + IOList = io_lib:format("Unknown storage: ~p", [DB]), + {error, iolist_to_binary(IOList)}; +meta_result_to_binary({error, Err}) -> + IOList = io_lib:format("Error: ~p", [Err]), + {error, iolist_to_binary(IOList)}. 
diff --git a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl index ee0544730..fef9276ca 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_ds_SUITE.erl @@ -144,7 +144,7 @@ t_put_replicas(_) -> Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas"]), %% Error cases: ?assertMatch( - {ok, 400, #{<<"message">> := <<"Unknown sites: [\"invalid_site\"]">>}}, + {ok, 400, #{<<"message">> := <<"Unknown sites: invalid_site">>}}, parse_error(request_api_with_body(put, Path, [<<"invalid_site">>])) ), %% Success case: @@ -170,7 +170,7 @@ t_leave(_) -> ThisSite = emqx_ds_replication_layer_meta:this_site(), Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas", ThisSite]), ?assertMatch( - {ok, "OK"}, + {error, {_, 400, _}}, request_api(delete, Path) ). From 64cc064ddb167b16c8015a22326a113b25839d1d Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 5 Apr 2024 17:39:42 +0200 Subject: [PATCH 072/234] chore: Add changelog entry for #12833 --- changes/feat-12833.en.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 changes/feat-12833.en.md diff --git a/changes/feat-12833.en.md b/changes/feat-12833.en.md new file mode 100644 index 000000000..ef1d2fb30 --- /dev/null +++ b/changes/feat-12833.en.md @@ -0,0 +1,16 @@ +Added REST API endpoints and CLI commands for durable storage management. 
+ +New REST endpoints: + +- `/ds/sites` +- `/ds/sites/:site` +- `/ds/storages` +- `/ds/storages/:ds` +- `/ds/storages/:ds/replicas` +- `/ds/storages/:ds/replicas/:site` + +New CLI commands: + +- `ds set_replicas` +- `ds join` +- `ds leave` From a07295d3bc3d4da81d77d5ba2f2e2c91e1cebb09 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 5 Apr 2024 17:34:30 +0200 Subject: [PATCH 073/234] fix(ds): address shards in the supervisor properly --- apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index c521164f4..195db7c34 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -74,7 +74,7 @@ start_egress({DB, Shard}) -> supervisor:start_child(?via(#?egress_sup{db = DB}), egress_spec(DB, Shard)). -spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok. -stop_shard(Shard = {DB, _}) -> +stop_shard({DB, Shard}) -> Sup = ?via(#?shards_sup{db = DB}), ok = supervisor:terminate_child(Sup, Shard), ok = supervisor:delete_child(Sup, Shard). @@ -212,7 +212,7 @@ sup_spec(Id, Options) -> shard_spec(DB, Shard) -> #{ - id => {shard, Shard}, + id => Shard, start => {?MODULE, start_link_sup, [#?shard_sup{db = DB, shard = Shard}, []]}, shutdown => infinity, restart => permanent, From d6058b7f51b23a61bbb11c749898b39114f5d4e5 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 5 Apr 2024 17:36:57 +0200 Subject: [PATCH 074/234] feat(dsrepl): allow to subscribe to DB metadata changes Currently, only shard metadata changes are announced to the subscribers. 
--- .../src/emqx_ds_replication_layer_meta.erl | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 66029d4ca..f27fa414e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -57,6 +57,12 @@ target_set/2 ]). +%% Subscriptions to changes: +-export([ + subscribe/2, + unsubscribe/1 +]). + %% gen_server -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -125,6 +131,9 @@ | {error, {nonexistent_sites, [site()]}} | {error, _}. +%% Subject of the subscription: +-type subject() :: emqx_ds:db(). + %% Peristent term key: -define(emqx_ds_builtin_site, emqx_ds_builtin_site). @@ -336,11 +345,21 @@ target_set(DB, Shard) -> undefined end. +%%================================================================================ + +subscribe(Pid, Subject) -> + gen_server:call(?SERVER, {subscribe, Pid, Subject}, infinity). + +unsubscribe(Pid) -> + gen_server:call(?SERVER, {unsubscribe, Pid}, infinity). + %%================================================================================ %% behavior callbacks %%================================================================================ --record(s, {}). +-record(s, { + subs = #{} :: #{pid() => {subject(), _Monitor :: reference()}} +}). init([]) -> process_flag(trap_exit, true), @@ -348,14 +367,24 @@ init([]) -> ensure_tables(), ensure_site(), S = #s{}, + {ok, _Node} = mnesia:subscribe({table, ?SHARD_TAB, simple}), {ok, S}. +handle_call({subscribe, Pid, Subject}, _From, S) -> + {reply, ok, handle_subscribe(Pid, Subject, S)}; +handle_call({unsubscribe, Pid}, _From, S) -> + {reply, ok, handle_unsubscribe(Pid, S)}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. handle_cast(_Cast, S) -> {noreply, S}. 
+handle_info({mnesia_table_event, {write, #?SHARD_TAB{shard = {DB, Shard}}, _}}, S) -> + ok = notify_subscribers(DB, {shard, DB, Shard}, S), + {noreply, S}; +handle_info({'DOWN', _MRef, process, Pid, _Reason}, S) -> + {noreply, handle_unsubscribe(Pid, S)}; handle_info(_Info, S) -> {noreply, S}. @@ -613,6 +642,38 @@ transaction(Fun, Args) -> {error, Reason} end. +%%==================================================================== + +handle_subscribe(Pid, Subject, S = #s{subs = Subs0}) -> + case maps:is_key(Pid, Subs0) of + false -> + MRef = erlang:monitor(process, Pid), + Subs = Subs0#{Pid => {Subject, MRef}}, + S#s{subs = Subs}; + true -> + S + end. + +handle_unsubscribe(Pid, S = #s{subs = Subs0}) -> + case maps:take(Pid, Subs0) of + {{_Subject, MRef}, Subs} -> + _ = erlang:demonitor(MRef, [flush]), + S#s{subs = Subs}; + error -> + S + end. + +notify_subscribers(EventSubject, Event, #s{subs = Subs}) -> + maps:foreach( + fun(Pid, {Subject, _MRef}) -> + Subject == EventSubject andalso + erlang:send(Pid, {changed, Event}) + end, + Subs + ). + +%%==================================================================== + %% @doc Intersperse elements of two lists. %% Example: intersperse([1, 2], [3, 4, 5]) -> [1, 3, 2, 4, 5]. -spec intersperse([X], [Y]) -> [X | Y]. 
From 556ffc78c9eac7ed42c1400eee2d28fc0c87417f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 5 Apr 2024 17:39:17 +0200 Subject: [PATCH 075/234] feat(dsrepl): implement membership changes and rebalancing --- .../src/emqx_ds_replication_layer_shard.erl | 140 +++++++++- .../emqx_ds_replication_shard_allocator.erl | 259 ++++++++++++++++-- .../test/emqx_ds_replication_SUITE.erl | 244 ++++++++++++++--- 3 files changed, 561 insertions(+), 82 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 45739fbe3..a57e45dfd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -21,6 +21,7 @@ %% Static server configuration -export([ shard_servers/2, + shard_server/3, local_server/2 ]). @@ -30,6 +31,14 @@ server/3 ]). +%% Membership +-export([ + add_local_server/2, + drop_local_server/2, + remove_server/3, + server_info/2 +]). + -behaviour(gen_server). -export([ init/1, @@ -38,21 +47,31 @@ terminate/2 ]). +-type server() :: ra:server_id(). + +-define(MEMBERSHIP_CHANGE_TIMEOUT, 30_000). + %% start_link(DB, Shard, Opts) -> gen_server:start_link(?MODULE, {DB, Shard, Opts}, []). +-spec shard_servers(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> [server()]. shard_servers(DB, Shard) -> ReplicaSet = emqx_ds_replication_layer_meta:replica_set(DB, Shard), - [ - {server_name(DB, Shard, Site), emqx_ds_replication_layer_meta:node(Site)} - || Site <- ReplicaSet - ]. + [shard_server(DB, Shard, Site) || Site <- ReplicaSet]. +-spec shard_server( + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_replication_layer_meta:site() +) -> server(). +shard_server(DB, Shard, Site) -> + {server_name(DB, Shard, Site), emqx_ds_replication_layer_meta:node(Site)}. + +-spec local_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> server(). 
local_server(DB, Shard) -> - Site = emqx_ds_replication_layer_meta:this_site(), - {server_name(DB, Shard, Site), node()}. + {server_name(DB, Shard, local_site()), node()}. cluster_name(DB, Shard) -> iolist_to_binary(io_lib:format("~s_~s", [DB, Shard])). @@ -61,6 +80,14 @@ server_name(DB, Shard, Site) -> DBBin = atom_to_binary(DB), binary_to_atom(<<"ds_", DBBin/binary, Shard/binary, "_", Site/binary>>). +server_uid(_DB, Shard) -> + %% NOTE + %% Each new "instance" of a server should have a unique identifier. Otherwise, + %% if some server migrates to another node during rebalancing, and then comes + %% back, `ra` will be very confused by it having the same UID as before. + Ts = integer_to_binary(erlang:system_time(microsecond)), + <>. + %% servers(DB, Shard, _Order = leader_preferred) -> @@ -118,11 +145,100 @@ get_local_server(DB, Shard) -> get_shard_servers(DB, Shard) -> maps:get(servers, emqx_ds_replication_shard_allocator:shard_meta(DB, Shard)). +local_site() -> + emqx_ds_replication_layer_meta:this_site(). + +%% + +add_local_server(DB, Shard) -> + %% NOTE + %% Adding local server as "promotable" member to the cluster, which means + %% that it will affect quorum until it is promoted to a voter, which in + %% turn happens when the server has caught up sufficiently with the log. + %% We also rely on this "membership" to understand when the server's + %% readiness. + ShardServers = shard_servers(DB, Shard), + LocalServer = local_server(DB, Shard), + ServerRecord = #{ + id => LocalServer, + membership => promotable, + uid => server_uid(DB, Shard) + }, + case ra:add_member(ShardServers, ServerRecord, ?MEMBERSHIP_CHANGE_TIMEOUT) of + {ok, _, _Leader} -> + ok; + {error, already_member} -> + ok; + {error, Reason} -> + {error, recoverable, Reason} + end. 
+ +drop_local_server(DB, Shard) -> + LocalServer = local_server(DB, Shard), + case remove_server(DB, Shard, LocalServer) of + ok -> + ra:force_delete_server(DB, LocalServer); + {error, _, _Reason} = Error -> + Error + end. + +remove_server(DB, Shard, Server) -> + ShardServers = shard_servers(DB, Shard), + case ra:remove_member(ShardServers, Server, ?MEMBERSHIP_CHANGE_TIMEOUT) of + {ok, _, _Leader} -> + ok; + {error, not_member} -> + ok; + {error, Reason} -> + {error, recoverable, Reason} + end. + +server_info(readiness, Server) -> + %% NOTE + %% Server is ready if it's either the leader or a follower with voter "membership" + %% status (meaning it was promoted after catching up with the log). + case current_leader(Server) of + Server -> + ready; + Leader when Leader /= unknown -> + member_info(readiness, Server, Leader); + unknown -> + unknown + end; +server_info(leader, Server) -> + current_leader(Server). + +member_info(readiness, Server, Leader) -> + case ra:member_overview(Leader) of + {ok, #{cluster := Cluster}, _} -> + member_readiness(maps:get(Server, Cluster)); + _Error -> + unknown + end. + +current_leader(Server) -> + case ra:members(Server) of + {ok, _Servers, Leader} -> + Leader; + _Error -> + unknown + end. + +member_readiness(#{status := Status, voter_status := #{membership := Membership}}) -> + case Status of + normal when Membership =:= voter -> + ready; + _Other -> + {unready, Status, Membership} + end; +member_readiness(#{}) -> + unknown. + %% init({DB, Shard, Opts}) -> _ = process_flag(trap_exit, true), - _Meta = start_shard(DB, Shard, Opts), + ok = start_shard(DB, Shard, Opts), {ok, {DB, Shard}}. 
handle_call(_Call, _From, State) -> @@ -138,7 +254,6 @@ terminate(_Reason, {DB, Shard}) -> %% start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> - Site = emqx_ds_replication_layer_meta:this_site(), ClusterName = cluster_name(DB, Shard), LocalServer = local_server(DB, Shard), Servers = shard_servers(DB, Shard), @@ -157,7 +272,7 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> ), ok = ra:start_server(DB, #{ id => LocalServer, - uid => <>, + uid => server_uid(DB, Shard), cluster_name => ClusterName, initial_members => Servers, machine => Machine, @@ -188,12 +303,7 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> end; _ -> ok - end, - #{ - cluster_name => ClusterName, - servers => Servers, - local_server => LocalServer - }. + end. %% diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index 7393da692..4113fcedc 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -16,6 +16,8 @@ -module(emqx_ds_replication_shard_allocator). +-include_lib("snabbkaffe/include/trace.hrl"). + -export([start_link/1]). -export([n_shards/1]). @@ -30,9 +32,23 @@ terminate/2 ]). +-export([handle_transition/4]). + -define(db_meta(DB), {?MODULE, DB}). -define(shard_meta(DB, SHARD), {?MODULE, DB, SHARD}). +-define(ALLOCATE_RETRY_TIMEOUT, 1_000). + +-define(TRANS_RETRY_TIMEOUT, 5_000). +-define(REMOVE_REPLICA_DELAY, {10_000, 5_000}). + +-ifdef(TEST). +-undef(TRANS_RETRY_TIMEOUT). +-undef(REMOVE_REPLICA_DELAY). +-define(TRANS_RETRY_TIMEOUT, 1_000). +-define(REMOVE_REPLICA_DELAY, {4_000, 2_000}). +-endif. + %% start_link(DB) -> @@ -47,13 +63,11 @@ shard_meta(DB, Shard) -> %% --define(ALLOCATE_RETRY_TIMEOUT, 1_000). 
- init(DB) -> _ = erlang:process_flag(trap_exit, true), - _ = logger:set_process_metadata(#{db => DB, domain => [ds, db, shard_allocator]}), - State = #{db => DB, status => allocating}, - handle_allocate_shards(State, ok). + _ = logger:set_process_metadata(#{db => DB, domain => [emqx, ds, DB, shard_allocator]}), + State = #{db => DB, transitions => #{}, status => allocating}, + {ok, handle_allocate_shards(State)}. handle_call(_Call, _From, State) -> {reply, ignored, State}. @@ -61,12 +75,19 @@ handle_call(_Call, _From, State) -> handle_cast(_Cast, State) -> {noreply, State}. -handle_info(timeout, State) -> - handle_allocate_shards(State, noreply); +handle_info({timeout, _TRef, allocate}, State) -> + {noreply, handle_allocate_shards(State)}; +handle_info({changed, {shard, DB, Shard}}, State = #{db := DB}) -> + {noreply, handle_shard_changed(Shard, State)}; +handle_info({changed, _}, State) -> + {noreply, State}; +handle_info({'EXIT', Pid, Reason}, State) -> + {noreply, handle_exit(Pid, Reason, State)}; handle_info(_Info, State) -> {noreply, State}. -terminate(_Reason, #{db := DB, shards := Shards}) -> +terminate(_Reason, State = #{db := DB, shards := Shards}) -> + unsubscribe_db_changes(State), erase_db_meta(DB), erase_shards_meta(DB, Shards); terminate(_Reason, #{}) -> @@ -74,10 +95,11 @@ terminate(_Reason, #{}) -> %% -handle_allocate_shards(State, Ret) -> +handle_allocate_shards(State) -> case allocate_shards(State) of {ok, NState} -> - {Ret, NState}; + ok = subscribe_db_changes(State), + NState; {error, Data} -> _ = logger:notice( Data#{ @@ -85,15 +107,197 @@ handle_allocate_shards(State, Ret) -> retry_in => ?ALLOCATE_RETRY_TIMEOUT } ), - {Ret, State, ?ALLOCATE_RETRY_TIMEOUT} + _TRef = erlang:start_timer(?ALLOCATE_RETRY_TIMEOUT, self(), allocate), + State end. +subscribe_db_changes(#{db := DB}) -> + emqx_ds_replication_layer_meta:subscribe(self(), DB). + +unsubscribe_db_changes(_State) -> + emqx_ds_replication_layer_meta:unsubscribe(self()). 
+ +%% + +handle_shard_changed(Shard, State = #{db := DB}) -> + ok = save_shard_meta(DB, Shard), + Transitions = emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard), + handle_shard_transitions(Shard, Transitions, State). + +handle_shard_transitions(Shard, Transitions, State = #{db := DB}) -> + ThisSite = emqx_ds_replication_layer_meta:this_site(), + case Transitions of + [] -> + %% We reached the target allocation. + State; + [Trans = {add, ThisSite} | _Rest] -> + ensure_transition_handler(Shard, Trans, fun trans_add_local/3, State); + [Trans = {del, ThisSite} | _Rest] -> + ensure_transition_handler(Shard, Trans, fun trans_drop_local/3, State); + [Trans = {del, Site} | _Rest] -> + ReplicaSet = emqx_ds_replication_layer_meta:replica_set(DB, Shard), + case lists:member(Site, ReplicaSet) of + true -> + %% NOTE + %% Putting this transition handler on separate "track" so that it + %% won't block any changes with higher priority (e.g. managing + %% local replicas). + Handler = fun trans_rm_unresponsive/3, + ensure_transition_handler(unresp, Shard, Trans, Handler, State); + false -> + State + end; + [_Trans | _Rest] -> + %% This site is not involved in the next queued transition. + State + end. + +handle_transition(DB, Shard, Trans, Fun) -> + logger:set_process_metadata(#{ + db => DB, + shard => Shard, + domain => [emqx, ds, DB, shard_transition] + }), + ?tp( + dsrepl_shard_transition_begin, + #{shard => Shard, db => DB, transition => Trans, pid => self()} + ), + erlang:apply(Fun, [DB, Shard, Trans]). + +trans_add_local(DB, Shard, {add, Site}) -> + logger:info(#{msg => "Adding new local shard replica", site => Site}), + do_add_local(membership, DB, Shard). 
+ +do_add_local(membership = Stage, DB, Shard) -> + ok = start_shard(DB, Shard), + case emqx_ds_replication_layer_shard:add_local_server(DB, Shard) of + ok -> + do_add_local(readiness, DB, Shard); + {error, recoverable, Reason} -> + logger:warning(#{ + msg => "Shard membership change failed", + reason => Reason, + retry_in => ?TRANS_RETRY_TIMEOUT + }), + ok = timer:sleep(?TRANS_RETRY_TIMEOUT), + do_add_local(Stage, DB, Shard) + end; +do_add_local(readiness = Stage, DB, Shard) -> + LocalServer = emqx_ds_replication_layer_shard:local_server(DB, Shard), + case emqx_ds_replication_layer_shard:server_info(readiness, LocalServer) of + ready -> + logger:info(#{msg => "Local shard replica ready"}); + Status -> + logger:warning(#{ + msg => "Still waiting for local shard replica to be ready", + status => Status, + retry_in => ?TRANS_RETRY_TIMEOUT + }), + ok = timer:sleep(?TRANS_RETRY_TIMEOUT), + do_add_local(Stage, DB, Shard) + end. + +trans_drop_local(DB, Shard, {del, Site}) -> + logger:info(#{msg => "Dropping local shard replica", site => Site}), + do_drop_local(DB, Shard). + +do_drop_local(DB, Shard) -> + case emqx_ds_replication_layer_shard:drop_local_server(DB, Shard) of + ok -> + ok = emqx_ds_builtin_db_sup:stop_shard({DB, Shard}), + ok = emqx_ds_storage_layer:drop_shard({DB, Shard}), + logger:info(#{msg => "Local shard replica dropped"}); + {error, recoverable, Reason} -> + logger:warning(#{ + msg => "Shard membership change failed", + reason => Reason, + retry_in => ?TRANS_RETRY_TIMEOUT + }), + ok = timer:sleep(?TRANS_RETRY_TIMEOUT), + do_drop_local(DB, Shard) + end. + +trans_rm_unresponsive(DB, Shard, Trans = {del, Site}) -> + %% NOTE + %% Let the replica handle its own removal first, thus the delay. 
+ ok = delay(?REMOVE_REPLICA_DELAY), + Transitions = emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard), + case Transitions of + [Trans | _] -> + logger:info(#{msg => "Removing unresponsive shard replica", site => Site}), + do_rm_unresponsive(DB, Shard, Site); + _Outdated -> + exit({shutdown, skipped}) + end. + +do_rm_unresponsive(DB, Shard, Site) -> + Server = emqx_ds_replication_layer_shard:shard_server(DB, Shard, Site), + case emqx_ds_replication_layer_shard:remove_server(DB, Shard, Server) of + ok -> + logger:info(#{msg => "Unresponsive shard replica removed"}); + {error, recoverable, Reason} -> + logger:warning(#{ + msg => "Shard membership change failed", + reason => Reason, + retry_in => ?TRANS_RETRY_TIMEOUT + }), + ok = timer:sleep(?TRANS_RETRY_TIMEOUT), + do_rm_unresponsive(DB, Shard, Site) + end. + +%% + +ensure_transition_handler(Shard, Trans, Handler, State) -> + ensure_transition_handler(Shard, Shard, Trans, Handler, State). + +ensure_transition_handler(Track, Shard, Trans, Handler, State = #{transitions := Ts}) -> + case maps:get(Track, Ts, undefined) of + undefined -> + Pid = start_transition_handler(Shard, Trans, Handler, State), + State#{transitions := Ts#{Track => {Shard, Trans, Pid}}}; + _AlreadyRunning -> + %% NOTE: Avoiding multiple transition handlers for the same shard for safety. + State + end. + +start_transition_handler(Shard, Trans, Handler, #{db := DB}) -> + proc_lib:spawn_link(?MODULE, handle_transition, [DB, Shard, Trans, Handler]). 
+ +handle_exit(Pid, Reason, State = #{db := DB, transitions := Ts}) -> + case maps:to_list(maps:filter(fun(_, {_S, _T, P}) -> P == Pid end, Ts)) of + [{Track, {Shard, Trans, Pid}}] -> + ?tp( + dsrepl_shard_transition_end, + #{shard => Shard, db => DB, transition => Trans, pid => Pid, reason => Reason} + ), + ok = handle_transition_exit(Shard, Trans, Reason, State), + State#{transitions := maps:remove(Track, Ts)}; + [] -> + logger:warning(#{msg => "Unexpected exit signal", pid => Pid, reason => Reason}), + State + end. + +handle_transition_exit(Shard, Trans, normal, _State = #{db := DB}) -> + %% NOTE: This will trigger the next transition if any. + ok = emqx_ds_replication_layer_meta:update_replica_set(DB, Shard, Trans); +handle_transition_exit(_Shard, _Trans, {shutdown, skipped}, _State) -> + ok; +handle_transition_exit(Shard, Trans, Reason, _State) -> + logger:warning(#{ + msg => "Shard membership transition failed", + shard => Shard, + transition => Trans, + reason => Reason + }), + %% FIXME: retry + ok. + %% allocate_shards(State = #{db := DB}) -> case emqx_ds_replication_layer_meta:allocate_shards(DB) of {ok, Shards} -> - logger:notice(#{msg => "Shards allocated", shards => Shards}), + logger:info(#{msg => "Shards allocated", shards => Shards}), ok = start_shards(DB, emqx_ds_replication_layer_meta:my_shards(DB)), ok = start_egresses(DB, Shards), ok = save_db_meta(DB, Shards), @@ -104,25 +308,23 @@ allocate_shards(State = #{db := DB}) -> end. start_shards(DB, Shards) -> - ok = lists:foreach( - fun(Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_shard({DB, Shard}) - end, - Shards - ), - ok = logger:info(#{msg => "Shards started", shards => Shards}), + lists:foreach(fun(Shard) -> start_shard(DB, Shard) end, Shards). + +start_shard(DB, Shard) -> + ok = emqx_ds_builtin_db_sup:ensure_shard({DB, Shard}), + ok = logger:info(#{msg => "Shard started", shard => Shard}), ok. 
start_egresses(DB, Shards) -> - ok = lists:foreach( - fun(Shard) -> - ok = emqx_ds_builtin_db_sup:ensure_egress({DB, Shard}) - end, - Shards - ), - logger:info(#{msg => "Egresses started", shards => Shards}), + lists:foreach(fun(Shard) -> start_egress(DB, Shard) end, Shards). + +start_egress(DB, Shard) -> + ok = emqx_ds_builtin_db_sup:ensure_egress({DB, Shard}), + ok = logger:info(#{msg => "Egress started", shard => Shard}), ok. +%% + save_db_meta(DB, Shards) -> persistent_term:put(?db_meta(DB), #{ shards => Shards, @@ -146,3 +348,8 @@ erase_shards_meta(DB, Shards) -> erase_shard_meta(DB, Shard) -> persistent_term:erase(?shard_meta(DB, Shard)). + +%% + +delay({MinDelay, Variance}) -> + timer:sleep(MinDelay + rand:uniform(Variance)). diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index 24e7cdafb..872169765 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -26,18 +26,45 @@ -define(DB, testdb). opts() -> - #{ - backend => builtin, - storage => {emqx_ds_storage_bitfield_lts, #{}}, - n_shards => 1, - n_sites => 3, - replication_factor => 3, - replication_options => #{ - wal_max_size_bytes => 128 * 1024, - wal_max_batch_size => 1024, - snapshot_interval => 128 - } - }. + opts(#{}). + +opts(Overrides) -> + maps:merge( + #{ + backend => builtin, + storage => {emqx_ds_storage_bitfield_lts, #{}}, + n_shards => 16, + n_sites => 1, + replication_factor => 3, + replication_options => #{ + wal_max_size_bytes => 64 * 1024, + wal_max_batch_size => 1024, + snapshot_interval => 128 + } + }, + Overrides + ). + +appspec(emqx_durable_storage) -> + {emqx_durable_storage, #{ + before_start => fun snabbkaffe:fix_ct_logging/0, + override_env => [{egress_flush_interval, 1}] + }}. 
+ +t_replication_transfers_snapshots(init, Config) -> + Apps = [appspec(emqx_durable_storage)], + NodeSpecs = emqx_cth_cluster:mk_nodespecs( + [ + {t_replication_transfers_snapshots1, #{apps => Apps}}, + {t_replication_transfers_snapshots2, #{apps => Apps}}, + {t_replication_transfers_snapshots3, #{apps => Apps}} + ], + #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} + ), + Nodes = emqx_cth_cluster:start(NodeSpecs), + [{nodes, Nodes}, {specs, NodeSpecs} | Config]; +t_replication_transfers_snapshots('end', Config) -> + ok = emqx_cth_cluster:stop(?config(nodes, Config)). t_replication_transfers_snapshots(Config) -> NMsgs = 4000, @@ -45,9 +72,10 @@ t_replication_transfers_snapshots(Config) -> _Specs = [_, SpecOffline | _] = ?config(specs, Config), %% Initialize DB on all nodes and wait for it to be online. + Opts = opts(#{n_shards => 1, n_sites => 3}), ?assertEqual( [{ok, ok} || _ <- Nodes], - erpc:multicall(Nodes, emqx_ds, open_db, [?DB, opts()]) + erpc:multicall(Nodes, emqx_ds, open_db, [?DB, Opts]) ), ?retry( 500, @@ -88,7 +116,7 @@ t_replication_transfers_snapshots(Config) -> Shard = hd(shards(NodeOffline, ?DB)), MessagesOffline = lists:keysort( #message.timestamp, - consume(NodeOffline, ?DB, Shard, ['#'], 0) + consume_shard(NodeOffline, ?DB, Shard, ['#'], 0) ), ?assertEqual( sample(40, Messages), @@ -99,26 +127,169 @@ t_replication_transfers_snapshots(Config) -> MessagesOffline ). +t_replication_rebalance(init, Config) -> + Apps = [appspec(emqx_durable_storage)], + Nodes = emqx_cth_cluster:start( + [ + {t_replication_rebalance1, #{apps => Apps}}, + {t_replication_rebalance2, #{apps => Apps}}, + {t_replication_rebalance3, #{apps => Apps}}, + {t_replication_rebalance4, #{apps => Apps}} + ], + #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} + ), + [{nodes, Nodes} | Config]; +t_replication_rebalance('end', Config) -> + ok = emqx_cth_cluster:stop(?config(nodes, Config)). 
+ +t_replication_rebalance(Config) -> + NMsgs = 800, + NClients = 5, + Nodes = [N1, N2, N3, N4] = ?config(nodes, Config), + + %% Initialize DB on the first node. + Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), + ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?DB, Opts])), + ?assertMatch( + Shards when length(Shards) == 16, + shards_online(N1, ?DB) + ), + + %% Open DB on the rest of the nodes. + ?assertEqual( + [{ok, ok} || _ <- [N2, N3, N4]], + erpc:multicall([N2, N3, N4], emqx_ds, open_db, [?DB, Opts]) + ), + + Sites = [S1, S2 | _Rest] = [ds_repl_meta(N, this_site) || N <- Nodes], + ct:pal("Sites: ~p~n", [Sites]), + + %% Only N1 should be responsible for all shards initially. + ?assertEqual( + [[S1] || _ <- Nodes], + [ds_repl_meta(N, db_sites, [?DB]) || N <- Nodes] + ), + + %% Fill the storage with messages and few additional generations. + %% This will force shards to trigger snapshot transfers during rebalance. + ClientMessages = emqx_utils:pmap( + fun(CID) -> + N = lists:nth(1 + (CID rem length(Nodes)), Nodes), + fill_storage(N, ?DB, NMsgs, #{client_id => integer_to_binary(CID)}) + end, + lists:seq(1, NClients), + infinity + ), + Messages1 = lists:sort(fun compare_message/2, lists:append(ClientMessages)), + + %% Join the second site to the DB replication sites. + ?assertEqual(ok, ds_repl_meta(N1, join_db_site, [?DB, S2])), + %% Should be no-op. + ?assertEqual(ok, ds_repl_meta(N2, join_db_site, [?DB, S2])), + ct:pal("Transitions (~p -> ~p): ~p~n", [[S1], [S1, S2], transitions(N1, ?DB)]), + + %% Fill in some more messages *during* the rebalance. + MessagesRB1 = fill_storage(N4, ?DB, NMsgs, #{client_id => <<"RB1">>}), + + ?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))), + + %% Now join the rest of the sites. + ?assertEqual(ok, ds_repl_meta(N2, assign_db_sites, [?DB, Sites])), + ct:pal("Transitions (~p -> ~p): ~p~n", [[S1, S2], Sites, transitions(N1, ?DB)]), + + %% Fill in some more messages *during* the rebalance. 
+ MessagesRB2 = fill_storage(N4, ?DB, NMsgs, #{client_id => <<"RB2">>}), + + ?retry(1000, 10, ?assertEqual([], transitions(N2, ?DB))), + + %% Verify that each node is now responsible for 3/4 of the shards. + ?assertEqual( + [(16 * 3) div length(Nodes) || _ <- Nodes], + [n_shards_online(N, ?DB) || N <- Nodes] + ), + + %% Verify that the set of shard servers matches the target allocation. + Allocation = [ds_repl_meta(N, my_shards, [?DB]) || N <- Nodes], + ShardServers = [ + shard_server_info(N, ?DB, Shard, Site, readiness) + || {N, Site, Shards} <- lists:zip3(Nodes, Sites, Allocation), + Shard <- Shards + ], + ?assert( + lists:all(fun({_Server, Status}) -> Status == ready end, ShardServers), + ShardServers + ), + + %% Verify that the messages are preserved after the rebalance. + Messages = Messages1 ++ MessagesRB1 ++ MessagesRB2, + MessagesN4 = lists:sort(fun compare_message/2, consume(N4, ?DB, ['#'], 0)), + ?assertEqual(sample(20, Messages), sample(20, MessagesN4)), + ?assertEqual(Messages, MessagesN4), + + %% Scale down the cluster by removing the first node. + ?assertEqual(ok, ds_repl_meta(N1, leave_db_site, [?DB, S1])), + ct:pal("Transitions (~p -> ~p): ~p~n", [Sites, tl(Sites), transitions(N1, ?DB)]), + + ?retry(1000, 10, ?assertEqual([], transitions(N2, ?DB))), + + %% Verify that each node is now responsible for each shard. + ?assertEqual( + [0, 16, 16, 16], + [n_shards_online(N, ?DB) || N <- Nodes] + ), + + %% Verify that the messages are once again preserved after the rebalance. + MessagesN3 = lists:sort(fun compare_message/2, consume(N3, ?DB, ['#'], 0)), + ?assertEqual(sample(20, Messages), sample(20, MessagesN3)), + ?assertEqual(Messages, MessagesN3). + +%% + +shard_server_info(Node, DB, Shard, Site, Info) -> + Server = shard_server(Node, DB, Shard, Site), + {Server, ds_repl_shard(Node, server_info, [Info, Server])}. + +shard_server(Node, DB, Shard, Site) -> + ds_repl_shard(Node, shard_server, [DB, Shard, Site]). 
+ +ds_repl_meta(Node, Fun) -> + ds_repl_meta(Node, Fun, []). + +ds_repl_meta(Node, Fun, Args) -> + erpc:call(Node, emqx_ds_replication_layer_meta, Fun, Args). + +ds_repl_shard(Node, Fun, Args) -> + erpc:call(Node, emqx_ds_replication_layer_shard, Fun, Args). + +transitions(Node, DB) -> + Shards = shards(Node, DB), + [{S, T} || S <- Shards, T <- ds_repl_meta(Node, replica_set_transitions, [DB, S])]. + shards(Node, DB) -> erpc:call(Node, emqx_ds_replication_layer_meta, shards, [DB]). shards_online(Node, DB) -> erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [DB]). +n_shards_online(Node, DB) -> + length(shards_online(Node, DB)). + fill_storage(Node, DB, NMsgs, Opts) -> fill_storage(Node, DB, NMsgs, 0, Opts). -fill_storage(Node, DB, NMsgs, I, Opts = #{p_addgen := PAddGen}) when I < NMsgs -> - R1 = push_message(Node, DB, I), +fill_storage(Node, DB, NMsgs, I, Opts) when I < NMsgs -> + PAddGen = maps:get(p_addgen, Opts, 0.001), + R1 = push_message(Node, DB, I, Opts), R2 = probably(PAddGen, fun() -> add_generation(Node, DB) end), R1 ++ R2 ++ fill_storage(Node, DB, NMsgs, I + 1, Opts); fill_storage(_Node, _DB, NMsgs, NMsgs, _Opts) -> []. -push_message(Node, DB, I) -> +push_message(Node, DB, I, Opts) -> Topic = emqx_topic:join([<<"topic">>, <<"foo">>, integer_to_binary(I)]), {Bytes, _} = rand:bytes_s(120, rand:seed_s(default, I)), - Message = message(Topic, Bytes, I * 100), + ClientId = maps:get(client_id, Opts, <>), + Message = message(ClientId, Topic, Bytes, I * 100), ok = erpc:call(Node, emqx_ds, store_batch, [DB, [Message], #{sync => true}]), [Message]. @@ -126,16 +297,22 @@ add_generation(Node, DB) -> ok = erpc:call(Node, emqx_ds, add_generation, [DB]), []. -message(Topic, Payload, PublishedAt) -> +message(ClientId, Topic, Payload, PublishedAt) -> #message{ - from = <>, + from = ClientId, topic = Topic, payload = Payload, timestamp = PublishedAt, id = emqx_guid:gen() }. 
-consume(Node, DB, Shard, TopicFilter, StartTime) -> +compare_message(M1, M2) -> + {M1#message.from, M1#message.timestamp} < {M2#message.from, M2#message.timestamp}. + +consume(Node, DB, TopicFilter, StartTime) -> + erpc:call(Node, emqx_ds_test_helpers, consume, [DB, TopicFilter, StartTime]). + +consume_shard(Node, DB, Shard, TopicFilter, StartTime) -> erpc:call(Node, emqx_ds_test_helpers, storage_consume, [{DB, Shard}, TopicFilter, StartTime]). probably(P, Fun) -> @@ -156,26 +333,11 @@ suite() -> [{timetrap, {seconds, 60}}]. all() -> emqx_common_test_helpers:all(?MODULE). -init_per_testcase(TCName, Config) -> - Apps = [ - {emqx_durable_storage, #{ - before_start => fun snabbkaffe:fix_ct_logging/0, - override_env => [{egress_flush_interval, 1}] - }} - ], - WorkDir = emqx_cth_suite:work_dir(TCName, Config), - NodeSpecs = emqx_cth_cluster:mk_nodespecs( - [ - {emqx_ds_replication_SUITE1, #{apps => Apps}}, - {emqx_ds_replication_SUITE2, #{apps => Apps}}, - {emqx_ds_replication_SUITE3, #{apps => Apps}} - ], - #{work_dir => WorkDir} - ), - Nodes = emqx_cth_cluster:start(NodeSpecs), +init_per_testcase(TCName, Config0) -> + Config = emqx_common_test_helpers:init_per_testcase(?MODULE, TCName, Config0), ok = snabbkaffe:start_trace(), - [{nodes, Nodes}, {specs, NodeSpecs} | Config]. + Config. -end_per_testcase(_TCName, Config) -> +end_per_testcase(TCName, Config) -> ok = snabbkaffe:stop(), - ok = emqx_cth_cluster:stop(?config(nodes, Config)). + emqx_common_test_helpers:end_per_testcase(?MODULE, TCName, Config). From ef705c228503ba570e0276f62e45af283203ec19 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 2 Apr 2024 18:00:27 +0200 Subject: [PATCH 076/234] feat: add apply rule API, clientid/ruleid tracing for rule and connector This commit adds: * Support for forwarding the rule id and client id to the connector so that events such as template rendered successfully can be traced. 
* HTTP API for applying/activating a rule with the given context
---
 .../src/emqx_trace/emqx_trace_handler.erl     |  14 +-
 .../src/emqx_bridge_http_connector.erl        |  13 +-
 .../test/emqx_bridge_http_SUITE.erl           | 248 +++++-----
 .../test/emqx_bridge_http_test_lib.erl        | 161 +++++++
 .../src/emqx_resource_buffer_worker.erl       | 185 +++++--
 .../src/emqx_rule_api_schema.erl              |  45 +-
 .../src/emqx_rule_engine_api.erl              |  38 ++
 .../src/emqx_rule_runtime.erl                 | 248 +++++---
 .../src/emqx_rule_sqltester.erl               |  70 ++-
 .../emqx_rule_engine_api_rule_apply_SUITE.erl | 451 ++++++++++++++++++
 10 files changed, 1232 insertions(+), 241 deletions(-)
 create mode 100644 apps/emqx_bridge_http/test/emqx_bridge_http_test_lib.erl
 create mode 100644 apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl

diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl
index 3af543013..c69809052 100644
--- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl
+++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl
@@ -135,14 +135,22 @@ running() ->
     lists:foldl(fun filter_traces/2, [], emqx_logger:get_log_handlers(started)).
 
 -spec filter_ruleid(logger:log_event(), {binary(), atom()}) -> logger:log_event() | stop.
-filter_ruleid(#{meta := Meta = #{ruleid := RuleId}} = Log, {MatchId, _Name}) ->
-    filter_ret(RuleId =:= MatchId andalso is_trace(Meta), Log);
+filter_ruleid(#{meta := Meta = #{rule_id := RuleId}} = Log, {MatchId, _Name}) ->
+    RuleIDs = maps:get(rule_ids, Meta, #{}),
+    IsMatch = (RuleId =:= MatchId) orelse maps:get(MatchId, RuleIDs, false),
+    filter_ret(IsMatch andalso is_trace(Meta), Log);
+filter_ruleid(#{meta := Meta = #{rule_ids := RuleIDs}} = Log, {MatchId, _Name}) ->
+    filter_ret(maps:get(MatchId, RuleIDs, false) andalso is_trace(Meta), Log);
 filter_ruleid(_Log, _ExpectId) ->
     stop.
 
 -spec filter_clientid(logger:log_event(), {binary(), atom()}) -> logger:log_event() | stop.
filter_clientid(#{meta := Meta = #{clientid := ClientId}} = Log, {MatchId, _Name}) -> - filter_ret(ClientId =:= MatchId andalso is_trace(Meta), Log); + ClientIDs = maps:get(client_ids, Meta, #{}), + IsMatch = (ClientId =:= MatchId) orelse maps:get(MatchId, ClientIDs, false), + filter_ret(IsMatch andalso is_trace(Meta), Log); +filter_clientid(#{meta := Meta = #{client_ids := ClientIDs}} = Log, {MatchId, _Name}) -> + filter_ret(maps:get(MatchId, ClientIDs, false) andalso is_trace(Meta), Log); filter_clientid(_Log, _ExpectId) -> stop. diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index ae1e727ca..99222aa00 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -661,13 +661,22 @@ process_request_and_action(Request, ActionState, Msg) -> ), BodyTemplate = maps:get(body, ActionState), Body = render_request_body(BodyTemplate, RenderTmplFunc, Msg), - #{ + RenderResult = #{ method => Method, path => Path, body => Body, headers => Headers, request_timeout => maps:get(request_timeout, ActionState) - }. + }, + ?TRACE( + "QUERY_RENDER", + "http_connector_successfully_rendered_request", + #{ + request => Request, + render_result => RenderResult + } + ), + RenderResult. merge_proplist(Proplist1, Proplist2) -> lists:foldl( diff --git a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl index 3da04012d..ab0d5bb55 100644 --- a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl +++ b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl @@ -30,8 +30,8 @@ -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("emqx/include/asserts.hrl"). --define(BRIDGE_TYPE, <<"webhook">>). --define(BRIDGE_NAME, atom_to_binary(?MODULE)). +-define(BRIDGE_TYPE, emqx_bridge_http_test_lib:bridge_type()). +-define(BRIDGE_NAME, emqx_bridge_http_test_lib:bridge_name()). 
all() -> emqx_common_test_helpers:all(?MODULE). @@ -73,21 +73,10 @@ suite() -> init_per_testcase(t_bad_bridge_config, Config) -> Config; -init_per_testcase(t_send_async_connection_timeout, Config) -> - HTTPPath = <<"/path">>, - ServerSSLOpts = false, - {ok, {HTTPPort, _Pid}} = emqx_bridge_http_connector_test_server:start_link( - _Port = random, HTTPPath, ServerSSLOpts - ), - ResponseDelayMS = 500, - ok = emqx_bridge_http_connector_test_server:set_handler( - success_http_handler(#{response_delay => ResponseDelayMS}) - ), - [ - {http_server, #{port => HTTPPort, path => HTTPPath}}, - {response_delay_ms, ResponseDelayMS} - | Config - ]; +init_per_testcase(Case, Config) when + Case =:= t_send_async_connection_timeout orelse Case =:= t_send_get_trace_messages +-> + emqx_bridge_http_test_lib:init_http_success_server(Config); init_per_testcase(t_path_not_found, Config) -> HTTPPath = <<"/nonexisting/path">>, ServerSSLOpts = false, @@ -115,7 +104,9 @@ init_per_testcase(t_bridge_probes_header_atoms, Config) -> {ok, {HTTPPort, _Pid}} = emqx_bridge_http_connector_test_server:start_link( _Port = random, HTTPPath, ServerSSLOpts ), - ok = emqx_bridge_http_connector_test_server:set_handler(success_http_handler()), + ok = emqx_bridge_http_connector_test_server:set_handler( + emqx_bridge_http_test_lib:success_http_handler() + ), [{http_server, #{port => HTTPPort, path => HTTPPath}} | Config]; init_per_testcase(_TestCase, Config) -> Server = start_http_server(#{response_delay_ms => 0}), @@ -126,7 +117,8 @@ end_per_testcase(TestCase, _Config) when TestCase =:= t_too_many_requests; TestCase =:= t_rule_action_expired; TestCase =:= t_bridge_probes_header_atoms; - TestCase =:= t_send_async_connection_timeout + TestCase =:= t_send_async_connection_timeout; + TestCase =:= t_send_get_trace_messages -> ok = emqx_bridge_http_connector_test_server:stop(), persistent_term:erase({?MODULE, times_called}), @@ -250,115 +242,8 @@ get_metrics(Name) -> Type = <<"http">>, emqx_bridge:get_metrics(Type, 
Name). -bridge_async_config(#{port := Port} = Config) -> - Type = maps:get(type, Config, ?BRIDGE_TYPE), - Name = maps:get(name, Config, ?BRIDGE_NAME), - Host = maps:get(host, Config, "localhost"), - Path = maps:get(path, Config, ""), - PoolSize = maps:get(pool_size, Config, 1), - QueryMode = maps:get(query_mode, Config, "async"), - ConnectTimeout = maps:get(connect_timeout, Config, "1s"), - RequestTimeout = maps:get(request_timeout, Config, "10s"), - ResumeInterval = maps:get(resume_interval, Config, "1s"), - HealthCheckInterval = maps:get(health_check_interval, Config, "200ms"), - ResourceRequestTTL = maps:get(resource_request_ttl, Config, "infinity"), - LocalTopic = - case maps:find(local_topic, Config) of - {ok, LT} -> - lists:flatten(["local_topic = \"", LT, "\""]); - error -> - "" - end, - ConfigString = io_lib:format( - "bridges.~s.~s {\n" - " url = \"http://~s:~p~s\"\n" - " connect_timeout = \"~p\"\n" - " enable = true\n" - %% local_topic - " ~s\n" - " enable_pipelining = 100\n" - " max_retries = 2\n" - " method = \"post\"\n" - " pool_size = ~p\n" - " pool_type = \"random\"\n" - " request_timeout = \"~s\"\n" - " body = \"${id}\"\n" - " resource_opts {\n" - " inflight_window = 100\n" - " health_check_interval = \"~s\"\n" - " max_buffer_bytes = \"1GB\"\n" - " query_mode = \"~s\"\n" - " request_ttl = \"~p\"\n" - " resume_interval = \"~s\"\n" - " start_after_created = \"true\"\n" - " start_timeout = \"5s\"\n" - " worker_pool_size = \"1\"\n" - " }\n" - " ssl {\n" - " enable = false\n" - " }\n" - "}\n", - [ - Type, - Name, - Host, - Port, - Path, - ConnectTimeout, - LocalTopic, - PoolSize, - RequestTimeout, - HealthCheckInterval, - QueryMode, - ResourceRequestTTL, - ResumeInterval - ] - ), - ct:pal(ConfigString), - parse_and_check(ConfigString, Type, Name). 
- -parse_and_check(ConfigString, BridgeType, Name) -> - {ok, RawConf} = hocon:binary(ConfigString, #{format => map}), - hocon_tconf:check_plain(emqx_bridge_schema, RawConf, #{required => false, atom_key => false}), - #{<<"bridges">> := #{BridgeType := #{Name := RetConfig}}} = RawConf, - RetConfig. - make_bridge(Config) -> - Type = ?BRIDGE_TYPE, - Name = ?BRIDGE_NAME, - BridgeConfig = bridge_async_config(Config#{ - name => Name, - type => Type - }), - {ok, _} = emqx_bridge:create( - Type, - Name, - BridgeConfig - ), - emqx_bridge_resource:bridge_id(Type, Name). - -success_http_handler() -> - success_http_handler(#{response_delay => 0}). - -success_http_handler(Opts) -> - ResponseDelay = maps:get(response_delay, Opts, 0), - TestPid = self(), - fun(Req0, State) -> - {ok, Body, Req} = cowboy_req:read_body(Req0), - Headers = cowboy_req:headers(Req), - ct:pal("http request received: ~p", [ - #{body => Body, headers => Headers, response_delay => ResponseDelay} - ]), - ResponseDelay > 0 andalso timer:sleep(ResponseDelay), - TestPid ! {http, Headers, Body}, - Rep = cowboy_req:reply( - 200, - #{<<"content-type">> => <<"text/plain">>}, - <<"hello">>, - Req - ), - {ok, Rep, State} - end. + emqx_bridge_http_test_lib:make_bridge(Config). not_found_http_handler() -> TestPid = self(), @@ -452,6 +337,103 @@ t_send_async_connection_timeout(Config) -> receive_request_notifications(MessageIDs, ResponseDelayMS, []), ok. 
+t_send_get_trace_messages(Config) -> + ResponseDelayMS = ?config(response_delay_ms, Config), + #{port := Port, path := Path} = ?config(http_server, Config), + BridgeID = make_bridge(#{ + port => Port, + path => Path, + pool_size => 1, + query_mode => "async", + connect_timeout => integer_to_list(ResponseDelayMS * 2) ++ "ms", + request_timeout => "10s", + resume_interval => "200ms", + health_check_interval => "200ms", + resource_request_ttl => "infinity" + }), + RuleTopic = iolist_to_binary([<<"my_rule_topic/">>, atom_to_binary(?FUNCTION_NAME)]), + SQL = <<"SELECT payload.id as id FROM \"", RuleTopic/binary, "\"">>, + {ok, #{<<"id">> := RuleId}} = + emqx_bridge_testlib:create_rule_and_action_http( + ?BRIDGE_TYPE, + RuleTopic, + Config, + #{sql => SQL} + ), + %% =================================== + %% Create trace for RuleId + %% =================================== + Now = erlang:system_time(second) - 10, + Start = Now, + End = Now + 60, + TraceName = atom_to_binary(?FUNCTION_NAME), + Trace = #{ + name => TraceName, + type => ruleid, + ruleid => RuleId, + start_at => Start, + end_at => End + }, + emqx_trace_SUITE:reload(), + ok = emqx_trace:clear(), + {ok, _} = emqx_trace:create(Trace), + %% =================================== + + ResourceId = emqx_bridge_resource:resource_id(BridgeID), + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + ?assertMatch({ok, connected}, emqx_resource_manager:health_check(ResourceId)) + ), + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + ?assertEqual(<<>>, read_rule_trace_file(TraceName, Now)) + ), + Msg = emqx_message:make(RuleTopic, <<"{\"id\": 1}">>), + emqx:publish(Msg), + ?retry( + _Interval = 500, + _NAttempts = 20, + ?assertMatch( + #{ + counters := #{ + 'matched' := 1, + 'actions.failed' := 0, + 'actions.failed.unknown' := 0, + 'actions.success' := 1, + 'actions.total' := 1 + } + }, + emqx_metrics_worker:get_metrics(rule_metrics, RuleId) + ) + ), + + ok = emqx_trace_handler_SUITE:filesync(TraceName, ruleid), + {ok, Bin} = 
file:read_file(emqx_trace:log_file(TraceName, Now)), + + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + begin + Bin = read_rule_trace_file(TraceName, Now), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"rule_activated">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"SELECT_yielded_result">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"successfully_rendered_request">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) + end + ), + emqx_trace:delete(TraceName), + ok. + +read_rule_trace_file(TraceName, From) -> + emqx_trace:check(), + ok = emqx_trace_handler_SUITE:filesync(TraceName, ruleid), + {ok, Bin} = file:read_file(emqx_trace:log_file(TraceName, From)), + Bin. + t_async_free_retries(Config) -> #{port := Port} = ?config(http_server, Config), _BridgeID = make_bridge(#{ @@ -518,7 +500,7 @@ t_async_common_retries(Config) -> ok. 
t_bad_bridge_config(_Config) -> - BridgeConfig = bridge_async_config(#{port => 12345}), + BridgeConfig = emqx_bridge_http_test_lib:bridge_async_config(#{port => 12345}), ?assertMatch( {ok, {{_, 201, _}, _Headers, #{ @@ -540,7 +522,7 @@ t_bad_bridge_config(_Config) -> t_start_stop(Config) -> #{port := Port} = ?config(http_server, Config), - BridgeConfig = bridge_async_config(#{ + BridgeConfig = emqx_bridge_http_test_lib:bridge_async_config(#{ type => ?BRIDGE_TYPE, name => ?BRIDGE_NAME, port => Port @@ -554,7 +536,7 @@ t_path_not_found(Config) -> begin #{port := Port, path := Path} = ?config(http_server, Config), MQTTTopic = <<"t/webhook">>, - BridgeConfig = bridge_async_config(#{ + BridgeConfig = emqx_bridge_http_test_lib:bridge_async_config(#{ type => ?BRIDGE_TYPE, name => ?BRIDGE_NAME, local_topic => MQTTTopic, @@ -593,7 +575,7 @@ t_too_many_requests(Config) -> begin #{port := Port, path := Path} = ?config(http_server, Config), MQTTTopic = <<"t/webhook">>, - BridgeConfig = bridge_async_config(#{ + BridgeConfig = emqx_bridge_http_test_lib:bridge_async_config(#{ type => ?BRIDGE_TYPE, name => ?BRIDGE_NAME, local_topic => MQTTTopic, @@ -633,7 +615,7 @@ t_rule_action_expired(Config) -> ?check_trace( begin RuleTopic = <<"t/webhook/rule">>, - BridgeConfig = bridge_async_config(#{ + BridgeConfig = emqx_bridge_http_test_lib:bridge_async_config(#{ type => ?BRIDGE_TYPE, name => ?BRIDGE_NAME, host => "non.existent.host", @@ -689,7 +671,7 @@ t_bridge_probes_header_atoms(Config) -> ?check_trace( begin LocalTopic = <<"t/local/topic">>, - BridgeConfig0 = bridge_async_config(#{ + BridgeConfig0 = emqx_bridge_http_test_lib:bridge_async_config(#{ type => ?BRIDGE_TYPE, name => ?BRIDGE_NAME, port => Port, diff --git a/apps/emqx_bridge_http/test/emqx_bridge_http_test_lib.erl b/apps/emqx_bridge_http/test/emqx_bridge_http_test_lib.erl new file mode 100644 index 000000000..4959a24c3 --- /dev/null +++ b/apps/emqx_bridge_http/test/emqx_bridge_http_test_lib.erl @@ -0,0 +1,161 @@ 
+%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_bridge_http_test_lib). + +-export([ + bridge_type/0, + bridge_name/0, + make_bridge/1, + bridge_async_config/1, + init_http_success_server/1, + success_http_handler/0 +]). + +-define(BRIDGE_TYPE, bridge_type()). +-define(BRIDGE_NAME, bridge_name()). + +bridge_type() -> + <<"webhook">>. + +bridge_name() -> + atom_to_binary(?MODULE). + +make_bridge(Config) -> + Type = ?BRIDGE_TYPE, + Name = ?BRIDGE_NAME, + BridgeConfig = bridge_async_config(Config#{ + name => Name, + type => Type + }), + {ok, _} = emqx_bridge:create( + Type, + Name, + BridgeConfig + ), + emqx_bridge_resource:bridge_id(Type, Name). 
+ +bridge_async_config(#{port := Port} = Config) -> + Type = maps:get(type, Config, ?BRIDGE_TYPE), + Name = maps:get(name, Config, ?BRIDGE_NAME), + Host = maps:get(host, Config, "localhost"), + Path = maps:get(path, Config, ""), + PoolSize = maps:get(pool_size, Config, 1), + QueryMode = maps:get(query_mode, Config, "async"), + ConnectTimeout = maps:get(connect_timeout, Config, "1s"), + RequestTimeout = maps:get(request_timeout, Config, "10s"), + ResumeInterval = maps:get(resume_interval, Config, "1s"), + HealthCheckInterval = maps:get(health_check_interval, Config, "200ms"), + ResourceRequestTTL = maps:get(resource_request_ttl, Config, "infinity"), + LocalTopic = + case maps:find(local_topic, Config) of + {ok, LT} -> + lists:flatten(["local_topic = \"", LT, "\""]); + error -> + "" + end, + ConfigString = io_lib:format( + "bridges.~s.~s {\n" + " url = \"http://~s:~p~s\"\n" + " connect_timeout = \"~p\"\n" + " enable = true\n" + %% local_topic + " ~s\n" + " enable_pipelining = 100\n" + " max_retries = 2\n" + " method = \"post\"\n" + " pool_size = ~p\n" + " pool_type = \"random\"\n" + " request_timeout = \"~s\"\n" + " body = \"${id}\"\n" + " resource_opts {\n" + " inflight_window = 100\n" + " health_check_interval = \"~s\"\n" + " max_buffer_bytes = \"1GB\"\n" + " query_mode = \"~s\"\n" + " request_ttl = \"~p\"\n" + " resume_interval = \"~s\"\n" + " start_after_created = \"true\"\n" + " start_timeout = \"5s\"\n" + " worker_pool_size = \"1\"\n" + " }\n" + " ssl {\n" + " enable = false\n" + " }\n" + "}\n", + [ + Type, + Name, + Host, + Port, + Path, + ConnectTimeout, + LocalTopic, + PoolSize, + RequestTimeout, + HealthCheckInterval, + QueryMode, + ResourceRequestTTL, + ResumeInterval + ] + ), + ct:pal(ConfigString), + parse_and_check(ConfigString, Type, Name). 
+ +parse_and_check(ConfigString, BridgeType, Name) -> + {ok, RawConf} = hocon:binary(ConfigString, #{format => map}), + hocon_tconf:check_plain(emqx_bridge_schema, RawConf, #{required => false, atom_key => false}), + #{<<"bridges">> := #{BridgeType := #{Name := RetConfig}}} = RawConf, + RetConfig. + +success_http_handler() -> + success_http_handler(#{response_delay => 0}). + +success_http_handler(Opts) -> + ResponseDelay = maps:get(response_delay, Opts, 0), + TestPid = self(), + fun(Req0, State) -> + {ok, Body, Req} = cowboy_req:read_body(Req0), + Headers = cowboy_req:headers(Req), + ct:pal("http request received: ~p", [ + #{body => Body, headers => Headers, response_delay => ResponseDelay} + ]), + ResponseDelay > 0 andalso timer:sleep(ResponseDelay), + TestPid ! {http, Headers, Body}, + Rep = cowboy_req:reply( + 200, + #{<<"content-type">> => <<"text/plain">>}, + <<"hello">>, + Req + ), + {ok, Rep, State} + end. + +init_http_success_server(Config) -> + HTTPPath = <<"/path">>, + ServerSSLOpts = false, + {ok, {HTTPPort, _Pid}} = emqx_bridge_http_connector_test_server:start_link( + _Port = random, HTTPPath, ServerSSLOpts + ), + ResponseDelayMS = 500, + ok = emqx_bridge_http_connector_test_server:set_handler( + success_http_handler(#{response_delay => ResponseDelayMS}) + ), + [ + {http_server, #{port => HTTPPort, path => HTTPPath}}, + {response_delay_ms, ResponseDelayMS}, + {bridge_name, ?BRIDGE_NAME} + | Config + ]. diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 37f9369ff..6dfcde88c 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -64,8 +64,10 @@ -define(COLLECT_REQ_LIMIT, 1000). -define(SEND_REQ(FROM, REQUEST), {'$send_req', FROM, REQUEST}). --define(QUERY(FROM, REQUEST, SENT, EXPIRE_AT), {query, FROM, REQUEST, SENT, EXPIRE_AT}). 
--define(SIMPLE_QUERY(FROM, REQUEST), ?QUERY(FROM, REQUEST, false, infinity)). +-define(QUERY(FROM, REQUEST, SENT, EXPIRE_AT, TRACE_CTX), + {query, FROM, REQUEST, SENT, EXPIRE_AT, TRACE_CTX} +). +-define(SIMPLE_QUERY(FROM, REQUEST, TRACE_CTX), ?QUERY(FROM, REQUEST, false, infinity, TRACE_CTX)). -define(REPLY(FROM, SENT, RESULT), {reply, FROM, SENT, RESULT}). -define(INFLIGHT_ITEM(Ref, BatchOrQuery, IsRetriable, AsyncWorkerMRef), {Ref, BatchOrQuery, IsRetriable, AsyncWorkerMRef} @@ -77,7 +79,10 @@ -type id() :: binary(). -type index() :: pos_integer(). -type expire_at() :: infinity | integer(). --type queue_query() :: ?QUERY(reply_fun(), request(), HasBeenSent :: boolean(), expire_at()). +-type trace_context() :: map() | undefined. +-type queue_query() :: ?QUERY( + reply_fun(), request(), HasBeenSent :: boolean(), expire_at(), TraceCtx :: trace_context() +). -type request() :: term(). -type request_from() :: undefined | gen_statem:from(). -type timeout_ms() :: emqx_schema:timeout_duration_ms(). @@ -154,7 +159,10 @@ simple_sync_query(Id, Request, QueryOpts0) -> emqx_resource_metrics:matched_inc(Id), Ref = make_request_ref(), ReplyTo = maps:get(reply_to, QueryOpts0, undefined), - Result = call_query(force_sync, Id, Index, Ref, ?SIMPLE_QUERY(ReplyTo, Request), QueryOpts), + TraceCtx = maps:get(trace_ctx, QueryOpts0, undefined), + Result = call_query( + force_sync, Id, Index, Ref, ?SIMPLE_QUERY(ReplyTo, Request, TraceCtx), QueryOpts + ), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. 
@@ -167,8 +175,9 @@ simple_async_query(Id, Request, QueryOpts0) -> emqx_resource_metrics:matched_inc(Id), Ref = make_request_ref(), ReplyTo = maps:get(reply_to, QueryOpts0, undefined), + TraceCtx = maps:get(trace_ctx, QueryOpts0, undefined), Result = call_query( - async_if_possible, Id, Index, Ref, ?SIMPLE_QUERY(ReplyTo, Request), QueryOpts + async_if_possible, Id, Index, Ref, ?SIMPLE_QUERY(ReplyTo, Request, TraceCtx), QueryOpts ), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. @@ -439,10 +448,10 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> Result = call_query(force_sync, Id, Index, Ref, QueryOrBatch, QueryOpts), {ShouldAck, PostFn, DeltaCounters} = case QueryOrBatch of - ?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) -> + ?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt, _TraceCtx) -> Reply = ?REPLY(ReplyTo, HasBeenSent, Result), reply_caller_defer_metrics(Id, Reply, QueryOpts); - [?QUERY(_, _, _, _) | _] = Batch -> + [?QUERY(_, _, _, _, _) | _] = Batch -> batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts) end, Data1 = aggregate_counters(Data0, DeltaCounters), @@ -501,11 +510,13 @@ collect_and_enqueue_query_requests(Request0, Data0) -> ReplyFun = maps:get(async_reply_fun, Opts, undefined), HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), - ?QUERY(ReplyFun, Req, HasBeenSent, ExpireAt); + TraceCtx = maps:get(trace_ctx, Opts, undefined), + ?QUERY(ReplyFun, Req, HasBeenSent, ExpireAt, TraceCtx); (?SEND_REQ(ReplyTo, {query, Req, Opts})) -> HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), - ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt) + TraceCtx = maps:get(trace_ctx, Opts, undefined), + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt, TraceCtx) end, Requests ), @@ -515,7 +526,7 @@ collect_and_enqueue_query_requests(Request0, Data0) -> reply_overflown([]) -> ok; -reply_overflown([?QUERY(ReplyTo, _Req, _HasBeenSent, _ExpireAt) | More]) -> +reply_overflown([?QUERY(ReplyTo, _Req, _HasBeenSent, _ExpireAt, _TraceCtx) | More]) -> 
do_reply_caller(ReplyTo, {error, buffer_overflow}), reply_overflown(More). @@ -630,7 +641,7 @@ do_flush( inflight_tid := InflightTID } = Data0, %% unwrap when not batching (i.e., batch size == 1) - [?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) = Request] = Batch, + [?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt, _TraceCtx) = Request] = Batch, QueryOpts = #{inflight_tid => InflightTID, simple_query => false}, Result = call_query(async_if_possible, Id, Index, Ref, Request, QueryOpts), Reply = ?REPLY(ReplyTo, HasBeenSent, Result), @@ -824,14 +835,14 @@ batch_reply_caller_defer_metrics(Id, BatchResult, Batch, QueryOpts) -> expand_batch_reply(BatchResults, Batch) when is_list(BatchResults) -> lists:map( - fun({?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT), Result}) -> + fun({?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT, _TraceCtx), Result}) -> ?REPLY(FROM, SENT, Result) end, lists:zip(Batch, BatchResults) ); expand_batch_reply(BatchResult, Batch) -> lists:map( - fun(?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT)) -> + fun(?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT, _TraceCtx)) -> ?REPLY(FROM, SENT, BatchResult) end, Batch @@ -880,7 +891,7 @@ reply_dropped(_ReplyTo, _Result) -> -spec batch_reply_dropped([queue_query()], {error, late_reply | request_expired}) -> ok. 
batch_reply_dropped(Batch, Result) -> lists:foreach( - fun(?QUERY(ReplyTo, _CoreReq, _HasBeenSent, _ExpireAt)) -> + fun(?QUERY(ReplyTo, _CoreReq, _HasBeenSent, _ExpireAt, _TraceCtx)) -> reply_dropped(ReplyTo, Result) end, Batch @@ -1093,11 +1104,80 @@ call_query(QM, Id, Index, Ref, Query, QueryOpts) -> {ok, _Group, #{status := ?status_connecting, error := unhealthy_target}} -> {error, {unrecoverable_error, unhealthy_target}}; {ok, _Group, Resource} -> - do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource); + set_rule_id_trace_meta_data(Query), + QueryResult = do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource), + %% do_call_query does not throw an exception as the call to the + %% resource is wrapped in a try catch expression so we will always + %% unset the trace meta data + unset_rule_id_trace_meta_data(), + QueryResult; {error, not_found} -> ?RESOURCE_ERROR(not_found, "resource not found") end. +set_rule_id_trace_meta_data(Requests) when is_list(Requests) -> + %% Get the rule ids from requests + RuleIDs = lists:foldl(fun collect_rule_id/2, #{}, Requests), + ClientIDs = lists:foldl(fun collect_client_id/2, #{}, Requests), + StopAfterRender = lists:foldl(fun collect_stop_after_render/2, no_info, Requests), + StopAfterRenderVal = + case StopAfterRender of + only_true -> + logger:update_process_metadata(#{stop_action_after_render => false}), + true; + only_false -> + false; + mixed -> + ?TRACE( + warning, + "ACTION", + "mixed_stop_action_after_render_batch " + "(A batch will be sent to connector where some but " + "not all requests has stop_action_after_render set. " + "The batch will get assigned " + "stop_action_after_render = false)", + #{rule_ids => RuleIDs, client_ids => ClientIDs} + ), + false + end, + logger:update_process_metadata(#{ + rule_ids => RuleIDs, client_ids => ClientIDs, stop_action_after_render => StopAfterRenderVal + }), + ok; +set_rule_id_trace_meta_data(Request) -> + set_rule_id_trace_meta_data([Request]), + ok. 
+ +collect_rule_id(?QUERY(_, _, _, _, #{rule_id := RuleId}), Acc) -> + Acc#{RuleId => true}; +collect_rule_id(?QUERY(_, _, _, _, _), Acc) -> + Acc. + +collect_client_id(?QUERY(_, _, _, _, #{clientid := ClientId}), Acc) -> + Acc#{ClientId => true}; +collect_client_id(?QUERY(_, _, _, _, _), Acc) -> + Acc. + +collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), no_info) -> + only_true; +collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), only_true) -> + only_true; +collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), only_false) -> + mixed; +collect_stop_after_render(?QUERY(_, _, _, _, _), no_info) -> + only_false; +collect_stop_after_render(?QUERY(_, _, _, _, _), only_true) -> + mixed; +collect_stop_after_render(?QUERY(_, _, _, _, _), only_false) -> + only_false; +collect_stop_after_render(?QUERY(_, _, _, _, _), mixed) -> + mixed. + +unset_rule_id_trace_meta_data() -> + logger:update_process_metadata(#{ + rule_ids => #{}, client_ids => #{}, stop_action_after_render => false + }). + %% action:kafka_producer:myproducer1:connector:kafka_producer:mykakfaclient1 extract_connector_id(Id) when is_binary(Id) -> case binary:split(Id, <<":">>, [global]) of @@ -1208,7 +1288,15 @@ do_call_query(_QM, _Id, _Index, _Ref, _Query, _QueryOpts, _Data) -> ). 
apply_query_fun( - sync, Mod, Id, _Index, _Ref, ?QUERY(_, Request, _, _) = _Query, ResSt, Channels, QueryOpts + sync, + Mod, + Id, + _Index, + _Ref, + ?QUERY(_, Request, _, _, _TraceCtx) = _Query, + ResSt, + Channels, + QueryOpts ) -> ?tp(call_query, #{id => Id, mod => Mod, query => _Query, res_st => ResSt, call_mode => sync}), maybe_reply_to( @@ -1227,7 +1315,15 @@ apply_query_fun( QueryOpts ); apply_query_fun( - async, Mod, Id, Index, Ref, ?QUERY(_, Request, _, _) = Query, ResSt, Channels, QueryOpts + async, + Mod, + Id, + Index, + Ref, + ?QUERY(_, Request, _, _, _TraceCtx) = Query, + ResSt, + Channels, + QueryOpts ) -> ?tp(call_query_async, #{ id => Id, mod => Mod, query => Query, res_st => ResSt, call_mode => async @@ -1268,7 +1364,7 @@ apply_query_fun( Id, _Index, _Ref, - [?QUERY(_, FirstRequest, _, _) | _] = Batch, + [?QUERY(_, FirstRequest, _, _, _) | _] = Batch, ResSt, Channels, QueryOpts @@ -1276,7 +1372,9 @@ apply_query_fun( ?tp(call_batch_query, #{ id => Id, mod => Mod, batch => Batch, res_st => ResSt, call_mode => sync }), - Requests = lists:map(fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch), + Requests = lists:map( + fun(?QUERY(_ReplyTo, Request, _, _ExpireAt, _TraceCtx)) -> Request end, Batch + ), maybe_reply_to( ?APPLY_RESOURCE( call_batch_query, @@ -1298,7 +1396,7 @@ apply_query_fun( Id, Index, Ref, - [?QUERY(_, FirstRequest, _, _) | _] = Batch, + [?QUERY(_, FirstRequest, _, _, _) | _] = Batch, ResSt, Channels, QueryOpts @@ -1321,7 +1419,7 @@ apply_query_fun( min_batch => minimize(Batch) }, Requests = lists:map( - fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch + fun(?QUERY(_ReplyTo, Request, _, _ExpireAt, _TraceCtx)) -> Request end, Batch ), IsRetriable = false, AsyncWorkerMRef = undefined, @@ -1367,7 +1465,7 @@ handle_async_reply1( inflight_tid := InflightTID, resource_id := Id, buffer_worker := BufferWorkerPid, - min_query := ?QUERY(ReplyTo, _, _, ExpireAt) = _Query + min_query := ?QUERY(ReplyTo, _, _, 
ExpireAt, _TraceCtx) = _Query } = ReplyContext, Result ) -> @@ -1399,7 +1497,7 @@ do_handle_async_reply( request_ref := Ref, buffer_worker := BufferWorkerPid, inflight_tid := InflightTID, - min_query := ?QUERY(ReplyTo, _, Sent, _ExpireAt) = _Query + min_query := ?QUERY(ReplyTo, _, Sent, _ExpireAt, _TraceCtx) = _Query }, Result ) -> @@ -1486,13 +1584,13 @@ handle_async_batch_reply2([Inflight], ReplyContext, Results0, Now) -> %% So we just take the original flag from the ReplyContext batch %% and put it back to the batch found in inflight table %% which must have already been set to `false` - [?QUERY(_ReplyTo, _, HasBeenSent, _ExpireAt) | _] = Batch, + [?QUERY(_ReplyTo, _, HasBeenSent, _ExpireAt, _TraceCtx) | _] = Batch, {RealNotExpired0, RealExpired, Results} = sieve_expired_requests_with_results(RealBatch, Now, Results0), RealNotExpired = lists:map( - fun(?QUERY(ReplyTo, CoreReq, _HasBeenSent, ExpireAt)) -> - ?QUERY(ReplyTo, CoreReq, HasBeenSent, ExpireAt) + fun(?QUERY(ReplyTo, CoreReq, _HasBeenSent, ExpireAt, TraceCtx)) -> + ?QUERY(ReplyTo, CoreReq, HasBeenSent, ExpireAt, TraceCtx) end, RealNotExpired0 ), @@ -1678,7 +1776,10 @@ inflight_get_first_retriable(InflightTID, Now) -> case ets:select(InflightTID, MatchSpec, _Limit = 1) of '$end_of_table' -> none; - {[{Ref, Query = ?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)}], _Continuation} -> + { + [{Ref, Query = ?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt, _TraceCtx)}], + _Continuation + } -> case is_expired(ExpireAt, Now) of true -> {expired, Ref, [Query]}; @@ -1714,7 +1815,7 @@ inflight_append(undefined, _InflightItem) -> ok; inflight_append( InflightTID, - ?INFLIGHT_ITEM(Ref, [?QUERY(_, _, _, _) | _] = Batch0, IsRetriable, AsyncWorkerMRef) + ?INFLIGHT_ITEM(Ref, [?QUERY(_, _, _, _, _) | _] = Batch0, IsRetriable, AsyncWorkerMRef) ) -> Batch = mark_as_sent(Batch0), InflightItem = ?INFLIGHT_ITEM(Ref, Batch, IsRetriable, AsyncWorkerMRef), @@ -1726,7 +1827,10 @@ inflight_append( inflight_append( InflightTID, 
?INFLIGHT_ITEM( - Ref, ?QUERY(_ReplyTo, _Req, _HasBeenSent, _ExpireAt) = Query0, IsRetriable, AsyncWorkerMRef + Ref, + ?QUERY(_ReplyTo, _Req, _HasBeenSent, _ExpireAt, _TraceCtx) = Query0, + IsRetriable, + AsyncWorkerMRef ) ) -> Query = mark_as_sent(Query0), @@ -1790,9 +1894,13 @@ ack_inflight(undefined, _Ref, _BufferWorkerPid) -> ack_inflight(InflightTID, Ref, BufferWorkerPid) -> {Count, Removed} = case ets:take(InflightTID, Ref) of - [?INFLIGHT_ITEM(Ref, ?QUERY(_, _, _, _), _IsRetriable, _AsyncWorkerMRef)] -> + [?INFLIGHT_ITEM(Ref, ?QUERY(_, _, _, _, _), _IsRetriable, _AsyncWorkerMRef)] -> {1, true}; - [?INFLIGHT_ITEM(Ref, [?QUERY(_, _, _, _) | _] = Batch, _IsRetriable, _AsyncWorkerMRef)] -> + [ + ?INFLIGHT_ITEM( + Ref, [?QUERY(_, _, _, _, _) | _] = Batch, _IsRetriable, _AsyncWorkerMRef + ) + ] -> {length(Batch), true}; [] -> {0, false} @@ -1942,9 +2050,9 @@ do_collect_requests(Acc, Count, Limit) -> mark_as_sent(Batch) when is_list(Batch) -> lists:map(fun mark_as_sent/1, Batch); -mark_as_sent(?QUERY(ReplyTo, Req, _HasBeenSent, ExpireAt)) -> +mark_as_sent(?QUERY(ReplyTo, Req, _HasBeenSent, ExpireAt, TraceCtx)) -> HasBeenSent = true, - ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt). + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt, TraceCtx). 
is_unrecoverable_error({error, {unrecoverable_error, _}}) -> true; @@ -1967,7 +2075,7 @@ is_async_return(_) -> sieve_expired_requests(Batch, Now) -> lists:partition( - fun(?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)) -> + fun(?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt, _TraceCtx)) -> not is_expired(ExpireAt, Now) end, Batch @@ -1978,7 +2086,7 @@ sieve_expired_requests_with_results(Batch, Now, Results) when is_list(Results) - {RevNotExpiredBatch, RevNotExpiredResults, ExpiredBatch} = lists:foldl( fun( - {?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt) = Query, Result}, + {?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt, _TraceCtx) = Query, Result}, {NotExpAcc, ResAcc, ExpAcc} ) -> case not is_expired(ExpireAt, Now) of @@ -2026,15 +2134,16 @@ ensure_expire_at(#{timeout := TimeoutMS} = Opts) -> Opts#{expire_at => ExpireAt}. %% no need to keep the request for async reply handler -minimize(?QUERY(_, _, _, _) = Q) -> +minimize(?QUERY(_, _, _, _, _) = Q) -> do_minimize(Q); minimize(L) when is_list(L) -> lists:map(fun do_minimize/1, L). -ifdef(TEST). -do_minimize(?QUERY(_ReplyTo, _Req, _Sent, _ExpireAt) = Query) -> Query. +do_minimize(?QUERY(_ReplyTo, _Req, _Sent, _ExpireAt, _TraceCtx) = Query) -> Query. -else. -do_minimize(?QUERY(ReplyTo, _Req, Sent, ExpireAt)) -> ?QUERY(ReplyTo, [], Sent, ExpireAt). +do_minimize(?QUERY(ReplyTo, _Req, Sent, ExpireAt, TraceCtx)) -> + ?QUERY(ReplyTo, [], Sent, ExpireAt, TraceCtx). -endif. 
%% To avoid message loss due to misconfigurations, we adjust diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index d82951124..a24ef5bd0 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -54,7 +54,8 @@ roots() -> {"rule_creation", sc(ref("rule_creation"), #{desc => ?DESC("root_rule_creation")})}, {"rule_info", sc(ref("rule_info"), #{desc => ?DESC("root_rule_info")})}, {"rule_events", sc(ref("rule_events"), #{desc => ?DESC("root_rule_events")})}, - {"rule_test", sc(ref("rule_test"), #{desc => ?DESC("root_rule_test")})} + {"rule_test", sc(ref("rule_test"), #{desc => ?DESC("root_rule_test")})}, + {"rule_apply_test", sc(ref("rule_apply_test"), #{desc => ?DESC("root_apply_rule_test")})} ]. fields("rule_engine") -> @@ -124,6 +125,48 @@ fields("rule_test") -> )}, {"sql", sc(binary(), #{desc => ?DESC("test_sql"), required => true})} ]; +fields("rule_apply_test") -> + [ + {"context", + sc( + hoconsc:union([ + ref("ctx_pub"), + ref("ctx_sub"), + ref("ctx_unsub"), + ref("ctx_delivered"), + ref("ctx_acked"), + ref("ctx_dropped"), + ref("ctx_connected"), + ref("ctx_disconnected"), + ref("ctx_connack"), + ref("ctx_check_authz_complete"), + ref("ctx_bridge_mqtt"), + ref("ctx_delivery_dropped") + ]), + #{ + desc => ?DESC("test_context"), + default => #{} + } + )}, + {"environment", + sc( + typerefl:map(), + #{ + desc => + ?DESC("test_rule_environment"), + default => #{} + } + )}, + {"stop_action_after_template_render", + sc( + typerefl:boolean(), + #{ + desc => + ?DESC("stop_action_after_template_render"), + default => false + } + )} + ]; fields("metrics") -> [ {"matched", diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl b/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl index 354e40c5f..c0514b82b 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl @@ -37,6 
+37,7 @@ '/rule_test'/2, '/rules'/2, '/rules/:id'/2, + '/rules/:id/test'/2, '/rules/:id/metrics'/2, '/rules/:id/metrics/reset'/2 ]). @@ -145,6 +146,7 @@ paths() -> "/rule_test", "/rules", "/rules/:id", + "/rules/:id/test", "/rules/:id/metrics", "/rules/:id/metrics/reset" ]. @@ -161,6 +163,9 @@ rule_creation_schema() -> rule_test_schema() -> ref(emqx_rule_api_schema, "rule_test"). +rule_apply_test_schema() -> + ref(emqx_rule_api_schema, "rule_apply_test"). + rule_info_schema() -> ref(emqx_rule_api_schema, "rule_info"). @@ -258,6 +263,21 @@ schema("/rules/:id") -> } } }; +schema("/rules/:id/test") -> + #{ + 'operationId' => '/rules/:id/test', + post => #{ + tags => [<<"rules">>], + description => ?DESC("api8"), + summary => <<"Apply a rule with the given message and environment">>, + 'requestBody' => rule_apply_test_schema(), + responses => #{ + 400 => error_schema('BAD_REQUEST', "Invalid Parameters"), + 412 => error_schema('NOT_MATCH', "SQL Not Match"), + 200 => <<"Rule Applied">> + } + } + }; schema("/rules/:id/metrics") -> #{ 'operationId' => '/rules/:id/metrics', @@ -392,6 +412,24 @@ param_path_id() -> end ). +'/rules/:id/test'(post, #{body := Params, bindings := #{id := RuleId}}) -> + ?CHECK_PARAMS( + Params, + rule_apply_test, + begin + case emqx_rule_sqltester:apply_rule(RuleId, CheckedParams) of + {ok, Result} -> + {200, Result}; + {error, {parse_error, Reason}} -> + {400, #{code => 'BAD_REQUEST', message => err_msg(Reason)}}; + {error, nomatch} -> + {412, #{code => 'NOT_MATCH', message => <<"SQL Not Match">>}}; + {error, Reason} -> + {400, #{code => 'BAD_REQUEST', message => err_msg(Reason)}} + end + end + ). 
+ '/rules/:id'(get, #{bindings := #{id := Id}}) -> case emqx_rule_engine:get_rule(Id) of {ok, Rule} -> diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index 9a307e2c3..1204ea5e5 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -69,9 +69,14 @@ apply_rule_discard_result(Rule, Columns, Envs) -> ok. apply_rule(Rule = #{id := RuleID}, Columns, Envs) -> - ?TRACE("APPLY_RULE", "rule_activated", #{ - ruleid => RuleID, input => Columns, environment => Envs - }), + set_process_trace_metadata(RuleID, Columns), + trace_rule_sql( + "rule_activated", + #{ + input => Columns, environment => Envs + }, + debug + ), ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'matched'), clear_rule_payload(), try @@ -80,48 +85,80 @@ apply_rule(Rule = #{id := RuleID}, Columns, Envs) -> %% ignore the errors if select or match failed _:Reason = {select_and_transform_error, Error} -> ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'failed.exception'), - ?SLOG(warning, #{ - msg => "SELECT_clause_exception", - rule_id => RuleID, - reason => Error - }), + trace_rule_sql( + "SELECT_clause_exception", + #{ + reason => Error + }, + warning + ), {error, Reason}; _:Reason = {match_conditions_error, Error} -> ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'failed.exception'), - ?SLOG(warning, #{ - msg => "WHERE_clause_exception", - rule_id => RuleID, - reason => Error - }), + trace_rule_sql( + "WHERE_clause_exception", + #{ + reason => Error + }, + warning + ), {error, Reason}; _:Reason = {select_and_collect_error, Error} -> ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'failed.exception'), - ?SLOG(warning, #{ - msg => "FOREACH_clause_exception", - rule_id => RuleID, - reason => Error - }), + trace_rule_sql( + "FOREACH_clause_exception", + #{ + reason => Error + }, + warning + ), {error, Reason}; _:Reason = {match_incase_error, Error} -> ok = 
emqx_metrics_worker:inc(rule_metrics, RuleID, 'failed.exception'), - ?SLOG(warning, #{ - msg => "INCASE_clause_exception", - rule_id => RuleID, - reason => Error - }), + trace_rule_sql( + "INCASE_clause_exception", + #{ + reason => Error + }, + warning + ), {error, Reason}; Class:Error:StkTrace -> ok = emqx_metrics_worker:inc(rule_metrics, RuleID, 'failed.exception'), - ?SLOG(error, #{ - msg => "apply_rule_failed", - rule_id => RuleID, - exception => Class, - reason => Error, - stacktrace => StkTrace - }), + trace_rule_sql( + "apply_rule_failed", + #{ + exception => Class, + reason => Error, + stacktrace => StkTrace + }, + warning + ), {error, {Error, StkTrace}} + after + reset_process_trace_metadata(Columns) end. +set_process_trace_metadata(RuleID, #{clientid := ClientID}) -> + logger:update_process_metadata(#{ + rule_id => RuleID, + clientid => ClientID + }); +set_process_trace_metadata(RuleID, _) -> + logger:update_process_metadata(#{ + rule_id => RuleID + }). + +reset_process_trace_metadata(#{clientid := _ClientID}) -> + Meta = logger:get_process_metadata(), + Meta1 = maps:remove(clientid, Meta), + Meta2 = maps:remove(rule_id, Meta1), + logger:set_process_metadata(Meta2); +reset_process_trace_metadata(_) -> + Meta = logger:get_process_metadata(), + Meta1 = maps:remove(rule_id, Meta), + logger:set_process_metadata(Meta1). 
+ do_apply_rule( #{ id := RuleId, @@ -139,13 +176,18 @@ do_apply_rule( {ok, ColumnsAndSelected, FinalCollection} -> case FinalCollection of [] -> + trace_rule_sql("FOREACH_yielded_no_result"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'); _ -> + trace_rule_sql( + "FOREACH_yielded_result", #{result => FinalCollection}, debug + ), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'passed') end, NewEnvs = maps:merge(ColumnsAndSelected, Envs), {ok, [handle_action_list(RuleId, Actions, Coll, NewEnvs) || Coll <- FinalCollection]}; false -> + trace_rule_sql("FOREACH_yielded_no_result_no_match"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'), {error, nomatch} end; @@ -162,9 +204,11 @@ do_apply_rule( ) -> case evaluate_select(Fields, Columns, Conditions) of {ok, Selected} -> + trace_rule_sql("SELECT_yielded_result", #{result => Selected}, debug), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'passed'), {ok, handle_action_list(RuleId, Actions, Selected, maps:merge(Columns, Envs))}; false -> + trace_rule_sql("SELECT_yielded_no_result_no_match"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'), {error, nomatch} end. 
@@ -348,37 +392,42 @@ handle_action_list(RuleId, Actions, Selected, Envs) -> handle_action(RuleId, ActId, Selected, Envs) -> ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.total'), + trace_action(ActId, "activating_action"), try - do_handle_action(RuleId, ActId, Selected, Envs) + Result = do_handle_action(RuleId, ActId, Selected, Envs), + trace_action(ActId, "action_activated", #{result => Result}), + Result catch throw:out_of_service -> ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), ok = emqx_metrics_worker:inc( rule_metrics, RuleId, 'actions.failed.out_of_service' ), - ?SLOG(warning, #{msg => "out_of_service", action => ActId}); + trace_action(ActId, "out_of_service", #{}, warning); Err:Reason:ST -> ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'), - ?SLOG(error, #{ - msg => "action_failed", - action => ActId, - exception => Err, - reason => Reason, - stacktrace => ST - }) + trace_action( + ActId, + "action_failed", + #{ + exception => Err, + reason => Reason, + stacktrace => ST + }, + error + ) end. -define(IS_RES_DOWN(R), R == stopped; R == not_connected; R == not_found; R == unhealthy_target). 
-do_handle_action(RuleId, {bridge, BridgeType, BridgeName, ResId}, Selected, _Envs) -> - ?TRACE( - "BRIDGE", - "bridge_action", - #{bridge_id => emqx_bridge_resource:bridge_id(BridgeType, BridgeName)} - ), - ReplyTo = {fun ?MODULE:inc_action_metrics/2, [RuleId], #{reply_dropped => true}}, +do_handle_action(RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Selected, _Envs) -> + trace_action_bridge("BRIDGE", Action, "bridge_action", #{}, debug), + TraceCtx = do_handle_action_get_trace_context(Action), + ReplyTo = {fun ?MODULE:inc_action_metrics/2, [TraceCtx], #{reply_dropped => true}}, case - emqx_bridge:send_message(BridgeType, BridgeName, ResId, Selected, #{reply_to => ReplyTo}) + emqx_bridge:send_message(BridgeType, BridgeName, ResId, Selected, #{ + reply_to => ReplyTo, trace_ctx => maps:remove(action_id, TraceCtx) + }) of {error, Reason} when Reason == bridge_not_found; Reason == bridge_stopped -> throw(out_of_service); @@ -388,23 +437,20 @@ do_handle_action(RuleId, {bridge, BridgeType, BridgeName, ResId}, Selected, _Env Result end; do_handle_action( - RuleId, - {bridge_v2, BridgeType, BridgeName}, + _RuleId, + {bridge_v2, BridgeType, BridgeName} = Action, Selected, _Envs ) -> - ?TRACE( - "BRIDGE", - "bridge_action", - #{bridge_id => {bridge_v2, BridgeType, BridgeName}} - ), - ReplyTo = {fun ?MODULE:inc_action_metrics/2, [RuleId], #{reply_dropped => true}}, + trace_action_bridge("BRIDGE", Action, "bridge_action", #{}, debug), + TraceCtx = do_handle_action_get_trace_context(Action), + ReplyTo = {fun ?MODULE:inc_action_metrics/2, [TraceCtx], #{reply_dropped => true}}, case emqx_bridge_v2:send_message( BridgeType, BridgeName, Selected, - #{reply_to => ReplyTo} + #{reply_to => ReplyTo, trace_ctx => maps:remove(action_id, TraceCtx)} ) of {error, Reason} when Reason == bridge_not_found; Reason == bridge_stopped -> @@ -414,13 +460,43 @@ do_handle_action( Result -> Result end; -do_handle_action(RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) -> 
+do_handle_action(_RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) -> + trace_action(Action, "call_action_function"), %% the function can also throw 'out_of_service' Args = maps:get(args, Action, []), Result = Mod:Func(Selected, Envs, Args), - inc_action_metrics(RuleId, Result), + TraceCtx = do_handle_action_get_trace_context(Action), + inc_action_metrics(TraceCtx, Result), + trace_action(Action, "call_action_function_result", #{result => Result}, debug), Result. +do_handle_action_get_trace_context(Action) -> + case logger:get_process_metadata() of + #{ + rule_id := RuleID, + clientid := ClientID + } -> + #{ + rule_id => RuleID, + clientid => ClientID, + action_id => Action + }; + #{ + rule_id := RuleID + } -> + #{ + rule_id => RuleID, + action_id => Action + } + end. + +action_info({bridge, BridgeType, BridgeName, _ResId}) -> + #{type => BridgeType, name => BridgeName}; +action_info({bridge_v2, BridgeType, BridgeName}) -> + #{type => BridgeType, name => BridgeName}; +action_info(FuncInfoMap) -> + FuncInfoMap. + eval({Op, _} = Exp, Context) when is_list(Context) andalso (Op == path orelse Op == var) -> case Context of [Columns] -> @@ -599,21 +675,31 @@ nested_put(Alias, Val, Columns0) -> Columns = ensure_decoded_payload(Alias, Columns0), emqx_rule_maps:nested_put(Alias, Val, Columns). -inc_action_metrics(RuleId, Result) -> - _ = do_inc_action_metrics(RuleId, Result), +inc_action_metrics(TraceCtx, Result) -> + _ = do_inc_action_metrics(TraceCtx, Result), Result. 
-do_inc_action_metrics(RuleId, {error, {recoverable_error, _}}) -> +do_inc_action_metrics( + #{rule_id := RuleId, action_id := ActId} = TraceContext, + {error, {recoverable_error, _}} +) -> + trace_action(ActId, "out_of_service", TraceContext), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.out_of_service'); -do_inc_action_metrics(RuleId, {error, {unrecoverable_error, _}}) -> +do_inc_action_metrics( + #{rule_id := RuleId, action_id := ActId} = TraceContext, + {error, {unrecoverable_error, _} = Reason} +) -> + trace_action(ActId, "action_failed", maps:merge(#{reason => Reason}, TraceContext)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'); -do_inc_action_metrics(RuleId, R) -> +do_inc_action_metrics(#{rule_id := RuleId, action_id := ActId} = TraceContext, R) -> case is_ok_result(R) of false -> + trace_action(ActId, "action_failed", maps:merge(#{reason => R}, TraceContext)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'); true -> + trace_action(ActId, "action_success", maps:merge(#{result => R}, TraceContext)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.success') end. @@ -661,3 +747,39 @@ parse_function_name(Module, Name) when is_binary(Name) -> end; parse_function_name(_Module, Name) when is_atom(Name) -> Name. + +trace_action(ActId, Message) -> + trace_action_bridge("ACTION", ActId, Message). + +trace_action(ActId, Message, Extra) -> + trace_action_bridge("ACTION", ActId, Message, Extra, debug). + +trace_action(ActId, Message, Extra, Level) -> + trace_action_bridge("ACTION", ActId, Message, Extra, Level). + +trace_action_bridge(Tag, ActId, Message) -> + trace_action_bridge(Tag, ActId, Message, #{}, debug). 
+ +trace_action_bridge(Tag, ActId, Message, Extra, Level) -> + ?TRACE( + Level, + Tag, + Message, + maps:merge( + #{ + action_info => action_info(ActId) + }, + Extra + ) + ). + +trace_rule_sql(Message) -> + trace_rule_sql(Message, #{}, debug). + +trace_rule_sql(Message, Extra, Level) -> + ?TRACE( + Level, + "RULE_SQL_EXEC", + Message, + Extra + ). diff --git a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl index 8212e3385..e72b0fcd0 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl @@ -20,9 +20,77 @@ test/1, get_selected_data/3, %% Some SQL functions return different results in the test environment - is_test_runtime_env/0 + is_test_runtime_env/0, + apply_rule/2 ]). +apply_rule( + RuleId, + #{ + context := Context, + environment := Env, + stop_action_after_template_render := StopAfterRender + } +) -> + {ok, Rule} = emqx_rule_engine:get_rule(RuleId), + InTopic = get_in_topic(Context), + EventTopics = maps:get(from, Rule, []), + case lists:all(fun is_publish_topic/1, EventTopics) of + true -> + %% test if the topic matches the topic filters in the rule + case emqx_topic:match_any(InTopic, EventTopics) of + true -> do_apply_matched_rule(Rule, Context, Env, StopAfterRender); + false -> {error, nomatch} + end; + false -> + case lists:member(InTopic, EventTopics) of + true -> + %% the rule is for both publish and events, test it directly + do_apply_matched_rule(Rule, Context, Env, StopAfterRender); + false -> + {error, nomatch} + end + end. 
+ +do_apply_matched_rule(Rule, Context, Env, StopAfterRender) -> + update_process_trace_metadata(StopAfterRender), + Env1 = + case Env of + M when map_size(M) =:= 0 -> + %% Use the default environment if no environment is provided + default_apply_rule_environment(); + _ -> + Env + end, + ApplyRuleRes = emqx_rule_runtime:apply_rule(Rule, Context, Env1), + reset_trace_process_metadata(StopAfterRender), + ApplyRuleRes. + +update_process_trace_metadata(true = _StopAfterRender) -> + logger:update_process_trace_metadata(#{ + stop_action_after_render => true + }); +update_process_trace_metadata(false = _StopAfterRender) -> + ok. + +reset_trace_process_metadata(true = _StopAfterRender) -> + Meta = logger:get_process_metadata(), + NewMeta = maps:remove(stop_action_after_render, Meta), + logger:set_process_metadata(NewMeta); +reset_trace_process_metadata(false = _StopAfterRender) -> + ok. + +default_apply_rule_environment() -> + #{ + headers => #{ + protocol => mqtt, + username => undefined, + peerhost => {127, 0, 0, 1}, + proto_ver => 5, + properties => #{} + } + }. + -spec test(#{sql := binary(), context := map()}) -> {ok, map() | list()} | {error, term()}. test(#{sql := Sql, context := Context}) -> case emqx_rule_sqlparser:parse(Sql) of diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl new file mode 100644 index 000000000..d9d15dba0 --- /dev/null +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -0,0 +1,451 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_rule_engine_api_rule_apply_SUITE). + +-compile(nowarn_export_all). +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-define(CONF_DEFAULT, <<"rule_engine {rules {}}">>). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + application:load(emqx_conf), + % ok = emqx_common_test_helpers:load_config(emqx_rule_engine_schema, ?CONF_DEFAULT), + % ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx, emqx_rule_engine, emqx_bridge, emqx_bridge_http]), + + Apps = emqx_cth_suite:start( + [ + emqx, + emqx_conf, + emqx_connector, + emqx_bridge_http, + emqx_bridge, + emqx_rule_engine + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + emqx_mgmt_api_test_util:init_suite(), + [{apps, Apps} | Config]. + +end_per_suite(Config) -> + Apps = ?config(apps, Config), + emqx_mgmt_api_test_util:end_suite(), + ok = emqx_cth_suite:stop(Apps), + ok. + +init_per_testcase(_Case, Config) -> + emqx_bridge_http_test_lib:init_http_success_server(Config). + +end_per_testcase(_TestCase, _Config) -> + ok = emqx_bridge_http_connector_test_server:stop(), + emqx_bridge_v2_testlib:delete_all_bridges(), + emqx_bridge_v2_testlib:delete_all_connectors(), + emqx_common_test_helpers:call_janitor(), + ok. + +t_basic_apply_rule_trace_ruleid(Config) -> + basic_apply_rule_test_helper(Config, ruleid). 
+ +t_basic_apply_rule_trace_clientid(Config) -> + basic_apply_rule_test_helper(Config, clientid). + +basic_apply_rule_test_helper(Config, TraceType) -> + HTTPServerConfig = ?config(http_server, Config), + emqx_bridge_http_test_lib:make_bridge(HTTPServerConfig), + #{status := connected} = emqx_bridge_v2:health_check( + http, emqx_bridge_http_test_lib:bridge_name() + ), + %% Create Rule + RuleTopic = iolist_to_binary([<<"my_rule_topic/">>, atom_to_binary(?FUNCTION_NAME)]), + SQL = <<"SELECT payload.id as id FROM \"", RuleTopic/binary, "\"">>, + {ok, #{<<"id">> := RuleId}} = + emqx_bridge_testlib:create_rule_and_action_http( + http, + RuleTopic, + Config, + #{sql => SQL} + ), + ClientId = <<"c_emqx">>, + %% =================================== + %% Create trace for RuleId + %% =================================== + Now = erlang:system_time(second) - 10, + Start = Now, + End = Now + 60, + TraceName = atom_to_binary(?FUNCTION_NAME), + TraceValue = + case TraceType of + ruleid -> + RuleId; + clientid -> + ClientId + end, + Trace = #{ + name => TraceName, + type => TraceType, + TraceType => TraceValue, + start_at => Start, + end_at => End + }, + emqx_trace_SUITE:reload(), + ok = emqx_trace:clear(), + {ok, _} = emqx_trace:create(Trace), + %% =================================== + Context = #{ + clientid => ClientId, + event_type => message_publish, + payload => <<"{\"msg\": \"hello\"}">>, + qos => 1, + topic => RuleTopic, + username => <<"u_emqx">> + }, + Params = #{ + % body => #{ + <<"context">> => Context + % } + }, + emqx_trace:check(), + ok = emqx_trace_handler_SUITE:filesync(TraceName, TraceType), + {ok, _} = file:read_file(emqx_trace:log_file(TraceName, Now)), + ?assertMatch({ok, _}, call_apply_rule_api(RuleId, Params)), + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + begin + Bin = read_rule_trace_file(TraceName, TraceType, Now), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"rule_activated">>])), + ?assertNotEqual(nomatch, binary:match(Bin, 
[<<"SELECT_yielded_result">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"successfully_rendered_request">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) + end + ), + emqx_trace:delete(TraceName), + ok. + +%% Helper Functions + +% t_ctx_pub(_) -> +% SQL = <<"SELECT payload.msg as msg, clientid, username, payload, topic, qos FROM \"t/#\"">>, +% Context = #{ +% clientid => <<"c_emqx">>, +% event_type => message_publish, +% payload => <<"{\"msg\": \"hello\"}">>, +% qos => 1, +% topic => <<"t/a">>, +% username => <<"u_emqx">> +% }, +% Expected = Context#{msg => <<"hello">>}, +% do_test(SQL, Context, Expected). + +% t_ctx_sub(_) -> +% SQL = <<"SELECT clientid, username, topic, qos FROM \"$events/session_subscribed\"">>, +% Context = #{ +% clientid => <<"c_emqx">>, +% event_type => session_subscribed, +% qos => 1, +% topic => <<"t/a">>, +% username => <<"u_emqx">> +% }, + +% do_test(SQL, Context, Context). + +% t_ctx_unsub(_) -> +% SQL = <<"SELECT clientid, username, topic, qos FROM \"$events/session_unsubscribed\"">>, +% Context = #{ +% clientid => <<"c_emqx">>, +% event_type => session_unsubscribed, +% qos => 1, +% topic => <<"t/a">>, +% username => <<"u_emqx">> +% }, +% do_test(SQL, Context, Context). + +% t_ctx_delivered(_) -> +% SQL = +% <<"SELECT from_clientid, from_username, topic, qos, node, timestamp FROM \"$events/message_delivered\"">>, +% Context = #{ +% clientid => <<"c_emqx_2">>, +% event_type => message_delivered, +% from_clientid => <<"c_emqx_1">>, +% from_username => <<"u_emqx_1">>, +% payload => <<"{\"msg\": \"hello\"}">>, +% qos => 1, +% topic => <<"t/a">>, +% username => <<"u_emqx_2">> +% }, +% Expected = check_result([from_clientid, from_username, topic, qos], [node, timestamp], Context), +% do_test(SQL, Context, Expected). 
+ +% t_ctx_acked(_) -> +% SQL = +% <<"SELECT from_clientid, from_username, topic, qos, node, timestamp FROM \"$events/message_acked\"">>, + +% Context = #{ +% clientid => <<"c_emqx_2">>, +% event_type => message_acked, +% from_clientid => <<"c_emqx_1">>, +% from_username => <<"u_emqx_1">>, +% payload => <<"{\"msg\": \"hello\"}">>, +% qos => 1, +% topic => <<"t/a">>, +% username => <<"u_emqx_2">> +% }, + +% Expected = with_node_timestampe([from_clientid, from_username, topic, qos], Context), + +% do_test(SQL, Context, Expected). + +% t_ctx_droped(_) -> +% SQL = <<"SELECT reason, topic, qos, node, timestamp FROM \"$events/message_dropped\"">>, +% Topic = <<"t/a">>, +% QoS = 1, +% Reason = <<"no_subscribers">>, +% Context = #{ +% clientid => <<"c_emqx">>, +% event_type => message_dropped, +% payload => <<"{\"msg\": \"hello\"}">>, +% qos => QoS, +% reason => Reason, +% topic => Topic, +% username => <<"u_emqx">> +% }, + +% Expected = with_node_timestampe([reason, topic, qos], Context), +% do_test(SQL, Context, Expected). + +% t_ctx_connected(_) -> +% SQL = +% <<"SELECT clientid, username, keepalive, is_bridge FROM \"$events/client_connected\"">>, + +% Context = +% #{ +% clean_start => true, +% clientid => <<"c_emqx">>, +% event_type => client_connected, +% is_bridge => false, +% peername => <<"127.0.0.1:52918">>, +% username => <<"u_emqx">> +% }, +% Expected = check_result([clientid, username, keepalive, is_bridge], [], Context), +% do_test(SQL, Context, Expected). + +% t_ctx_disconnected(_) -> +% SQL = +% <<"SELECT clientid, username, reason, disconnected_at, node FROM \"$events/client_disconnected\"">>, + +% Context = +% #{ +% clientid => <<"c_emqx">>, +% event_type => client_disconnected, +% reason => <<"normal">>, +% username => <<"u_emqx">> +% }, +% Expected = check_result([clientid, username, reason], [disconnected_at, node], Context), +% do_test(SQL, Context, Expected). 
+ +% t_ctx_connack(_) -> +% SQL = +% <<"SELECT clientid, username, reason_code, node FROM \"$events/client_connack\"">>, + +% Context = +% #{ +% clean_start => true, +% clientid => <<"c_emqx">>, +% event_type => client_connack, +% reason_code => <<"sucess">>, +% username => <<"u_emqx">> +% }, +% Expected = check_result([clientid, username, reason_code], [node], Context), +% do_test(SQL, Context, Expected). + +% t_ctx_check_authz_complete(_) -> +% SQL = +% << +% "SELECT clientid, username, topic, action, result,\n" +% "authz_source, node FROM \"$events/client_check_authz_complete\"" +% >>, + +% Context = +% #{ +% action => <<"publish">>, +% clientid => <<"c_emqx">>, +% event_type => client_check_authz_complete, +% result => <<"allow">>, +% topic => <<"t/1">>, +% username => <<"u_emqx">> +% }, +% Expected = check_result( +% [clientid, username, topic, action], +% [authz_source, node, result], +% Context +% ), + +% do_test(SQL, Context, Expected). + +% t_ctx_delivery_dropped(_) -> +% SQL = +% <<"SELECT from_clientid, from_username, reason, topic, qos FROM \"$events/delivery_dropped\"">>, + +% Context = +% #{ +% clientid => <<"c_emqx_2">>, +% event_type => delivery_dropped, +% from_clientid => <<"c_emqx_1">>, +% from_username => <<"u_emqx_1">>, +% payload => <<"{\"msg\": \"hello\"}">>, +% qos => 1, +% reason => <<"queue_full">>, +% topic => <<"t/a">>, +% username => <<"u_emqx_2">> +% }, +% Expected = check_result([from_clientid, from_username, reason, qos, topic], [], Context), +% do_test(SQL, Context, Expected). 
+ +% t_mongo_date_function_should_return_string_in_test_env(_) -> +% SQL = +% <<"SELECT mongo_date() as mongo_date FROM \"$events/client_check_authz_complete\"">>, +% Context = +% #{ +% action => <<"publish">>, +% clientid => <<"c_emqx">>, +% event_type => client_check_authz_complete, +% result => <<"allow">>, +% topic => <<"t/1">>, +% username => <<"u_emqx">> +% }, +% CheckFunction = fun(Result) -> +% MongoDate = maps:get(mongo_date, Result), +% %% Use regex to match the expected string +% MatchResult = re:run(MongoDate, <<"ISODate\\([0-9]{4}-[0-9]{2}-[0-9]{2}T.*\\)">>), +% ?assertMatch({match, _}, MatchResult), +% ok +% end, +% do_test(SQL, Context, CheckFunction). + +% do_test(SQL, Context, Expected0) -> +% Res = emqx_rule_engine_api:'/rule_test'( +% post, +% test_rule_params(SQL, Context) +% ), +% ?assertMatch({200, _}, Res), +% {200, Result0} = Res, +% Result = emqx_utils_maps:unsafe_atom_key_map(Result0), +% case is_function(Expected0) of +% false -> +% Expected = maps:without([event_type], Expected0), +% ?assertMatch(Expected, Result, Expected); +% _ -> +% Expected0(Result) +% end, +% ok. + +% test_rule_params(Sql, Context) -> +% #{ +% body => #{ +% <<"context">> => Context, +% <<"sql">> => Sql +% } +% }. + +% with_node_timestampe(Keys, Context) -> +% check_result(Keys, [node, timestamp], Context). 
+ +% check_result(Keys, Exists, Context) -> +% Log = fun(Format, Args) -> +% lists:flatten(io_lib:format(Format, Args)) +% end, + +% Base = maps:with(Keys, Context), + +% fun(Result) -> +% maps:foreach( +% fun(Key, Value) -> +% ?assertEqual( +% Value, +% maps:get(Key, Result, undefined), +% Log("Key:~p value error~nResult:~p~n", [Key, Result]) +% ) +% end, +% Base +% ), + +% NotExists = fun(Key) -> Log("Key:~p not exists in result:~p~n", [Key, Result]) end, +% lists:foreach( +% fun(Key) -> +% Find = maps:find(Key, Result), +% Formatter = NotExists(Key), +% ?assertMatch({ok, _}, Find, Formatter), +% ?assertNotMatch({ok, undefined}, Find, Formatter), +% ?assertNotMatch({ok, <<"undefined">>}, Find, Formatter) +% end, +% Exists +% ), + +% ?assertEqual(erlang:length(Keys) + erlang:length(Exists), maps:size(Result), Result) +% end. + +call_apply_rule_api(RuleId, Params) -> + Method = post, + Path = emqx_mgmt_api_test_util:api_path(["rules", RuleId, "test"]), + ct:pal("sql test (http):\n ~p", [Params]), + Res = request(Method, Path, Params), + ct:pal("sql test (http) result:\n ~p", [Res]), + Res. + +request(Method, Path, Params) -> + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + Opts = #{return_all => true}, + case emqx_mgmt_api_test_util:request_api(Method, Path, "", AuthHeader, Params, Opts) of + {ok, {Status, Headers, Body0}} -> + Body = maybe_json_decode(Body0), + {ok, {Status, Headers, Body}}; + {error, {Status, Headers, Body0}} -> + Body = + case emqx_utils_json:safe_decode(Body0, [return_maps]) of + {ok, Decoded0 = #{<<"message">> := Msg0}} -> + Msg = maybe_json_decode(Msg0), + Decoded0#{<<"message">> := Msg}; + {ok, Decoded0} -> + Decoded0; + {error, _} -> + Body0 + end, + {error, {Status, Headers, Body}}; + Error -> + Error + end. + +maybe_json_decode(X) -> + case emqx_utils_json:safe_decode(X, [return_maps]) of + {ok, Decoded} -> Decoded; + {error, _} -> X + end. 
+ +read_rule_trace_file(TraceName, TraceType, From) -> + emqx_trace:check(), + ok = emqx_trace_handler_SUITE:filesync(TraceName, TraceType), + {ok, Bin} = file:read_file(emqx_trace:log_file(TraceName, From)), + io_lib:format("MYTRACE:~n~s", [Bin]), + Bin. From 5479932190270b704f56d957910fd4e7eefb5db7 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Sat, 6 Apr 2024 17:21:12 +0200 Subject: [PATCH 077/234] feat(apply rule test): make option to stop action after render work This commit makes the apply rule HTTP API option to stop an action work for the HTTP action, and adds infrastructure that makes it easy to add this functionality to other actions. --- apps/emqx/src/emqx_trace/emqx_trace.erl | 22 +- .../src/emqx_bridge_http_connector.erl | 59 +++- .../src/emqx_rule_runtime.erl | 10 +- .../src/emqx_rule_sqltester.erl | 2 +- .../emqx_rule_engine_api_rule_apply_SUITE.erl | 297 ++---------------- 5 files changed, 109 insertions(+), 281 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 8151c19b5..408644128 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -28,7 +28,8 @@ subscribe/3, unsubscribe/2, log/3, - log/4 + log/4, + rendered_action_template/2 ]). -export([ @@ -86,6 +87,25 @@ unsubscribe(<<"$SYS/", _/binary>>, _SubOpts) -> unsubscribe(Topic, SubOpts) -> ?TRACE("UNSUBSCRIBE", "unsubscribe", #{topic => Topic, sub_opts => SubOpts}). 
+rendered_action_template(ActionID, RenderResult) -> + Msg = io_lib:format("action_template_rendered(~s)", [ActionID]), + TraceResult = ?TRACE("QUERY_RENDER", Msg, RenderResult), + case logger:get_process_metadata() of + #{stop_action_after_render := true} -> + %% We throw an unrecoverable error to stop action before the + %% resource is called/modified + StopMsg = io_lib:format( + "action_stopped_after_render(~s): " + "Action stopped after template render due to test setting.", + [ActionID] + ), + MsgBin = iolist_to_binary(StopMsg), + error({unrecoverable_error, MsgBin}); + _ -> + ok + end, + TraceResult. + log(List, Msg, Meta) -> log(debug, List, Msg, Meta). diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index 99222aa00..46b3d5e1f 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -371,6 +371,29 @@ on_query( } ), NRequest = formalize_request(Method, BasePath, Request), + case NRequest of + {Path, Headers} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => Headers, + timeout => Timeout + } + ); + {Path, Headers, Body} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => Headers, + timeout => Timeout, + body => Body + } + ) + end, Worker = resolve_pool_worker(State, KeyOrNum), Result0 = ehttpc:request( Worker, @@ -480,6 +503,29 @@ on_query_async( } ), NRequest = formalize_request(Method, BasePath, Request), + case NRequest of + {Path, Headers} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => Headers, + timeout => Timeout + } + ); + {Path, Headers, Body} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => Headers, + timeout => Timeout, + body => Body + } + ) + end, MaxAttempts = 
maps:get(max_attempts, State, 3), Context = #{ attempt => 1, @@ -661,22 +707,13 @@ process_request_and_action(Request, ActionState, Msg) -> ), BodyTemplate = maps:get(body, ActionState), Body = render_request_body(BodyTemplate, RenderTmplFunc, Msg), - RenderResult = #{ + #{ method => Method, path => Path, body => Body, headers => Headers, request_timeout => maps:get(request_timeout, ActionState) - }, - ?TRACE( - "QUERY_RENDER", - "http_connector_successfully_rendered_request", - #{ - request => Request, - render_result => RenderResult - } - ), - RenderResult. + }. merge_proplist(Proplist1, Proplist2) -> lists:foldl( diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index 1204ea5e5..f2c89e8ed 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -471,7 +471,9 @@ do_handle_action(_RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) Result. do_handle_action_get_trace_context(Action) -> - case logger:get_process_metadata() of + Metadata = logger:get_process_metadata(), + StopAfterRender = maps:get(stop_action_after_render, Metadata, false), + case Metadata of #{ rule_id := RuleID, clientid := ClientID @@ -479,14 +481,16 @@ do_handle_action_get_trace_context(Action) -> #{ rule_id => RuleID, clientid => ClientID, - action_id => Action + action_id => Action, + stop_action_after_render => StopAfterRender }; #{ rule_id := RuleID } -> #{ rule_id => RuleID, - action_id => Action + action_id => Action, + stop_action_after_render => StopAfterRender } end. diff --git a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl index e72b0fcd0..fc4d2614f 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl @@ -67,7 +67,7 @@ do_apply_matched_rule(Rule, Context, Env, StopAfterRender) -> ApplyRuleRes. 
update_process_trace_metadata(true = _StopAfterRender) -> - logger:update_process_trace_metadata(#{ + logger:update_process_metadata(#{ stop_action_after_render => true }); update_process_trace_metadata(false = _StopAfterRender) -> diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index d9d15dba0..31a462de3 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -64,12 +64,15 @@ end_per_testcase(_TestCase, _Config) -> ok. t_basic_apply_rule_trace_ruleid(Config) -> - basic_apply_rule_test_helper(Config, ruleid). + basic_apply_rule_test_helper(Config, ruleid, false). t_basic_apply_rule_trace_clientid(Config) -> - basic_apply_rule_test_helper(Config, clientid). + basic_apply_rule_test_helper(Config, clientid, false). -basic_apply_rule_test_helper(Config, TraceType) -> +t_basic_apply_rule_trace_ruleid_stop_after_render(Config) -> + basic_apply_rule_test_helper(Config, ruleid, true). 
+ +basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> HTTPServerConfig = ?config(http_server, Config), emqx_bridge_http_test_lib:make_bridge(HTTPServerConfig), #{status := connected} = emqx_bridge_v2:health_check( @@ -121,7 +124,8 @@ basic_apply_rule_test_helper(Config, TraceType) -> }, Params = #{ % body => #{ - <<"context">> => Context + <<"context">> => Context, + <<"stop_action_after_template_render">> => StopAfterRender % } }, emqx_trace:check(), @@ -133,279 +137,42 @@ basic_apply_rule_test_helper(Config, TraceType) -> _NAttempts0 = 20, begin Bin = read_rule_trace_file(TraceName, TraceType, Now), + io:format("THELOG:~n~s", [Bin]), ?assertNotEqual(nomatch, binary:match(Bin, [<<"rule_activated">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"SELECT_yielded_result">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"successfully_rendered_request">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_template_rendered">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) end ), + case StopAfterRender of + true -> + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + begin + Bin = read_rule_trace_file(TraceName, TraceType, Now), + io:format("THELOG2:~n~s", [Bin]), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_failed">>])) + end + ); + false -> + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + begin + Bin = read_rule_trace_file(TraceName, TraceType, Now), + io:format("THELOG3:~n~s", [Bin]), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_success">>])) + end + ) + end, emqx_trace:delete(TraceName), ok. 
%% Helper Functions -% t_ctx_pub(_) -> -% SQL = <<"SELECT payload.msg as msg, clientid, username, payload, topic, qos FROM \"t/#\"">>, -% Context = #{ -% clientid => <<"c_emqx">>, -% event_type => message_publish, -% payload => <<"{\"msg\": \"hello\"}">>, -% qos => 1, -% topic => <<"t/a">>, -% username => <<"u_emqx">> -% }, -% Expected = Context#{msg => <<"hello">>}, -% do_test(SQL, Context, Expected). - -% t_ctx_sub(_) -> -% SQL = <<"SELECT clientid, username, topic, qos FROM \"$events/session_subscribed\"">>, -% Context = #{ -% clientid => <<"c_emqx">>, -% event_type => session_subscribed, -% qos => 1, -% topic => <<"t/a">>, -% username => <<"u_emqx">> -% }, - -% do_test(SQL, Context, Context). - -% t_ctx_unsub(_) -> -% SQL = <<"SELECT clientid, username, topic, qos FROM \"$events/session_unsubscribed\"">>, -% Context = #{ -% clientid => <<"c_emqx">>, -% event_type => session_unsubscribed, -% qos => 1, -% topic => <<"t/a">>, -% username => <<"u_emqx">> -% }, -% do_test(SQL, Context, Context). - -% t_ctx_delivered(_) -> -% SQL = -% <<"SELECT from_clientid, from_username, topic, qos, node, timestamp FROM \"$events/message_delivered\"">>, -% Context = #{ -% clientid => <<"c_emqx_2">>, -% event_type => message_delivered, -% from_clientid => <<"c_emqx_1">>, -% from_username => <<"u_emqx_1">>, -% payload => <<"{\"msg\": \"hello\"}">>, -% qos => 1, -% topic => <<"t/a">>, -% username => <<"u_emqx_2">> -% }, -% Expected = check_result([from_clientid, from_username, topic, qos], [node, timestamp], Context), -% do_test(SQL, Context, Expected). 
- -% t_ctx_acked(_) -> -% SQL = -% <<"SELECT from_clientid, from_username, topic, qos, node, timestamp FROM \"$events/message_acked\"">>, - -% Context = #{ -% clientid => <<"c_emqx_2">>, -% event_type => message_acked, -% from_clientid => <<"c_emqx_1">>, -% from_username => <<"u_emqx_1">>, -% payload => <<"{\"msg\": \"hello\"}">>, -% qos => 1, -% topic => <<"t/a">>, -% username => <<"u_emqx_2">> -% }, - -% Expected = with_node_timestampe([from_clientid, from_username, topic, qos], Context), - -% do_test(SQL, Context, Expected). - -% t_ctx_droped(_) -> -% SQL = <<"SELECT reason, topic, qos, node, timestamp FROM \"$events/message_dropped\"">>, -% Topic = <<"t/a">>, -% QoS = 1, -% Reason = <<"no_subscribers">>, -% Context = #{ -% clientid => <<"c_emqx">>, -% event_type => message_dropped, -% payload => <<"{\"msg\": \"hello\"}">>, -% qos => QoS, -% reason => Reason, -% topic => Topic, -% username => <<"u_emqx">> -% }, - -% Expected = with_node_timestampe([reason, topic, qos], Context), -% do_test(SQL, Context, Expected). - -% t_ctx_connected(_) -> -% SQL = -% <<"SELECT clientid, username, keepalive, is_bridge FROM \"$events/client_connected\"">>, - -% Context = -% #{ -% clean_start => true, -% clientid => <<"c_emqx">>, -% event_type => client_connected, -% is_bridge => false, -% peername => <<"127.0.0.1:52918">>, -% username => <<"u_emqx">> -% }, -% Expected = check_result([clientid, username, keepalive, is_bridge], [], Context), -% do_test(SQL, Context, Expected). - -% t_ctx_disconnected(_) -> -% SQL = -% <<"SELECT clientid, username, reason, disconnected_at, node FROM \"$events/client_disconnected\"">>, - -% Context = -% #{ -% clientid => <<"c_emqx">>, -% event_type => client_disconnected, -% reason => <<"normal">>, -% username => <<"u_emqx">> -% }, -% Expected = check_result([clientid, username, reason], [disconnected_at, node], Context), -% do_test(SQL, Context, Expected). 
- -% t_ctx_connack(_) -> -% SQL = -% <<"SELECT clientid, username, reason_code, node FROM \"$events/client_connack\"">>, - -% Context = -% #{ -% clean_start => true, -% clientid => <<"c_emqx">>, -% event_type => client_connack, -% reason_code => <<"sucess">>, -% username => <<"u_emqx">> -% }, -% Expected = check_result([clientid, username, reason_code], [node], Context), -% do_test(SQL, Context, Expected). - -% t_ctx_check_authz_complete(_) -> -% SQL = -% << -% "SELECT clientid, username, topic, action, result,\n" -% "authz_source, node FROM \"$events/client_check_authz_complete\"" -% >>, - -% Context = -% #{ -% action => <<"publish">>, -% clientid => <<"c_emqx">>, -% event_type => client_check_authz_complete, -% result => <<"allow">>, -% topic => <<"t/1">>, -% username => <<"u_emqx">> -% }, -% Expected = check_result( -% [clientid, username, topic, action], -% [authz_source, node, result], -% Context -% ), - -% do_test(SQL, Context, Expected). - -% t_ctx_delivery_dropped(_) -> -% SQL = -% <<"SELECT from_clientid, from_username, reason, topic, qos FROM \"$events/delivery_dropped\"">>, - -% Context = -% #{ -% clientid => <<"c_emqx_2">>, -% event_type => delivery_dropped, -% from_clientid => <<"c_emqx_1">>, -% from_username => <<"u_emqx_1">>, -% payload => <<"{\"msg\": \"hello\"}">>, -% qos => 1, -% reason => <<"queue_full">>, -% topic => <<"t/a">>, -% username => <<"u_emqx_2">> -% }, -% Expected = check_result([from_clientid, from_username, reason, qos, topic], [], Context), -% do_test(SQL, Context, Expected). 
- -% t_mongo_date_function_should_return_string_in_test_env(_) -> -% SQL = -% <<"SELECT mongo_date() as mongo_date FROM \"$events/client_check_authz_complete\"">>, -% Context = -% #{ -% action => <<"publish">>, -% clientid => <<"c_emqx">>, -% event_type => client_check_authz_complete, -% result => <<"allow">>, -% topic => <<"t/1">>, -% username => <<"u_emqx">> -% }, -% CheckFunction = fun(Result) -> -% MongoDate = maps:get(mongo_date, Result), -% %% Use regex to match the expected string -% MatchResult = re:run(MongoDate, <<"ISODate\\([0-9]{4}-[0-9]{2}-[0-9]{2}T.*\\)">>), -% ?assertMatch({match, _}, MatchResult), -% ok -% end, -% do_test(SQL, Context, CheckFunction). - -% do_test(SQL, Context, Expected0) -> -% Res = emqx_rule_engine_api:'/rule_test'( -% post, -% test_rule_params(SQL, Context) -% ), -% ?assertMatch({200, _}, Res), -% {200, Result0} = Res, -% Result = emqx_utils_maps:unsafe_atom_key_map(Result0), -% case is_function(Expected0) of -% false -> -% Expected = maps:without([event_type], Expected0), -% ?assertMatch(Expected, Result, Expected); -% _ -> -% Expected0(Result) -% end, -% ok. - -% test_rule_params(Sql, Context) -> -% #{ -% body => #{ -% <<"context">> => Context, -% <<"sql">> => Sql -% } -% }. - -% with_node_timestampe(Keys, Context) -> -% check_result(Keys, [node, timestamp], Context). 
- -% check_result(Keys, Exists, Context) -> -% Log = fun(Format, Args) -> -% lists:flatten(io_lib:format(Format, Args)) -% end, - -% Base = maps:with(Keys, Context), - -% fun(Result) -> -% maps:foreach( -% fun(Key, Value) -> -% ?assertEqual( -% Value, -% maps:get(Key, Result, undefined), -% Log("Key:~p value error~nResult:~p~n", [Key, Result]) -% ) -% end, -% Base -% ), - -% NotExists = fun(Key) -> Log("Key:~p not exists in result:~p~n", [Key, Result]) end, -% lists:foreach( -% fun(Key) -> -% Find = maps:find(Key, Result), -% Formatter = NotExists(Key), -% ?assertMatch({ok, _}, Find, Formatter), -% ?assertNotMatch({ok, undefined}, Find, Formatter), -% ?assertNotMatch({ok, <<"undefined">>}, Find, Formatter) -% end, -% Exists -% ), - -% ?assertEqual(erlang:length(Keys) + erlang:length(Exists), maps:size(Result), Result) -% end. - call_apply_rule_api(RuleId, Params) -> Method = post, Path = emqx_mgmt_api_test_util:api_path(["rules", RuleId, "test"]), From 9d1a69aaa9ca77fce31d913847a70d763d3bc15e Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Wed, 3 Apr 2024 18:39:32 +0800 Subject: [PATCH 078/234] fix: cannot import retained messages --- apps/emqx_bridge/src/emqx_bridge_v2.erl | 4 ++-- apps/emqx_retainer/src/emqx_retainer.app.src | 2 +- .../src/emqx_retainer_mnesia.erl | 9 +++++++++ changes/ce/fix-12826.en.md | 20 ++++--------------- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge_v2.erl b/apps/emqx_bridge/src/emqx_bridge_v2.erl index 10d597d36..e6feac7bd 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2.erl @@ -1036,9 +1036,9 @@ import_config(RawConf) -> SourceRes = emqx_bridge:import_config( RawConf, <<"sources">>, ?ROOT_KEY_SOURCES, config_key_path_sources() ), - combine_import_results([ActionRes, SourceRes]). + group_import_results([ActionRes, SourceRes]). 
-combine_import_results(Results0) -> +group_import_results(Results0) -> Results = lists:foldr( fun ({ok, OkRes}, {OkAcc, ErrAcc}) -> diff --git a/apps/emqx_retainer/src/emqx_retainer.app.src b/apps/emqx_retainer/src/emqx_retainer.app.src index 248cc9310..4a8b3cdc3 100644 --- a/apps/emqx_retainer/src/emqx_retainer.app.src +++ b/apps/emqx_retainer/src/emqx_retainer.app.src @@ -2,7 +2,7 @@ {application, emqx_retainer, [ {description, "EMQX Retainer"}, % strict semver, bump manually! - {vsn, "5.0.21"}, + {vsn, "5.0.22"}, {modules, []}, {registered, [emqx_retainer_sup]}, {applications, [kernel, stdlib, emqx, emqx_ctl]}, diff --git a/apps/emqx_retainer/src/emqx_retainer_mnesia.erl b/apps/emqx_retainer/src/emqx_retainer_mnesia.erl index bdc1f2c67..7e2a73a09 100644 --- a/apps/emqx_retainer/src/emqx_retainer_mnesia.erl +++ b/apps/emqx_retainer/src/emqx_retainer_mnesia.erl @@ -17,6 +17,7 @@ -module(emqx_retainer_mnesia). -behaviour(emqx_retainer). +-behaviour(emqx_db_backup). -include("emqx_retainer.hrl"). -include_lib("emqx/include/logger.hrl"). @@ -54,6 +55,8 @@ -export([populate_index_meta/0]). -export([reindex/3]). +-export([backup_tables/0]). + -record(retained_message, {topic, msg, expiry_time}). -record(retained_index, {key, expiry_time}). -record(retained_index_meta, {key, read_indices, write_indices, reindexing, extra}). @@ -73,6 +76,12 @@ topics() -> [emqx_topic:join(I) || I <- mnesia:dirty_all_keys(?TAB_MESSAGE)]. +%%-------------------------------------------------------------------- +%% Data backup +%%-------------------------------------------------------------------- +backup_tables() -> + [?TAB_MESSAGE]. 
+ %%-------------------------------------------------------------------- %% emqx_retainer callbacks %%-------------------------------------------------------------------- diff --git a/changes/ce/fix-12826.en.md b/changes/ce/fix-12826.en.md index 51255059d..28829cf87 100644 --- a/changes/ce/fix-12826.en.md +++ b/changes/ce/fix-12826.en.md @@ -1,18 +1,6 @@ -Cannot import `sources` from backup files. +Fixed an issue that prevented importing source data integrations and retained messages. -Before the fix, the following configs in backup files cannot be imported: +Before the fix: -``` -sources { - mqtt { - source_c384b174 { - connector = source_connector_c8287217 - enable = true - parameters { - qos = 0 - topic = "t/#" - } - } - } -} -``` +- source data integrations are ignored from the backup file +- importing the `mnesia` table for retained messages are not supported From 1c81c79a2cb5f6713e250a8b1f795845e01fef54 Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Sun, 7 Apr 2024 17:24:26 +0800 Subject: [PATCH 079/234] chore: add testcase for importing retained msgs and sources --- .../test/emqx_mgmt_data_backup_SUITE.erl | 23 ++++++++++++++++++ ...emqx-export-4.4.24-retainer-mqttsub.tar.gz | Bin 0 -> 2352 bytes 2 files changed, 23 insertions(+) create mode 100644 apps/emqx_management/test/emqx_mgmt_data_backup_SUITE_data/emqx-export-4.4.24-retainer-mqttsub.tar.gz diff --git a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl index 36a838743..e1d0a2512 100644 --- a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl @@ -18,6 +18,7 @@ -compile(export_all). -compile(nowarn_export_all). +-include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("eunit/include/eunit.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). 
@@ -86,6 +87,28 @@ t_empty_export_import(_Config) -> ?assertEqual(Exp, emqx_mgmt_data_backup:import(FileName)), ?assertEqual(ExpRawConf, emqx:get_raw_config([])). +t_cluster_hocon_import_mqtt_subscribers_retainer_messages(Config) -> + FNameEmqx44 = "emqx-export-4.4.24-retainer-mqttsub.tar.gz", + BackupFile = filename:join(?config(data_dir, Config), FNameEmqx44), + Exp = {ok, #{db_errors => #{}, config_errors => #{}}}, + ?assertEqual(Exp, emqx_mgmt_data_backup:import(BackupFile)), + RawConfAfterImport = emqx:get_raw_config([]), + %% verify that MQTT sources are imported + ?assertMatch( + #{<<"sources">> := #{<<"mqtt">> := Sources}} when map_size(Sources) > 0, + RawConfAfterImport + ), + %% verify that retainer messages are imported + ?assertMatch( + {ok, [#message{payload = <<"test-payload">>}]}, + emqx_retainer:read_message(<<"test-retained-message/1">>) + ), + %% Export and import again + {ok, #{filename := FileName}} = emqx_mgmt_data_backup:export(), + ?assertEqual(Exp, emqx_mgmt_data_backup:import(FileName)), + ?assertEqual(RawConfAfterImport, emqx:get_raw_config([])), + ok. 
+ t_cluster_hocon_export_import(Config) -> RawConfBeforeImport = emqx:get_raw_config([]), BootstrapFile = filename:join(?config(data_dir, Config), ?BOOTSTRAP_BACKUP), diff --git a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE_data/emqx-export-4.4.24-retainer-mqttsub.tar.gz b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE_data/emqx-export-4.4.24-retainer-mqttsub.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..67133f19d7e2958933f5fef09d07ccbfd64883e5 GIT binary patch literal 2352 zcmV-03D5Q)iwFR|V-jWn1MONtY$Hh(ZO4wCbu!Gb(r8bhmS>ikLAKLvw{3Q$MYBNI zIcx;GGl+rN&?d;Ify!a07`O7fv880)(_e;=oFrR^kF2k>J7s zB(Ci8tGc>lcbqt$buw(|A1Utszv}<`|5v^GRW|PL?pN@>=Mul7RkdnOs}SrX%fY15 z-}QYuXs=5jlN3d1H5xMiY&Q9)Qq#guL9(hf6-{fZ&AKKlYO7UOB)Rc%4WEVv)JFt{ zVq1g#;R)@aS*LS?T=)@xenJSJBL9XBLBgcka}C#d3L6lQrluX6|31TRsS1 z$};td<#cb}8r`an^*F9Go#;)=^24bA)+_}Biiv~zSZH9c@?$yTjtvC3N2Lb@{t?{8 z`_N*RDZ{o{noMCTd<)EP-bFhIHN0D^I~R>Ybnco9+l-NSYl9A z%+krWU~h6-E(9`SeFxm1GGB+~l8I=q?IL29M&&qt<%gx~_r7&+{aR^jR6W{L;7duZ zwW+Af)R7y0xT7eEZhLDkkt@xNAQ2EI4Z&> z?O?-q3FY9-c5#8yA$IF=1D(nAcswKq^ER=}F4p_5$t@t8MvxvY8{H$gW9@@9)lkS;ChJCz$GT%5h9-J+B&r!v6gDYBd+i2doZ4tYO!9(V%T^r9g&FIumYm3i90e9q6iHRN5do|2w+qI@o=<}v4b5`gy!fj*GK!h z=wRx7OeyN}Jx>WE4p9b%E^_H+S2x?y7k$)?>%CHM%-RL4#ihRt0Ix9ZTza8`tKg||CynncAz zR15G)8$W@Zbm;_x%>nKP3;Oi~@iHn+G}(1&D29iGC=7Hc4WAPYIHrVh{+JN6oWsm@ z0d0H>3o{TU&ZE^E-vD{#>!EyjW1{nD?71^=3{fKyEP`B2Hrx`QCT3ytMElYkQ3pAm zEFI$Z<;~ac-;awc-<56GfMkXk8I5CFjYW6uWQBEa8^a*b()M(;9;bjA9nPoZyCsU~OQwJ;?{*1W?OyHkq1A zn9qr!KI`K70?=vBf14ZkUwP`opEUFQrzrJS;{2yJlzMvpI}gI9C6Ec@BJ846z*M8! 
zWV`6I9$rdkOs84@eFswuJ^eM{-1mPqwWet4{oi@e9QkKWb>w+-+n9m>s;V@T_kXRr zmg4_;P-ZEcGahWN!sFfcH(z_@d%b*6wB4@W!^p%WD6;dV z9M^&4;T^~Axzz7v+me+3Sz9W+daW(3?S2-FMGf)JG-MKEU~R7OHm+k2$2u1I!3hqYm>`KM zLIxDLlpk-w2so+-#PTtSR{Ch$h38?g%nyseTDBY$?=w9)>hwMKaETY^tj_h}=zG|7 zm0$zR=ttZtBHxD!c;JU4F9^>I$g}hvJPaWakan-LYWoy{xk$y}6zH#jpSQyBD05%zGbx z#HfIhQHD+hQ71sJCA5U3?d@kfhf{ibniu~gVRY4T8XGh4zYd_E!2gEUYNYso9&|SN z|NYN_=D#NK|LW8kG`Sf}Zxinfs~p7v{Vu`|aV2?gcyy=82Fv{3Bgpn?H&_*SeZJMl z3kUyS{ro`${$KphoZ$a{nCjdKDPHQszZ|>oW)Fonad$w`cCQ!|L@UO>BIU0>{+D(4 zC;z@9NyRsp|AHj3krv8VcWhp750@rXiE*T;rGQ!XRUOiYdGk z-xWJ~>HQzP{mXYUOoN1&n#^o}a#Q`8qMor=+h(+;~Kok}nh-mg46NguSB;xR=1;pXnNpTS4VC`NI`yR5r9;((V zTBlxnNo#6T!l9mWo?nIK>qi@@Ss8OQqfT}G4|ji9V6LydEoD%Kv9qeG3dhZXClKVQ z=lNiTe_dj@c93HmHoRj@_+rhWCkVz3t9T{)vV&<}R-3if7lP;FuUSwo8NQ%+h!os= z9;c~Jt@FIDahbT!hrq3U@MJu2DgCd~y!BrjIgtGMY|J|UE1Ht1|LU5er1jr<(Am^~ zdl}ID+hqN>IyQqQtG;o`#kvK^$4(_GyX7Do^xR;nJ!GJcsOtnvaJ*k0IKE{sytw-y z|Cy+``%ZeYPbsE(@!z96bK97W|H<$FYOO|k{y!Hw8~nF_3YvdE{r(qb(6swsU|-Br z{&V1&4q`H#cD&CssbvP2Xb*)RvrFJ$8Snc9EqojJ_l`G5hrp7dRqUV3!M%AKY0f<|2>KSpFT2!CO3kk@(a2na7eeDj(c=R$OVN^ z3R^<-d)e?S!5l@lAFNqarvm_Z)@tGV!^%It5*-tMJntQFN}oiU8~+V^ZW}Y}|620< zzgkPJrTBjybT;_^2;MU99`pMT!=C#455vBgXg}^=0_>N Date: Sun, 7 Apr 2024 17:26:15 +0800 Subject: [PATCH 080/234] fix(iotdb): correctly handle undefined value of bool type --- apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl index f68ed02e3..92316f0cf 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl @@ -526,6 +526,7 @@ replace_var(Val, _Data) -> convert_bool(B) when is_boolean(B) -> B; convert_bool(null) -> null; +convert_bool(undefined) -> null; convert_bool(1) -> true; convert_bool(0) -> false; convert_bool(<<"1">>) -> true; From 
e89dc32c907f88bd872979ea653a61d9bcfb48a0 Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Sun, 7 Apr 2024 18:23:59 +0800 Subject: [PATCH 081/234] ci: run emqx_management both with ee and ce profile --- .../test/emqx_mgmt_data_backup_SUITE.erl | 43 +++++++++++-------- scripts/find-apps.sh | 4 ++ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl index e1d0a2512..fee392479 100644 --- a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl @@ -88,25 +88,30 @@ t_empty_export_import(_Config) -> ?assertEqual(ExpRawConf, emqx:get_raw_config([])). t_cluster_hocon_import_mqtt_subscribers_retainer_messages(Config) -> - FNameEmqx44 = "emqx-export-4.4.24-retainer-mqttsub.tar.gz", - BackupFile = filename:join(?config(data_dir, Config), FNameEmqx44), - Exp = {ok, #{db_errors => #{}, config_errors => #{}}}, - ?assertEqual(Exp, emqx_mgmt_data_backup:import(BackupFile)), - RawConfAfterImport = emqx:get_raw_config([]), - %% verify that MQTT sources are imported - ?assertMatch( - #{<<"sources">> := #{<<"mqtt">> := Sources}} when map_size(Sources) > 0, - RawConfAfterImport - ), - %% verify that retainer messages are imported - ?assertMatch( - {ok, [#message{payload = <<"test-payload">>}]}, - emqx_retainer:read_message(<<"test-retained-message/1">>) - ), - %% Export and import again - {ok, #{filename := FileName}} = emqx_mgmt_data_backup:export(), - ?assertEqual(Exp, emqx_mgmt_data_backup:import(FileName)), - ?assertEqual(RawConfAfterImport, emqx:get_raw_config([])), + case emqx_release:edition() of + ce -> + ok; + ee -> + FNameEmqx44 = "emqx-export-4.4.24-retainer-mqttsub.tar.gz", + BackupFile = filename:join(?config(data_dir, Config), FNameEmqx44), + Exp = {ok, #{db_errors => #{}, config_errors => #{}}}, + ?assertEqual(Exp, emqx_mgmt_data_backup:import(BackupFile)), + 
RawConfAfterImport = emqx:get_raw_config([]), + %% verify that MQTT sources are imported + ?assertMatch( + #{<<"sources">> := #{<<"mqtt">> := Sources}} when map_size(Sources) > 0, + RawConfAfterImport + ), + %% verify that retainer messages are imported + ?assertMatch( + {ok, [#message{payload = <<"test-payload">>}]}, + emqx_retainer:read_message(<<"test-retained-message/1">>) + ), + %% Export and import again + {ok, #{filename := FileName}} = emqx_mgmt_data_backup:export(), + ?assertEqual(Exp, emqx_mgmt_data_backup:import(FileName)), + ?assertEqual(RawConfAfterImport, emqx:get_raw_config([])) + end, ok. t_cluster_hocon_export_import(Config) -> diff --git a/scripts/find-apps.sh b/scripts/find-apps.sh index 89f0a66e5..908f22d9c 100755 --- a/scripts/find-apps.sh +++ b/scripts/find-apps.sh @@ -101,6 +101,10 @@ matrix() { entries+=("$(format_app_entry "$app" 1 emqx "$runner")") entries+=("$(format_app_entry "$app" 1 emqx-enterprise "$runner")") ;; + apps/emqx_management) + entries+=("$(format_app_entry "$app" 1 emqx "$runner")") + entries+=("$(format_app_entry "$app" 1 emqx-enterprise "$runner")") + ;; apps/*) if [[ -f "${app}/BSL.txt" ]]; then profile='emqx-enterprise' From 826ce5806ddfdd2cd27740477370e1bf7cb75092 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Sun, 7 Apr 2024 22:31:24 +0200 Subject: [PATCH 082/234] fix(dsrepl): ensure that new member UID matches server's UID Before that change, UIDs supplied in the `ra:add_member/3` were not the same as those servers were using. This haven't caused any issues for some reason, but it's better to ensure that UIDs are the same. 
--- .../src/emqx_ds_replication_layer_shard.erl | 56 ++++++++++++------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index a57e45dfd..5dbeafdb2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -80,14 +80,6 @@ server_name(DB, Shard, Site) -> DBBin = atom_to_binary(DB), binary_to_atom(<<"ds_", DBBin/binary, Shard/binary, "_", Site/binary>>). -server_uid(_DB, Shard) -> - %% NOTE - %% Each new "instance" of a server should have a unique identifier. Otherwise, - %% if some server migrates to another node during rebalancing, and then comes - %% back, `ra` will be very confused by it having the same UID as before. - Ts = integer_to_binary(erlang:system_time(microsecond)), - <>. - %% servers(DB, Shard, _Order = leader_preferred) -> @@ -159,11 +151,19 @@ add_local_server(DB, Shard) -> %% readiness. ShardServers = shard_servers(DB, Shard), LocalServer = local_server(DB, Shard), - ServerRecord = #{ - id => LocalServer, - membership => promotable, - uid => server_uid(DB, Shard) - }, + case server_info(uid, LocalServer) of + UID when is_binary(UID) -> + ServerRecord = #{ + id => LocalServer, + membership => promotable, + uid => UID + }; + unknown -> + ServerRecord = #{ + id => LocalServer, + membership => voter + } + end, case ra:add_member(ShardServers, ServerRecord, ?MEMBERSHIP_CHANGE_TIMEOUT) of {ok, _, _Leader} -> ok; @@ -206,15 +206,13 @@ server_info(readiness, Server) -> unknown end; server_info(leader, Server) -> - current_leader(Server). + current_leader(Server); +server_info(uid, Server) -> + maps:get(uid, ra_overview(Server), unknown). 
member_info(readiness, Server, Leader) -> - case ra:member_overview(Leader) of - {ok, #{cluster := Cluster}, _} -> - member_readiness(maps:get(Server, Cluster)); - _Error -> - unknown - end. + Cluster = maps:get(cluster, ra_overview(Leader), #{}), + member_readiness(maps:get(Server, Cluster, #{})). current_leader(Server) -> case ra:members(Server) of @@ -234,6 +232,14 @@ member_readiness(#{status := Status, voter_status := #{membership := Membership} member_readiness(#{}) -> unknown. +ra_overview(Server) -> + case ra:member_overview(Server) of + {ok, Overview, _Leader} -> + Overview; + _Error -> + #{} + end. + %% init({DB, Shard, Opts}) -> @@ -305,6 +311,16 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> ok end. +server_uid(_DB, Shard) -> + %% NOTE + %% Each new "instance" of a server should have a unique identifier. Otherwise, + %% if some server migrates to another node during rebalancing, and then comes + %% back, `ra` will be very confused by it having the same UID as before. + %% Keeping the shard ID as a prefix to make it easier to identify the server + %% in the filesystem / logs / etc. + Ts = integer_to_binary(erlang:system_time(microsecond)), + <>. + %% memoize(Fun, Args) -> From 6293efb995675d0470d4178b9916e26149d2ad70 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Sun, 7 Apr 2024 22:35:14 +0200 Subject: [PATCH 083/234] fix(dsrepl): retry crashed membership transitions --- .../src/emqx_ds_replication_layer_meta.erl | 11 +- .../emqx_ds_replication_shard_allocator.erl | 160 ++++++++++++------ 2 files changed, 120 insertions(+), 51 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index f27fa414e..31ed62fcb 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -79,7 +79,12 @@ n_shards/1 ]). 
--export_type([site/0, update_cluster_result/0]). +-export_type([ + site/0, + transition/0, + subscription_event/0, + update_cluster_result/0 +]). -include_lib("stdlib/include/qlc.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). @@ -134,6 +139,10 @@ %% Subject of the subscription: -type subject() :: emqx_ds:db(). +%% Event for the subscription: +-type subscription_event() :: + {changed, {shard, emqx_ds:db(), emqx_ds_replication_layer:shard_id()}}. + %% Peristent term key: -define(emqx_ds_builtin_site, emqx_ds_builtin_site). diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index 4113fcedc..363a453d6 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -40,6 +40,7 @@ -define(ALLOCATE_RETRY_TIMEOUT, 1_000). -define(TRANS_RETRY_TIMEOUT, 5_000). +-define(CRASH_RETRY_DELAY, 20_000). -define(REMOVE_REPLICA_DELAY, {10_000, 5_000}). -ifdef(TEST). @@ -51,9 +52,11 @@ %% +-spec start_link(emqx_ds:db()) -> {ok, pid()}. start_link(DB) -> gen_server:start_link(?MODULE, DB, []). +-spec n_shards(emqx_ds:db()) -> non_neg_integer(). n_shards(DB) -> Meta = persistent_term:get(?db_meta(DB)), maps:get(n_shards, Meta). @@ -63,18 +66,44 @@ shard_meta(DB, Shard) -> %% +-record(transhdl, { + shard :: emqx_ds_replication_layer:shard_id(), + trans :: emqx_ds_replication_layer_meta:transition(), + pid :: pid() +}). + +-type state() :: #{ + db := emqx_ds:db(), + shards := [emqx_ds_replication_layer:shard_id()], + status := allocating | ready, + transitions := #{_Track => #transhdl{}} +}. + +-spec init(emqx_ds:db()) -> {ok, state()}. 
init(DB) -> _ = erlang:process_flag(trap_exit, true), _ = logger:set_process_metadata(#{db => DB, domain => [emqx, ds, DB, shard_allocator]}), - State = #{db => DB, transitions => #{}, status => allocating}, + State = #{ + db => DB, + shards => [], + status => allocating, + transitions => #{} + }, {ok, handle_allocate_shards(State)}. +-spec handle_call(_Call, _From, state()) -> {reply, ignored, state()}. handle_call(_Call, _From, State) -> {reply, ignored, State}. +-spec handle_cast(_Cast, state()) -> {noreply, state()}. handle_cast(_Cast, State) -> {noreply, State}. +-spec handle_info(Info, state()) -> {noreply, state()} when + Info :: + emqx_ds_replication_layer_meta:subscription_event() + | {timeout, reference(), allocate} + | {'EXIT', pid(), _Reason}. handle_info({timeout, _TRef, allocate}, State) -> {noreply, handle_allocate_shards(State)}; handle_info({changed, {shard, DB, Shard}}, State = #{db := DB}) -> @@ -86,6 +115,7 @@ handle_info({'EXIT', Pid, Reason}, State) -> handle_info(_Info, State) -> {noreply, State}. +-spec terminate(_Reason, state()) -> _Ok. terminate(_Reason, State = #{db := DB, shards := Shards}) -> unsubscribe_db_changes(State), erase_db_meta(DB), @@ -121,38 +151,55 @@ unsubscribe_db_changes(_State) -> handle_shard_changed(Shard, State = #{db := DB}) -> ok = save_shard_meta(DB, Shard), - Transitions = emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard), - handle_shard_transitions(Shard, Transitions, State). + handle_shard_transitions(Shard, next_transitions(DB, Shard), State). -handle_shard_transitions(Shard, Transitions, State = #{db := DB}) -> +next_transitions(DB, Shard) -> + emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard). + +handle_shard_transitions(_Shard, [], State) -> + %% We reached the target allocation. 
+ State; +handle_shard_transitions(Shard, [Trans | _Rest], State) -> + case transition_handler(Shard, Trans, State) of + {Track, Handler} -> + ensure_transition_handler(Track, Shard, Trans, Handler, State); + undefined -> + State + end. + +transition_handler(Shard, Trans, _State = #{db := DB}) -> ThisSite = emqx_ds_replication_layer_meta:this_site(), - case Transitions of - [] -> - %% We reached the target allocation. - State; - [Trans = {add, ThisSite} | _Rest] -> - ensure_transition_handler(Shard, Trans, fun trans_add_local/3, State); - [Trans = {del, ThisSite} | _Rest] -> - ensure_transition_handler(Shard, Trans, fun trans_drop_local/3, State); - [Trans = {del, Site} | _Rest] -> + case Trans of + {add, ThisSite} -> + {Shard, fun trans_add_local/3}; + {del, ThisSite} -> + {Shard, fun trans_drop_local/3}; + {del, Site} -> ReplicaSet = emqx_ds_replication_layer_meta:replica_set(DB, Shard), case lists:member(Site, ReplicaSet) of true -> + %% NOTE + %% Let the replica handle its own removal first, but still set + %% up a removal handler after a delay, in case the replica is + %% unresponsive. + Handler = {fun trans_delay/5, [ + ?REMOVE_REPLICA_DELAY, + fun trans_rm_unresponsive/3 + ]}, %% NOTE %% Putting this transition handler on separate "track" so that it %% won't block any changes with higher priority (e.g. managing %% local replicas). - Handler = fun trans_rm_unresponsive/3, - ensure_transition_handler(unresp, Shard, Trans, Handler, State); + {_Track = unresp, Handler}; false -> - State + undefined end; - [_Trans | _Rest] -> + _NotOurs -> %% This site is not involved in the next queued transition. - State + undefined end. 
-handle_transition(DB, Shard, Trans, Fun) -> +handle_transition(DB, Shard, Trans, Handler) -> logger:set_process_metadata(#{ db => DB, shard => Shard, @@ -162,6 +209,11 @@ handle_transition(DB, Shard, Trans, Fun) -> dsrepl_shard_transition_begin, #{shard => Shard, db => DB, transition => Trans, pid => self()} ), + apply_handler(Handler, DB, Shard, Trans). + +apply_handler({Fun, Args}, DB, Shard, Trans) -> + erlang:apply(Fun, [DB, Shard, Trans | Args]); +apply_handler(Fun, DB, Shard, Trans) -> erlang:apply(Fun, [DB, Shard, Trans]). trans_add_local(DB, Shard, {add, Site}) -> @@ -217,18 +269,9 @@ do_drop_local(DB, Shard) -> do_drop_local(DB, Shard) end. -trans_rm_unresponsive(DB, Shard, Trans = {del, Site}) -> - %% NOTE - %% Let the replica handle its own removal first, thus the delay. - ok = delay(?REMOVE_REPLICA_DELAY), - Transitions = emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard), - case Transitions of - [Trans | _] -> - logger:info(#{msg => "Removing unresponsive shard replica", site => Site}), - do_rm_unresponsive(DB, Shard, Site); - _Outdated -> - exit({shutdown, skipped}) - end. +trans_rm_unresponsive(DB, Shard, {del, Site}) -> + logger:info(#{msg => "Removing unresponsive shard replica", site => Site}), + do_rm_unresponsive(DB, Shard, Site). do_rm_unresponsive(DB, Shard, Site) -> Server = emqx_ds_replication_layer_shard:shard_server(DB, Shard, Site), @@ -245,16 +288,23 @@ do_rm_unresponsive(DB, Shard, Site) -> do_rm_unresponsive(DB, Shard, Site) end. -%% +trans_delay(DB, Shard, Trans, Delay, NextHandler) -> + ok = delay(Delay), + case next_transitions(DB, Shard) of + [Trans | _] -> + apply_handler(NextHandler, DB, Shard, Trans); + _Outdated -> + exit({shutdown, skipped}) + end. -ensure_transition_handler(Shard, Trans, Handler, State) -> - ensure_transition_handler(Shard, Shard, Trans, Handler, State). 
+%% ensure_transition_handler(Track, Shard, Trans, Handler, State = #{transitions := Ts}) -> case maps:get(Track, Ts, undefined) of undefined -> Pid = start_transition_handler(Shard, Trans, Handler, State), - State#{transitions := Ts#{Track => {Shard, Trans, Pid}}}; + Record = #transhdl{shard = Shard, trans = Trans, pid = Pid}, + State#{transitions := Ts#{Track => Record}}; _AlreadyRunning -> %% NOTE: Avoiding multiple transition handlers for the same shard for safety. State @@ -263,34 +313,42 @@ ensure_transition_handler(Track, Shard, Trans, Handler, State = #{transitions := start_transition_handler(Shard, Trans, Handler, #{db := DB}) -> proc_lib:spawn_link(?MODULE, handle_transition, [DB, Shard, Trans, Handler]). -handle_exit(Pid, Reason, State = #{db := DB, transitions := Ts}) -> - case maps:to_list(maps:filter(fun(_, {_S, _T, P}) -> P == Pid end, Ts)) of - [{Track, {Shard, Trans, Pid}}] -> +handle_exit(Pid, Reason, State0 = #{db := DB, transitions := Ts}) -> + case maps:to_list(maps:filter(fun(_, TH) -> TH#transhdl.pid == Pid end, Ts)) of + [{Track, #transhdl{shard = Shard, trans = Trans}}] -> ?tp( dsrepl_shard_transition_end, #{shard => Shard, db => DB, transition => Trans, pid => Pid, reason => Reason} ), - ok = handle_transition_exit(Shard, Trans, Reason, State), - State#{transitions := maps:remove(Track, Ts)}; + State = State0#{transitions := maps:remove(Track, Ts)}, + handle_transition_exit(Shard, Trans, Reason, State); [] -> logger:warning(#{msg => "Unexpected exit signal", pid => Pid, reason => Reason}), - State + State0 end. -handle_transition_exit(Shard, Trans, normal, _State = #{db := DB}) -> +handle_transition_exit(Shard, Trans, normal, State = #{db := DB}) -> %% NOTE: This will trigger the next transition if any. 
- ok = emqx_ds_replication_layer_meta:update_replica_set(DB, Shard, Trans); -handle_transition_exit(_Shard, _Trans, {shutdown, skipped}, _State) -> - ok; -handle_transition_exit(Shard, Trans, Reason, _State) -> + ok = emqx_ds_replication_layer_meta:update_replica_set(DB, Shard, Trans), + State; +handle_transition_exit(_Shard, _Trans, {shutdown, skipped}, State) -> + State; +handle_transition_exit(Shard, Trans, Reason, State) -> + %% NOTE + %% In case of `{add, Site}` transition failure, we have no choice but to retry: + %% no other node can perform the transition and make progress towards the desired + %% state. For simplicity, we retry any crashed transition handler after a fixed + %% delay. logger:warning(#{ msg => "Shard membership transition failed", shard => Shard, transition => Trans, - reason => Reason + reason => Reason, + retry_in => ?CRASH_RETRY_DELAY }), - %% FIXME: retry - ok. + {Track, Handler} = transition_handler(Shard, Trans, State), + RetryHandler = {fun trans_delay/5, [?CRASH_RETRY_DELAY, Handler]}, + ensure_transition_handler(Track, Shard, Trans, RetryHandler, State). %% @@ -352,4 +410,6 @@ erase_shard_meta(DB, Shard) -> %% delay({MinDelay, Variance}) -> - timer:sleep(MinDelay + rand:uniform(Variance)). + timer:sleep(MinDelay + rand:uniform(Variance)); +delay(Delay) -> + timer:sleep(Delay). 
From ecaad348a754f37c25b92580488448d15fd999aa Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Sun, 7 Apr 2024 22:41:44 +0200 Subject: [PATCH 084/234] chore(dsrepl): update few outdated comments / TODOs --- .../src/emqx_ds_replication_layer_shard.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 5dbeafdb2..c2828f31f 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -98,13 +98,11 @@ get_servers_leader_preferred(DB, Shard) -> Servers = ra_leaderboard:lookup_members(ClusterName), [Leader | lists:delete(Leader, Servers)]; undefined -> - %% TODO: Dynamic membership. get_shard_servers(DB, Shard) end. get_server_local_preferred(DB, Shard) -> - %% NOTE: Contact random replica that is not a known leader. - %% TODO: Replica may be down, so we may need to retry. + %% NOTE: Contact either local server or a random replica. ClusterName = get_cluster_name(DB, Shard), case ra_leaderboard:lookup_members(ClusterName) of Servers when is_list(Servers) -> @@ -113,15 +111,14 @@ get_server_local_preferred(DB, Shard) -> %% TODO %% Leader is unkonwn if there are no servers of this group on the %% local node. We want to pick a replica in that case as well. - %% TODO: Dynamic membership. pick_random(get_shard_servers(DB, Shard)) end. pick_local(Servers) -> - case lists:dropwhile(fun({_Name, Node}) -> Node =/= node() end, Servers) of - [Local | _] -> + case lists:keyfind(node(), 2, Servers) of + Local when is_tuple(Local) -> Local; - [] -> + false -> pick_random(Servers) end. @@ -215,6 +212,7 @@ member_info(readiness, Server, Leader) -> member_readiness(maps:get(Server, Cluster, #{})). current_leader(Server) -> + %% NOTE: This call will block until the leader is known, or until the timeout. 
case ra:members(Server) of {ok, _Servers, Leader} -> Leader; From 2ace9bb893d75325cbf970ebb31b1d4dc878712d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Sun, 7 Apr 2024 22:50:36 +0200 Subject: [PATCH 085/234] chore(dsrepl): sprinkle few comments and typespecs for exports --- .../src/emqx_ds_replication_layer_shard.erl | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index c2828f31f..8f87b69b4 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -139,6 +139,11 @@ local_site() -> %% +%% @doc Add a local server to the shard cluster. +%% It's recommended to have the local server running before calling this function. +%% This function is idempotent. +-spec add_local_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + ok | emqx_ds:error(_Reason). add_local_server(DB, Shard) -> %% NOTE %% Adding local server as "promotable" member to the cluster, which means @@ -170,6 +175,11 @@ add_local_server(DB, Shard) -> {error, recoverable, Reason} end. +%% @doc Remove a local server from the shard cluster and clean up on-disk data. +%% It's required to have the local server running before calling this function. +%% This function is idempotent. +-spec drop_local_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> + ok | emqx_ds:error(_Reason). drop_local_server(DB, Shard) -> LocalServer = local_server(DB, Shard), case remove_server(DB, Shard, LocalServer) of @@ -179,6 +189,12 @@ drop_local_server(DB, Shard) -> Error end. +%% @doc Remove a (remote) server from the shard cluster. +%% The server might not be running when calling this function, e.g. the node +%% might be offline. Because of this, on-disk data will not be cleaned up. +%% This function is idempotent. 
+-spec remove_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), server()) -> + ok | emqx_ds:error(_Reason). remove_server(DB, Shard, Server) -> ShardServers = shard_servers(DB, Shard), case ra:remove_member(ShardServers, Server, ?MEMBERSHIP_CHANGE_TIMEOUT) of @@ -190,6 +206,10 @@ remove_server(DB, Shard, Server) -> {error, recoverable, Reason} end. +-spec server_info + (readiness, server()) -> ready | {unready, _Status, _Membership} | unknown; + (leader, server()) -> server() | unknown; + (uid, server()) -> _UID :: binary() | unknown. server_info(readiness, Server) -> %% NOTE %% Server is ready if it's either the leader or a follower with voter "membership" From 282cbb18beae024f935a3aa5d3280ff79ff1957b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 3 Apr 2024 17:35:49 +0800 Subject: [PATCH 086/234] fix: cpu usage and idle use two decimal places - prometheus - opentelemetry --- apps/emqx_management/src/emqx_mgmt.erl | 4 +++- apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src | 2 +- apps/emqx_opentelemetry/src/emqx_otel_cpu_sup.erl | 4 +++- changes/fix-12844.en.md | 1 + 4 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 changes/fix-12844.en.md diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 35908d3bd..e46658619 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -221,7 +221,9 @@ vm_stats('cpu') -> case emqx_vm:cpu_util([CpuUtilArg]) of %% return 0.0 when `emqx_cpu_sup_worker` is not started {all, Use, Idle, _} -> - [{cpu_use, Use}, {cpu_idle, Idle}]; + NUse = floor(Use * 100) / 100, + NIdle = ceil(Idle * 100) / 100, + [{cpu_use, NUse}, {cpu_idle, NIdle}]; _ -> [{cpu_use, 0}, {cpu_idle, 0}] end; diff --git a/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src b/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src index 81631b03a..cb7c7d32a 100644 --- a/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src +++ 
b/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src @@ -1,6 +1,6 @@ {application, emqx_opentelemetry, [ {description, "OpenTelemetry for EMQX Broker"}, - {vsn, "0.2.4"}, + {vsn, "0.2.5"}, {registered, []}, {mod, {emqx_otel_app, []}}, {applications, [ diff --git a/apps/emqx_opentelemetry/src/emqx_otel_cpu_sup.erl b/apps/emqx_opentelemetry/src/emqx_otel_cpu_sup.erl index fc67831be..54f88ad99 100644 --- a/apps/emqx_opentelemetry/src/emqx_otel_cpu_sup.erl +++ b/apps/emqx_opentelemetry/src/emqx_otel_cpu_sup.erl @@ -119,7 +119,9 @@ code_change(_OldVsn, State, _Extra) -> refresh(#{interval := Interval} = State) -> NState = case cpu_sup:util([]) of - {all, U, I, _} -> + {all, Use, Idle, _} -> + U = floor(Use * 100) / 100, + I = ceil(Idle * 100) / 100, State#{'cpu.use' => U, 'cpu.idle' => I}; _ -> State#{'cpu.use' => 0, 'cpu.idle' => 0} diff --git a/changes/fix-12844.en.md b/changes/fix-12844.en.md new file mode 100644 index 000000000..851c877ac --- /dev/null +++ b/changes/fix-12844.en.md @@ -0,0 +1 @@ +CPU usage/idle statistics values are only retained with 2 decimal precision. This affects Prometheus statistical metrics and OpenTelemetry governance metrics. 
From 79440064fe9b1d6ab47a3bf0d8c001e268761319 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 8 Apr 2024 11:03:55 +0200 Subject: [PATCH 087/234] style: fix problems reported by elvis --- .../src/emqx_rule_api_schema.erl | 67 +++++++------------ .../src/emqx_rule_runtime.erl | 2 +- 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index a24ef5bd0..4058d2f83 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -102,52 +102,12 @@ fields("rule_events") -> ]; fields("rule_test") -> [ - {"context", - sc( - hoconsc:union([ - ref("ctx_pub"), - ref("ctx_sub"), - ref("ctx_unsub"), - ref("ctx_delivered"), - ref("ctx_acked"), - ref("ctx_dropped"), - ref("ctx_connected"), - ref("ctx_disconnected"), - ref("ctx_connack"), - ref("ctx_check_authz_complete"), - ref("ctx_bridge_mqtt"), - ref("ctx_delivery_dropped") - ]), - #{ - desc => ?DESC("test_context"), - default => #{} - } - )}, + rule_input_message_context(), {"sql", sc(binary(), #{desc => ?DESC("test_sql"), required => true})} ]; fields("rule_apply_test") -> [ - {"context", - sc( - hoconsc:union([ - ref("ctx_pub"), - ref("ctx_sub"), - ref("ctx_unsub"), - ref("ctx_delivered"), - ref("ctx_acked"), - ref("ctx_dropped"), - ref("ctx_connected"), - ref("ctx_disconnected"), - ref("ctx_connack"), - ref("ctx_check_authz_complete"), - ref("ctx_bridge_mqtt"), - ref("ctx_delivery_dropped") - ]), - #{ - desc => ?DESC("test_context"), - default => #{} - } - )}, + rule_input_message_context(), {"environment", sc( typerefl:map(), @@ -358,6 +318,29 @@ fields("ctx_delivery_dropped") -> | msg_event_common_fields() ]. 
+rule_input_message_context() -> + {"context", + sc( + hoconsc:union([ + ref("ctx_pub"), + ref("ctx_sub"), + ref("ctx_unsub"), + ref("ctx_delivered"), + ref("ctx_acked"), + ref("ctx_dropped"), + ref("ctx_connected"), + ref("ctx_disconnected"), + ref("ctx_connack"), + ref("ctx_check_authz_complete"), + ref("ctx_bridge_mqtt"), + ref("ctx_delivery_dropped") + ]), + #{ + desc => ?DESC("test_context"), + default => #{} + } + )}. + qos() -> {"qos", sc(emqx_schema:qos(), #{desc => ?DESC("event_qos")})}. diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index f2c89e8ed..f90b5a974 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -420,7 +420,7 @@ handle_action(RuleId, ActId, Selected, Envs) -> end. -define(IS_RES_DOWN(R), R == stopped; R == not_connected; R == not_found; R == unhealthy_target). -do_handle_action(RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Selected, _Envs) -> +do_handle_action(_RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Selected, _Envs) -> trace_action_bridge("BRIDGE", Action, "bridge_action", #{}, debug), TraceCtx = do_handle_action_get_trace_context(Action), ReplyTo = {fun ?MODULE:inc_action_metrics/2, [TraceCtx], #{reply_dropped => true}}, From b57725f9961f5d1bfa45210222c4627f243e9e1a Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 8 Apr 2024 12:41:35 +0200 Subject: [PATCH 088/234] test(resource_SUITE): do test case fixes needed due to rule tracing work --- .../test/emqx_resource_SUITE.erl | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index a6cdaedb2..a97d589b3 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -277,10 +277,9 @@ t_batch_query_counter(_) -> fun(Result, Trace) -> 
?assertMatch({ok, 0}, Result), QueryTrace = ?of_kind(call_batch_query, Trace), - ?assertMatch([#{batch := [{query, _, get_counter, _, _}]}], QueryTrace) + ?assertMatch([#{batch := [{query, _, get_counter, _, _, _}]}], QueryTrace) end ), - NMsgs = 1_000, ?check_trace( ?TRACE_OPTS, @@ -340,7 +339,7 @@ t_query_counter_async_query(_) -> fun(Trace) -> %% the callback_mode of 'emqx_connector_demo' is 'always_sync'. QueryTrace = ?of_kind(call_query, Trace), - ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _}} | _], QueryTrace) + ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _, _}} | _], QueryTrace) end ), %% simple query ignores the query_mode and batching settings in the resource_worker @@ -351,7 +350,7 @@ t_query_counter_async_query(_) -> ?assertMatch({ok, 1000}, Result), %% the callback_mode if 'emqx_connector_demo' is 'always_sync'. QueryTrace = ?of_kind(call_query, Trace), - ?assertMatch([#{query := {query, _, get_counter, _, _}}], QueryTrace) + ?assertMatch([#{query := {query, _, get_counter, _, _, _}}], QueryTrace) end ), #{counters := C} = emqx_resource:get_metrics(?ID), @@ -397,7 +396,7 @@ t_query_counter_async_callback(_) -> end, fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), - ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _}} | _], QueryTrace) + ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _, _}} | _], QueryTrace) end ), @@ -408,7 +407,7 @@ t_query_counter_async_callback(_) -> fun(Result, Trace) -> ?assertMatch({ok, 1000}, Result), QueryTrace = ?of_kind(call_query, Trace), - ?assertMatch([#{query := {query, _, get_counter, _, _}}], QueryTrace) + ?assertMatch([#{query := {query, _, get_counter, _, _, _}}], QueryTrace) end ), #{counters := C} = emqx_resource:get_metrics(?ID), @@ -480,7 +479,7 @@ t_query_counter_async_inflight(_) -> ), fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), - ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _}} | _], QueryTrace) + ?assertMatch([#{query := 
{query, _, {inc_counter, 1}, _, _, _}} | _], QueryTrace) end ), tap_metrics(?LINE), @@ -537,7 +536,7 @@ t_query_counter_async_inflight(_) -> end, fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), - ?assertMatch([#{query := {query, _, {inc_counter, _}, _, _}} | _], QueryTrace), + ?assertMatch([#{query := {query, _, {inc_counter, _}, _, _, _}} | _], QueryTrace), ?assertEqual(WindowSize + Num + 1, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), ok @@ -557,7 +556,7 @@ t_query_counter_async_inflight(_) -> ), fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), - ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _}} | _], QueryTrace) + ?assertMatch([#{query := {query, _, {inc_counter, 1}, _, _, _}} | _], QueryTrace) end ), @@ -669,8 +668,8 @@ t_query_counter_async_inflight_batch(_) -> || Event = #{ ?snk_kind := call_batch_query_async, batch := [ - {query, _, {inc_counter, 1}, _, _}, - {query, _, {inc_counter, 1}, _, _} + {query, _, {inc_counter, 1}, _, _, _}, + {query, _, {inc_counter, 1}, _, _, _} ] } <- Trace @@ -754,7 +753,7 @@ t_query_counter_async_inflight_batch(_) -> fun(Trace) -> QueryTrace = ?of_kind(call_batch_query_async, Trace), ?assertMatch( - [#{batch := [{query, _, {inc_counter, _}, _, _} | _]} | _], + [#{batch := [{query, _, {inc_counter, _}, _, _, _} | _]} | _], QueryTrace ) end @@ -779,7 +778,7 @@ t_query_counter_async_inflight_batch(_) -> fun(Trace) -> QueryTrace = ?of_kind(call_batch_query_async, Trace), ?assertMatch( - [#{batch := [{query, _, {inc_counter, _}, _, _} | _]} | _], + [#{batch := [{query, _, {inc_counter, _}, _, _, _} | _]} | _], QueryTrace ) end @@ -2051,7 +2050,7 @@ do_t_expiration_before_sending(QueryMode) -> end, fun(Trace) -> ?assertMatch( - [#{batch := [{query, _, {inc_counter, 99}, _, _}]}], + [#{batch := [{query, _, {inc_counter, 99}, _, _, _}]}], ?of_kind(buffer_worker_flush_all_expired, Trace) ), Metrics = tap_metrics(?LINE), @@ -2167,7 +2166,7 @@ 
do_t_expiration_before_sending_partial_batch(QueryMode) -> #{ ?snk_kind := handle_async_reply, action := ack, - batch_or_query := [{query, _, {inc_counter, 99}, _, _}] + batch_or_query := [{query, _, {inc_counter, 99}, _, _, _}] }, 10 * TimeoutMS ); @@ -2189,8 +2188,8 @@ do_t_expiration_before_sending_partial_batch(QueryMode) -> ?assertMatch( [ #{ - expired := [{query, _, {inc_counter, 199}, _, _}], - not_expired := [{query, _, {inc_counter, 99}, _, _}] + expired := [{query, _, {inc_counter, 199}, _, _, _}], + not_expired := [{query, _, {inc_counter, 99}, _, _, _}] } ], ?of_kind(buffer_worker_flush_potentially_partial, Trace) @@ -2303,7 +2302,7 @@ do_t_expiration_async_after_reply(IsBatch) -> #{?snk_kind := delay}, #{ ?snk_kind := handle_async_reply_enter, - batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] + batch_or_query := [{query, _, {inc_counter, 199}, _, _, _} | _] } ), @@ -2346,8 +2345,8 @@ do_t_expiration_async_after_reply(IsBatch) -> [ #{ expired := [ - {query, _, {inc_counter, 199}, _, _}, - {query, _, {inc_counter, 299}, _, _} + {query, _, {inc_counter, 199}, _, _, _}, + {query, _, {inc_counter, 299}, _, _, _} ] } ], @@ -2365,8 +2364,8 @@ do_t_expiration_async_after_reply(IsBatch) -> single -> ?assertMatch( [ - #{expired := [{query, _, {inc_counter, 199}, _, _}]}, - #{expired := [{query, _, {inc_counter, 299}, _, _}]} + #{expired := [{query, _, {inc_counter, 199}, _, _, _}]}, + #{expired := [{query, _, {inc_counter, 299}, _, _, _}]} ], ?of_kind(handle_async_reply_expired, Trace) ) @@ -2417,7 +2416,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> #{?snk_kind := delay}, #{ ?snk_kind := handle_async_reply_enter, - batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] + batch_or_query := [{query, _, {inc_counter, 199}, _, _, _} | _] } ), @@ -2451,8 +2450,8 @@ t_expiration_batch_all_expired_after_reply(_Config) -> [ #{ expired := [ - {query, _, {inc_counter, 199}, _, _}, - {query, _, {inc_counter, 299}, _, _} + {query, _, 
{inc_counter, 199}, _, _, _}, + {query, _, {inc_counter, 299}, _, _, _} ] } ], @@ -2578,7 +2577,7 @@ do_t_expiration_retry() -> end, fun(Trace) -> ?assertMatch( - [#{expired := [{query, _, {inc_counter, 1}, _, _}]}], + [#{expired := [{query, _, {inc_counter, 1}, _, _, _}]}], ?of_kind(buffer_worker_retry_expired, Trace) ), Metrics = tap_metrics(?LINE), @@ -2655,8 +2654,8 @@ t_expiration_retry_batch_multiple_times(_Config) -> fun(Trace) -> ?assertMatch( [ - #{expired := [{query, _, {inc_counter, 1}, _, _}]}, - #{expired := [{query, _, {inc_counter, 2}, _, _}]} + #{expired := [{query, _, {inc_counter, 1}, _, _, _}]}, + #{expired := [{query, _, {inc_counter, 2}, _, _, _}]} ], ?of_kind(buffer_worker_retry_expired, Trace) ), From dcde30c38a98ae8112b4e8f71b41e0d9a94508f4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 13:22:31 +0200 Subject: [PATCH 089/234] test(dsrepl): add two more testcases for rebalancing --- .../src/emqx_ds_replication_layer_meta.erl | 3 + .../emqx_ds_replication_shard_allocator.erl | 4 + .../test/emqx_ds_replication_SUITE.erl | 164 +++++++++++++++++- 3 files changed, 164 insertions(+), 7 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 31ed62fcb..dca2442b8 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -134,6 +134,7 @@ ok | {error, {nonexistent_db, emqx_ds:db()}} | {error, {nonexistent_sites, [site()]}} + | {error, {too_few_sites, [site()]}} | {error, _}. 
%% Subject of the subscription: @@ -452,6 +453,8 @@ allocate_shards_trans(DB) -> assign_db_sites_trans(DB, Sites) -> Opts = db_config_trans(DB), case [S || S <- Sites, mnesia:read(?NODE_TAB, S, read) == []] of + [] when length(Sites) == 0 -> + mnesia:abort({too_few_sites, Sites}); [] -> ok; NonexistentSites -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index 363a453d6..2c9cc44fa 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -323,6 +323,10 @@ handle_exit(Pid, Reason, State0 = #{db := DB, transitions := Ts}) -> State = State0#{transitions := maps:remove(Track, Ts)}, handle_transition_exit(Shard, Trans, Reason, State); [] -> + %% NOTE + %% Actually, it's sort of expected to have a portion of exit signals here, + %% because of `mria:with_middleman/3`. But it's impossible to tell them apart + %% from other singals. logger:warning(#{msg => "Unexpected exit signal", pid => Pid, reason => Reason}), State0 end. diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index 872169765..6a2c36b30 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -127,22 +127,22 @@ t_replication_transfers_snapshots(Config) -> MessagesOffline ). 
-t_replication_rebalance(init, Config) -> +t_rebalance(init, Config) -> Apps = [appspec(emqx_durable_storage)], Nodes = emqx_cth_cluster:start( [ - {t_replication_rebalance1, #{apps => Apps}}, - {t_replication_rebalance2, #{apps => Apps}}, - {t_replication_rebalance3, #{apps => Apps}}, - {t_replication_rebalance4, #{apps => Apps}} + {t_rebalance1, #{apps => Apps}}, + {t_rebalance2, #{apps => Apps}}, + {t_rebalance3, #{apps => Apps}}, + {t_rebalance4, #{apps => Apps}} ], #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} ), [{nodes, Nodes} | Config]; -t_replication_rebalance('end', Config) -> +t_rebalance('end', Config) -> ok = emqx_cth_cluster:stop(?config(nodes, Config)). -t_replication_rebalance(Config) -> +t_rebalance(Config) -> NMsgs = 800, NClients = 5, Nodes = [N1, N2, N3, N4] = ?config(nodes, Config), @@ -243,6 +243,156 @@ t_replication_rebalance(Config) -> ?assertEqual(sample(20, Messages), sample(20, MessagesN3)), ?assertEqual(Messages, MessagesN3). +t_join_leave_errors(init, Config) -> + Apps = [appspec(emqx_durable_storage)], + Nodes = emqx_cth_cluster:start( + [ + {t_join_leave_errors1, #{apps => Apps}}, + {t_join_leave_errors2, #{apps => Apps}} + ], + #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} + ), + [{nodes, Nodes} | Config]; +t_join_leave_errors('end', Config) -> + ok = emqx_cth_cluster:stop(?config(nodes, Config)). + +t_join_leave_errors(Config) -> + [N1, N2] = ?config(nodes, Config), + + Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), + ?assertEqual(ok, erpc:call(N1, emqx_ds, open_db, [?DB, Opts])), + ?assertEqual(ok, erpc:call(N2, emqx_ds, open_db, [?DB, Opts])), + + [S1, S2] = [ds_repl_meta(N, this_site) || N <- [N1, N2]], + + ?assertEqual([S1], ds_repl_meta(N1, db_sites, [?DB])), + + %% Attempts to join a nonexistent DB / site. 
+ ?assertEqual( + {error, {nonexistent_db, boo}}, + ds_repl_meta(N1, join_db_site, [_DB = boo, S1]) + ), + ?assertEqual( + {error, {nonexistent_sites, [<<"NO-MANS-SITE">>]}}, + ds_repl_meta(N1, join_db_site, [?DB, <<"NO-MANS-SITE">>]) + ), + %% NOTE: Leaving a non-existent site is not an error. + ?assertEqual( + ok, + ds_repl_meta(N1, leave_db_site, [?DB, <<"NO-MANS-SITE">>]) + ), + + %% Should be no-op. + ?assertEqual(ok, ds_repl_meta(N1, join_db_site, [?DB, S1])), + ?assertEqual([], transitions(N1, ?DB)), + + %% Impossible to leave the last site. + ?assertEqual( + {error, {too_few_sites, []}}, + ds_repl_meta(N1, leave_db_site, [?DB, S1]) + ), + + %% "Move" the DB to the other node. + ?assertEqual(ok, ds_repl_meta(N1, join_db_site, [?DB, S2])), + ?assertEqual(ok, ds_repl_meta(N2, leave_db_site, [?DB, S1])), + ?assertMatch([_ | _], transitions(N1, ?DB)), + ?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))), + + %% Should be no-op. + ?assertEqual(ok, ds_repl_meta(N2, leave_db_site, [?DB, S1])), + ?assertEqual([], transitions(N1, ?DB)). + +t_rebalance_chaotic_converges(init, Config) -> + Apps = [appspec(emqx_durable_storage)], + Nodes = emqx_cth_cluster:start( + [ + {t_rebalance_chaotic_converges1, #{apps => Apps}}, + {t_rebalance_chaotic_converges2, #{apps => Apps}}, + {t_rebalance_chaotic_converges3, #{apps => Apps}} + ], + #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} + ), + [{nodes, Nodes} | Config]; +t_rebalance_chaotic_converges('end', Config) -> + ok = emqx_cth_cluster:stop(?config(nodes, Config)). + +t_rebalance_chaotic_converges(Config) -> + NMsgs = 500, + Nodes = [N1, N2, N3] = ?config(nodes, Config), + + %% Initialize DB on first two nodes. + Opts = opts(#{n_shards => 16, n_sites => 2, replication_factor => 3}), + ?assertEqual( + [{ok, ok}, {ok, ok}], + erpc:multicall([N1, N2], emqx_ds, open_db, [?DB, Opts]) + ), + + %% Open DB on the last node. 
+ ?assertEqual( + ok, + erpc:call(N3, emqx_ds, open_db, [?DB, Opts]) + ), + + %% Find out which sites there are. + Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], + ct:pal("Sites: ~p~n", [Sites]), + + %% Initially, the DB is assigned to [S1, S2]. + ?retry(500, 10, ?assertEqual([16, 16], [n_shards_online(N, ?DB) || N <- [N1, N2]])), + ?assertEqual( + lists:sort([S1, S2]), + ds_repl_meta(N1, db_sites, [?DB]) + ), + + %% Fill the storage with messages and few additional generations. + Messages0 = lists:append([ + fill_storage(N1, ?DB, NMsgs, #{client_id => <<"C1">>}), + fill_storage(N2, ?DB, NMsgs, #{client_id => <<"C2">>}), + fill_storage(N3, ?DB, NMsgs, #{client_id => <<"C3">>}) + ]), + + %% Construct a chaotic transition sequence that changes assignment to [S2, S3]. + Sequence = [ + {N1, join_db_site, S3}, + {N2, leave_db_site, S2}, + {N3, leave_db_site, S1}, + {N1, join_db_site, S2}, + {N2, join_db_site, S1}, + {N3, leave_db_site, S3}, + {N1, leave_db_site, S1}, + {N2, join_db_site, S3} + ], + + %% Apply the sequence while also filling the storage with messages. + TransitionMessages = lists:map( + fun({N, Transition, Site}) -> + %% Apply the transition. + ?assertEqual(ok, ds_repl_meta(N, Transition, [?DB, Site])), + %% Give some time for at least one transition to complete. + Transitions = transitions(N, ?DB), + ct:pal("Transitions after ~p: ~p", [N, Transitions]), + ?retry(200, 10, ?assertNotEqual(Transitions, transitions(N, ?DB))), + %% Fill the storage with messages. + CID = integer_to_binary(erlang:system_time()), + fill_storage(N, ?DB, NMsgs, #{client_id => CID}) + end, + Sequence + ), + + %% Wait for the last transition to complete. + ?retry(500, 20, ?assertEqual([], transitions(N1, ?DB))), + + ?assertEqual( + lists:sort([S2, S3]), + ds_repl_meta(N1, db_sites, [?DB]) + ), + + %% Check that all messages are still there. 
+ Messages = lists:append(TransitionMessages) ++ Messages0, + MessagesDB = lists:sort(fun compare_message/2, consume(N1, ?DB, ['#'], 0)), + ?assertEqual(sample(20, Messages), sample(20, MessagesDB)), + ?assertEqual(Messages, MessagesDB). + %% shard_server_info(Node, DB, Shard, Site, Info) -> From 4c0cc079c24d2ea7e47a18ccf746c8d4b63a424e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 13:25:45 +0200 Subject: [PATCH 090/234] fix(dsrepl): apply unnecessary rebalancing transitions cleanly --- .../src/emqx_ds_replication_layer_meta.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index dca2442b8..97d4e7412 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -481,15 +481,19 @@ modify_db_sites_trans(DB, Modifications) -> case Sites of Sites0 -> ok; - _Chagned -> + _Changed -> assign_db_sites_trans(DB, Sites) end. update_replica_set_trans(DB, Shard, Trans) -> case mnesia:read(?SHARD_TAB, {DB, Shard}, write) of [Record = #?SHARD_TAB{replica_set = ReplicaSet0, target_set = TargetSet0}] -> + %% NOTE + %% It's possible to complete a transition that's no longer planned. We + %% should anticipate that we may stray _away_ from the target set. 
+ TargetSet1 = emqx_maybe:define(TargetSet0, ReplicaSet0), ReplicaSet = apply_transition(Trans, ReplicaSet0), - case lists:usort(TargetSet0) of + case lists:usort(TargetSet1) of ReplicaSet -> TargetSet = undefined; TS -> From 02ee87309471092c7f8794def9ac519b4c1f5a42 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 8 Apr 2024 13:42:15 +0200 Subject: [PATCH 091/234] docs(emqx_rule_api_schema): fix type spec --- apps/emqx_rule_engine/src/emqx_rule_api_schema.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index 4058d2f83..862aea3b3 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -26,7 +26,7 @@ -export([namespace/0, roots/0, fields/1]). --type tag() :: rule_creation | rule_test | rule_engine. +-type tag() :: rule_creation | rule_test | rule_engine | rule_apply_test. -spec check_params(map(), tag()) -> {ok, map()} | {error, term()}. 
check_params(Params, Tag) -> From 600526a0e407e768701e179ba8c4904b35c34984 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 8 Apr 2024 14:54:33 +0200 Subject: [PATCH 092/234] test(emqx_bridge_http_SUITE): test case after name change --- apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl index ab0d5bb55..73f6359ab 100644 --- a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl +++ b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl @@ -421,7 +421,7 @@ t_send_get_trace_messages(Config) -> ?assertNotEqual(nomatch, binary:match(Bin, [<<"SELECT_yielded_result">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"successfully_rendered_request">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_template_rendered">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) end ), From c4fd94c7c5ade313738f9330265b5bed4d8e3f0a Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Mon, 8 Apr 2024 10:53:22 +0200 Subject: [PATCH 093/234] ci: add selenium test for emqx docs link in dashboard --- .github/workflows/run_docker_tests.yaml | 3 +++ scripts/ui-tests/dashboard_test.py | 23 +++++++++++++++++++++++ scripts/ui-tests/docker-compose.yaml | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/.github/workflows/run_docker_tests.yaml b/.github/workflows/run_docker_tests.yaml index cfc67be75..527c1cb32 100644 --- a/.github/workflows/run_docker_tests.yaml +++ b/.github/workflows/run_docker_tests.yaml @@ -47,6 +47,9 @@ jobs: echo "_EMQX_DOCKER_IMAGE_TAG=$_EMQX_DOCKER_IMAGE_TAG" >> $GITHUB_ENV - name: dashboard tests working-directory: ./scripts/ui-tests + env: + EMQX_VERSION: ${{ inputs.version-emqx }} + EMQX_ENTERPRISE_VERSION: ${{ 
inputs.version-emqx-enterprise }} run: | set -eu docker compose up --abort-on-container-exit --exit-code-from selenium diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index 79eb0640f..5f7ac8e15 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -1,3 +1,4 @@ +import os import time import unittest import pytest @@ -73,3 +74,25 @@ def test_log(driver, login, dashboard_url): label = driver.find_element(By.XPATH, "//div[@id='app']//form//label[contains(., 'Time Offset')]") assert driver.find_elements(By.ID, label.get_attribute("for")) +def test_docs_link(driver, login, dashboard_url): + dest_url = urljoin(dashboard_url, "/#/dashboard/overview") + driver.get(dest_url) + ensure_current_url(driver, dest_url) + xpath_link_help = "//div[@id='app']//div[@class='nav-header']//a[contains(@class, 'link-help')]" + link_help = driver.find_element(By.XPATH, xpath_link_help) + driver.execute_script("arguments[0].click();", link_help) + + emqx_name = os.getenv("EMQX_NAME") + emqx_community_version = os.getenv("EMQX_COMMUNITY_VERSION") + emqx_enterprise_version = os.getenv("EMQX_ENTERPRISE_VERSION") + if emqx_name == 'emqx-enterprise': + emqx_version = f"v{emqx_enterprise_version}" + docs_base_url = "https://docs.emqx.com/en/enterprise" + else: + emqx_version = f"v{emqx_community_version}" + docs_base_url = "https://www.emqx.io/docs/en" + + emqx_version = ".".join(emqx_version.split(".")[:2]) + docs_url = f"{docs_base_url}/{emqx_version}" + xpath = f"//div[@id='app']//div[@class='nav-header']//a[@href[starts-with(.,'{docs_url}')]]" + assert driver.find_element(By.XPATH, xpath) diff --git a/scripts/ui-tests/docker-compose.yaml b/scripts/ui-tests/docker-compose.yaml index f5a66ab33..c4a92e51f 100644 --- a/scripts/ui-tests/docker-compose.yaml +++ b/scripts/ui-tests/docker-compose.yaml @@ -9,6 +9,10 @@ services: selenium: shm_size: '2gb' image: ghcr.io/emqx/selenium-chrome:latest + environment: + EMQX_NAME: 
${EMQX_NAME} + EMQX_COMMUNITY_VERSION: ${EMQX_VERSION} + EMQX_ENTERPRISE_VERSION: ${EMQX_ENTERPRISE_VERSION} volumes: - ./:/app depends_on: From ba96edb0611fce85680c40268b887635a1121956 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 8 Apr 2024 10:08:49 -0300 Subject: [PATCH 094/234] fix(clients api): use alternative base64 function for OTP 25 Fixes https://github.com/emqx/emqx/pull/12798#discussion_r1555524603 --- apps/emqx_management/src/emqx_mgmt_api_clients.erl | 6 +++--- apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 9175f91ff..ee8e182bd 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1088,7 +1088,7 @@ next_ds_cursor(Iter) -> }. parse_cursor(CursorBin, Nodes) -> - try base64:decode(CursorBin, #{mode => urlsafe, padding => false}) of + try emqx_base62:decode(CursorBin) of Bin -> parse_cursor1(Bin, Nodes) catch @@ -1133,11 +1133,11 @@ serialize_cursor(#{type := ?CURSOR_TYPE_ETS, node_idx := NodeIdx, cont := Cont}) #{?CURSOR_ETS_NODE_IDX => NodeIdx, ?CURSOR_ETS_CONT => Cont} ], Bin = term_to_binary(Cursor0, [{compressed, 9}]), - base64:encode(Bin, #{mode => urlsafe, padding => false}); + emqx_base62:encode(Bin); serialize_cursor(#{type := ?CURSOR_TYPE_DS, iterator := Iter}) -> Cursor0 = [?CURSOR_VSN1, ?CURSOR_TYPE_DS, Iter], Bin = term_to_binary(Cursor0, [{compressed, 9}]), - base64:encode(Bin, #{mode => urlsafe, padding => false}). + emqx_base62:encode(Bin). %% An adapter function so we can reutilize all the logic in `emqx_mgmt_api' for %% selecting/fuzzy filters, and also reutilize its BPAPI for selecting rows. 
diff --git a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl index ebda34bc2..2623e6d4d 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl @@ -1673,7 +1673,7 @@ t_list_clients_v2(Config) -> %% Verify that a malicious cursor that could generate an atom on the node is %% rejected EvilAtomBin0 = <<131, 100, 0, 5, "some_atom_that_doesnt_exist_on_the_remote_node">>, - EvilAtomBin = base64:encode(EvilAtomBin0, #{mode => urlsafe, padding => false}), + EvilAtomBin = emqx_base62:encode(EvilAtomBin0), ?assertMatch( {error, {{_, 400, _}, _, #{<<"message">> := <<"bad cursor">>}}}, From 9628a00a82295da6190dbe4dc4a6fac1e0de9357 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 8 Apr 2024 15:34:29 +0200 Subject: [PATCH 095/234] docs(emqx_rule_api apply rule): fix doc strings --- apps/emqx_rule_engine/src/emqx_rule_api_schema.erl | 2 +- apps/emqx_rule_engine/src/emqx_rule_engine_api.erl | 4 ++-- apps/emqx_rule_engine/src/emqx_rule_sqltester.erl | 2 +- .../test/emqx_rule_engine_api_rule_apply_SUITE.erl | 5 +---- rel/i18n/emqx_rule_api_schema.hocon | 12 ++++++++++++ rel/i18n/emqx_rule_engine_api.hocon | 6 ++++++ 6 files changed, 23 insertions(+), 8 deletions(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index 862aea3b3..20363e726 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -117,7 +117,7 @@ fields("rule_apply_test") -> default => #{} } )}, - {"stop_action_after_template_render", + {"stop_action_after_template_rendering", sc( typerefl:boolean(), #{ diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl b/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl index c0514b82b..d203dd915 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl +++ 
b/apps/emqx_rule_engine/src/emqx_rule_engine_api.erl @@ -268,8 +268,8 @@ schema("/rules/:id/test") -> 'operationId' => '/rules/:id/test', post => #{ tags => [<<"rules">>], - description => ?DESC("api8"), - summary => <<"Apply a rule with the given message and environment">>, + description => ?DESC("api11"), + summary => <<"Apply a rule for testing">>, 'requestBody' => rule_apply_test_schema(), responses => #{ 400 => error_schema('BAD_REQUEST', "Invalid Parameters"), diff --git a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl index fc4d2614f..342a8d9f9 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl @@ -29,7 +29,7 @@ apply_rule( #{ context := Context, environment := Env, - stop_action_after_template_render := StopAfterRender + stop_action_after_template_rendering := StopAfterRender } ) -> {ok, Rule} = emqx_rule_engine:get_rule(RuleId), diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 31a462de3..2b77f9c3d 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -30,9 +30,6 @@ all() -> init_per_suite(Config) -> application:load(emqx_conf), - % ok = emqx_common_test_helpers:load_config(emqx_rule_engine_schema, ?CONF_DEFAULT), - % ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx, emqx_rule_engine, emqx_bridge, emqx_bridge_http]), - Apps = emqx_cth_suite:start( [ emqx, @@ -125,7 +122,7 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> Params = #{ % body => #{ <<"context">> => Context, - <<"stop_action_after_template_render">> => StopAfterRender + <<"stop_action_after_template_rendering">> => StopAfterRender % } }, emqx_trace:check(), diff --git a/rel/i18n/emqx_rule_api_schema.hocon 
b/rel/i18n/emqx_rule_api_schema.hocon index 0289f53ab..7f684e8ef 100644 --- a/rel/i18n/emqx_rule_api_schema.hocon +++ b/rel/i18n/emqx_rule_api_schema.hocon @@ -66,6 +66,18 @@ test_context.desc: test_context.label: """Event Conetxt""" +test_rule_environment.desc: +"""The environment that will be passed to the rule when it is applied. A default environment will be used if no environment is given.""" + +test_rule_environment.label: +"""Event Environment""" + +stop_action_after_template_render.desc: +"""Set this to true if the action should be stopped after its template has been rendered.""" + +stop_action_after_template_render.label: +"""Stop Action After Template Rendering""" + node_node.desc: """The node name""" diff --git a/rel/i18n/emqx_rule_engine_api.hocon b/rel/i18n/emqx_rule_engine_api.hocon index 385b71ddc..0745a108d 100644 --- a/rel/i18n/emqx_rule_engine_api.hocon +++ b/rel/i18n/emqx_rule_engine_api.hocon @@ -90,4 +90,10 @@ api9.desc: api9.label: """Get configuration""" +api11.desc: +"""Apply a rule with the given message and environment""" + +api11.label: +"""Apply Rule""" + } From 75bb7f5cdc26c319729aad445195c39b6cd5e411 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 16:04:33 +0200 Subject: [PATCH 096/234] fix(dsrepl): retry only `{add, Site}` crashed membership transitions To minimize the potential negative impact of removal transitions that crash for some unknown and unusual reasons. 
--- .../emqx_ds_replication_shard_allocator.erl | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index 2c9cc44fa..7afeb9d26 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -290,6 +290,7 @@ do_rm_unresponsive(DB, Shard, Site) -> trans_delay(DB, Shard, Trans, Delay, NextHandler) -> ok = delay(Delay), + %% NOTE: Proceed only if the transition we are going to handle is still desired. case next_transitions(DB, Shard) of [Trans | _] -> apply_handler(NextHandler, DB, Shard, Trans); @@ -338,11 +339,6 @@ handle_transition_exit(Shard, Trans, normal, State = #{db := DB}) -> handle_transition_exit(_Shard, _Trans, {shutdown, skipped}, State) -> State; handle_transition_exit(Shard, Trans, Reason, State) -> - %% NOTE - %% In case of `{add, Site}` transition failure, we have no choice but to retry: - %% no other node can perform the transition and make progress towards the desired - %% state. For simplicity, we retry any crashed transition handler after a fixed - %% delay. logger:warning(#{ msg => "Shard membership transition failed", shard => Shard, @@ -350,9 +346,18 @@ handle_transition_exit(Shard, Trans, Reason, State) -> reason => Reason, retry_in => ?CRASH_RETRY_DELAY }), - {Track, Handler} = transition_handler(Shard, Trans, State), - RetryHandler = {fun trans_delay/5, [?CRASH_RETRY_DELAY, Handler]}, - ensure_transition_handler(Track, Shard, Trans, RetryHandler, State). + %% NOTE + %% In case of `{add, Site}` transition failure, we have no choice but to retry: + %% no other node can perform the transition and make progress towards the desired + %% state. 
+ case Trans of
+ {add, _ThisSite} ->
+ {Track, Handler} = transition_handler(Shard, Trans, State),
+ RetryHandler = {fun trans_delay/5, [?CRASH_RETRY_DELAY, Handler]},
+ ensure_transition_handler(Track, Shard, Trans, RetryHandler, State);
+ _Another ->
+ State
+ end.

%%

From 7a836317acffb72224b617a9d7154735e1ae3139 Mon Sep 17 00:00:00 2001
From: Andrew Mayorov
Date: Mon, 8 Apr 2024 19:58:59 +0200
Subject: [PATCH 097/234] fix(dsrepl): trigger unfinished shard transition upon
 startup

Also provide a trivial API to trigger them by hand.

---
 .../emqx_ds_replication_shard_allocator.erl | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl
index 7afeb9d26..cfc2b7c81 100644
--- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl
+++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl
@@ -23,6 +23,9 @@
 -export([n_shards/1]).
 -export([shard_meta/2]).

+%% Maintenance purposes:
+-export([trigger_transitions/1]).
+
 -behaviour(gen_server).
 -export([
 init/1,
@@ -52,10 +55,16 @@

 %%

+-record(trigger_transitions, {}).
+
 -spec start_link(emqx_ds:db()) -> {ok, pid()}.
 start_link(DB) ->
 gen_server:start_link(?MODULE, DB, []).

+-spec trigger_transitions(pid()) -> ok.
+trigger_transitions(Pid) ->
+ gen_server:cast(Pid, #trigger_transitions{}).
+
 -spec n_shards(emqx_ds:db()) -> non_neg_integer().
 n_shards(DB) ->
 Meta = persistent_term:get(?db_meta(DB)),
@@ -96,6 +105,8 @@ handle_call(_Call, _From, State) ->
 {reply, ignored, State}.

 -spec handle_cast(_Cast, state()) -> {noreply, state()}.
+handle_cast(#trigger_transitions{}, State) ->
+ {noreply, handle_pending_transitions(State)};
 handle_cast(_Cast, State) ->
 {noreply, State}.
@@ -125,11 +136,14 @@ terminate(_Reason, #{}) -> %% -handle_allocate_shards(State) -> - case allocate_shards(State) of - {ok, NState} -> +handle_allocate_shards(State0) -> + case allocate_shards(State0) of + {ok, State} -> + %% NOTE + %% Subscribe to shard changes and trigger any yet unhandled transitions. ok = subscribe_db_changes(State), - NState; + ok = trigger_transitions(self()), + State; {error, Data} -> _ = logger:notice( Data#{ @@ -138,7 +152,7 @@ handle_allocate_shards(State) -> } ), _TRef = erlang:start_timer(?ALLOCATE_RETRY_TIMEOUT, self(), allocate), - State + State0 end. subscribe_db_changes(#{db := DB}) -> @@ -153,6 +167,15 @@ handle_shard_changed(Shard, State = #{db := DB}) -> ok = save_shard_meta(DB, Shard), handle_shard_transitions(Shard, next_transitions(DB, Shard), State). +handle_pending_transitions(State = #{db := DB, shards := Shards}) -> + lists:foldl( + fun(Shard, StateAcc) -> + handle_shard_transitions(Shard, next_transitions(DB, Shard), StateAcc) + end, + State, + Shards + ). + next_transitions(DB, Shard) -> emqx_ds_replication_layer_meta:replica_set_transitions(DB, Shard). From bf12efac6dba235a46e6446acbaef357144f120e Mon Sep 17 00:00:00 2001 From: zmstone Date: Mon, 8 Apr 2024 20:18:01 +0200 Subject: [PATCH 098/234] fix(variform): add basic tests --- apps/emqx_utils/src/emqx_variform.erl | 49 +++++-- apps/emqx_utils/src/emqx_variform_str.erl | 17 ++- apps/emqx_utils/test/emqx_variform_tests.erl | 129 +++++++++++++++++++ 3 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 apps/emqx_utils/test/emqx_variform_tests.erl diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 95ea1e1ce..25825ea9f 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -22,7 +22,12 @@ %% or used to choose the first non-empty value from a list of variables. -module(emqx_variform). --export([inject_allowed_modules/1]). +-export([ + inject_allowed_module/1, + inject_allowed_modules/1, + erase_allowed_module/1, + erase_allowed_modules/1 +]). -export([render/2, render/3]). %% @doc Render a variform expression with bindings. @@ -48,6 +53,8 @@ render(Expression, Bindings) -> render(Expression, Bindings, #{}). +render(Expression, Bindings, Opts) when is_binary(Expression) -> + render(unicode:characters_to_list(Expression), Bindings, Opts); render(Expression, Bindings, Opts) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> @@ -66,7 +73,7 @@ render(Expression, Bindings, Opts) -> eval_as_string(Expr, Bindings, _Opts) -> try - {ok, iolist_to_binary(eval(Expr, Bindings))} + {ok, str(eval(Expr, Bindings))} catch throw:Reason -> {error, Reason}; @@ -97,7 +104,7 @@ call(emqx_variform_str, concat, Args) -> call(emqx_variform_str, coalesce, Args) -> str(emqx_variform_str:coalesce(Args)); call(Mod, Fun, Args) -> - str(erlang:apply(Mod, Fun, Args)). + erlang:apply(Mod, Fun, Args). 
resolve_func_name(FuncNameStr) -> case string:tokens(FuncNameStr, ".") of @@ -107,7 +114,10 @@ resolve_func_name(FuncNameStr) -> list_to_existing_atom(Mod0) catch error:badarg -> - throw(#{unknown_module => Mod0}) + throw(#{ + reason => unknown_variform_module, + module => Mod0 + }) end, ok = assert_module_allowed(Mod), Fun = @@ -115,7 +125,10 @@ resolve_func_name(FuncNameStr) -> list_to_existing_atom(Fun0) catch error:badarg -> - throw(#{unknown_function => Fun0}) + throw(#{ + reason => unknown_variform_function, + function => Fun0 + }) end, {Mod, Fun}; [Fun] -> @@ -125,11 +138,13 @@ resolve_func_name(FuncNameStr) -> catch error:badarg -> throw(#{ - reason => "unknown_variform_function", + reason => unknown_variform_function, function => Fun }) end, - {emqx_variform_str, FuncName} + {emqx_variform_str, FuncName}; + _ -> + throw(#{reason => invalid_function_reference, function => FuncNameStr}) end. resolve_var_value(VarName, Bindings) -> @@ -145,13 +160,14 @@ assert_func_exported(emqx_variform_str, concat, _Arity) -> assert_func_exported(emqx_variform_str, coalesce, _Arity) -> ok; assert_func_exported(Mod, Fun, Arity) -> + %% ensure beam loaded _ = Mod:module_info(md5), case erlang:function_exported(Mod, Fun, Arity) of true -> ok; false -> throw(#{ - reason => "unknown_variform_function", + reason => unknown_variform_function, module => Mod, function => Fun, arity => Arity @@ -167,16 +183,27 @@ assert_module_allowed(Mod) -> ok; false -> throw(#{ - reason => "unallowed_veriform_module", + reason => unallowed_veriform_module, module => Mod }) end. -inject_allowed_modules(Modules) -> +inject_allowed_module(Module) when is_atom(Module) -> + inject_allowed_modules([Module]). + +inject_allowed_modules(Modules) when is_list(Modules) -> Allowed0 = get_allowed_modules(), Allowed = lists:usort(Allowed0 ++ Modules), persistent_term:put({emqx_variform, allowed_modules}, Allowed). +erase_allowed_module(Module) when is_atom(Module) -> + erase_allowed_modules([Module]). 
+ +erase_allowed_modules(Modules) when is_list(Modules) -> + Allowed0 = get_allowed_modules(), + Allowed = Allowed0 -- Modules, + persistent_term:put({emqx_variform, allowed_modules}, Allowed). + get_allowed_modules() -> persistent_term:get({emqx_variform, allowed_modules}, []). diff --git a/apps/emqx_utils/src/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_str.erl index 7b8e2e742..a53e1e216 100644 --- a/apps/emqx_utils/src/emqx_variform_str.erl +++ b/apps/emqx_utils/src/emqx_variform_str.erl @@ -52,7 +52,8 @@ find/3, join_to_string/1, join_to_string/2, - unescape/1 + unescape/1, + nth/2 ]). -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). @@ -224,6 +225,20 @@ unescape(Bin) when is_binary(Bin) -> throw({invalid_unicode_character, Error}) end. +nth(N, List) when (is_list(N) orelse is_binary(N)) andalso is_list(List) -> + try binary_to_integer(iolist_to_binary(N)) of + N1 -> + nth(N1, List) + catch + _:_ -> + throw(#{reason => invalid_argument, func => nth, index => N}) + end; +nth(N, List) when is_integer(N) andalso is_list(List) -> + case length(List) of + L when L < N -> <<>>; + _ -> lists:nth(N, List) + end. + unescape_string(Input) -> unescape_string(Input, []). unescape_string([], Acc) -> diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl new file mode 100644 index 000000000..da26a383d --- /dev/null +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -0,0 +1,129 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+-module(emqx_variform_tests).
+
+-compile(export_all).
+-compile(nowarn_export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(SYNTAX_ERROR, {error, "syntax error before:" ++ _}).
+
+render_test_() ->
+ [
+ {"direct var reference", fun() -> ?assertEqual({ok, <<"1">>}, render("a", #{a => 1})) end},
+ {"concat strings", fun() ->
+ ?assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{}))
+ end},
+ {"concat empty string", fun() -> ?assertEqual({ok, <<"">>}, render("concat('')", #{})) end},
+ {"tokens 1st", fun() ->
+ ?assertEqual({ok, <<"a">>}, render("nth(1,tokens(var, ','))", #{var => <<"a,b">>}))
+ end},
+ {"unknown var as empty str", fun() ->
+ ?assertEqual({ok, <<>>}, render("var", #{}))
+ end},
+ {"out of range nth index", fun() ->
+ ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>}))
+ end},
+ {"not a index number for nth", fun() ->
+ ?assertMatch(
+ {error, #{reason := invalid_argument, func := nth, index := <<"notnum">>}},
+ render("nth('notnum', tokens(var, ','))", #{var => <<"a">>})
+ )
+ end}
+ ].
+ +unknown_func_test_() -> + [ + {"unknown function", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_function}}, + render("nonexistingatom__(a)", #{}) + ) + end}, + {"unknown module", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_module}}, + render("nonexistingatom__.nonexistingatom__(a)", #{}) + ) + end}, + {"unknown function in a known module", fun() -> + ?assertMatch( + {error, #{reason := unknown_variform_function}}, + render("emqx_variform_str.nonexistingatom__(a)", #{}) + ) + end}, + {"invalid func reference", fun() -> + ?assertMatch( + {error, #{reason := invalid_function_reference, function := "a.b.c"}}, + render("a.b.c(var)", #{}) + ) + end} + ]. + +concat(L) -> iolist_to_binary(L). + +inject_allowed_module_test() -> + try + emqx_variform:inject_allowed_module(?MODULE), + ?assertEqual({ok, <<"ab">>}, render(atom_to_list(?MODULE) ++ ".concat(['a','b'])", #{})), + ?assertMatch( + {error, #{ + reason := unknown_variform_function, + module := ?MODULE, + function := concat, + arity := 2 + }}, + render(atom_to_list(?MODULE) ++ ".concat('a','b')", #{}) + ), + ?assertMatch( + {error, #{reason := unallowed_veriform_module, module := emqx}}, + render("emqx.concat('a','b')", #{}) + ) + after + emqx_variform:erase_allowed_module(?MODULE) + end. + +coalesce_test_() -> + [ + {"coalesce first", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{})) + end}, + {"coalesce second", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{})) + end}, + {"coalesce first var", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>})) + end}, + {"coalesce second var", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>})) + end}, + {"coalesce empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a,b)", #{})) end} + ]. 
+ +syntax_error_test_() -> + [ + {"empty expression", fun() -> ?assertMatch(?SYNTAX_ERROR, render("", #{})) end}, + {"const string single quote", fun() -> ?assertMatch(?SYNTAX_ERROR, render("'a'", #{})) end}, + {"const string double quote", fun() -> + ?assertMatch(?SYNTAX_ERROR, render(<<"\"a\"">>, #{})) + end}, + {"no arity", fun() -> ?assertMatch(?SYNTAX_ERROR, render("concat()", #{})) end} + ]. + +render(Expression, Bindings) -> + emqx_variform:render(Expression, Bindings). From 41677eb7855015c1e3a68c7a5726fd0d8eae0eb6 Mon Sep 17 00:00:00 2001 From: zmstone Date: Mon, 8 Apr 2024 21:25:58 +0200 Subject: [PATCH 099/234] refactor: make elvis happy --- apps/emqx_utils/src/emqx_variform.erl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 25825ea9f..834d22750 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -160,8 +160,7 @@ assert_func_exported(emqx_variform_str, concat, _Arity) -> assert_func_exported(emqx_variform_str, coalesce, _Arity) -> ok; assert_func_exported(Mod, Fun, Arity) -> - %% ensure beam loaded - _ = Mod:module_info(md5), + ok = try_load(Mod), case erlang:function_exported(Mod, Fun, Arity) of true -> ok; @@ -174,6 +173,18 @@ assert_func_exported(Mod, Fun, Arity) -> }) end. +%% best effort to load the module because it might not be loaded as a part of the release modules +%% e.g. from a plugin. +%% do not call code server, just try to call a function in the module. +try_load(Mod) -> + try + _ = erlang:apply(Mod, module_info, [md5]), + ok + catch + _:_ -> + ok + end. 
+ assert_module_allowed(emqx_variform_str) -> ok; assert_module_allowed(Mod) -> From 1e95bd4da6d04d7426930b6225c0a6c569c67c54 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 19:58:59 +0200 Subject: [PATCH 100/234] test(dsrepl): test unresponsive nodes removal / node restarts --- .../emqx_ds_replication_shard_allocator.erl | 4 +- .../test/emqx_ds_replication_SUITE.erl | 78 ++++++++++++++++++- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index cfc2b7c81..f02335a10 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -50,7 +50,7 @@ -undef(TRANS_RETRY_TIMEOUT). -undef(REMOVE_REPLICA_DELAY). -define(TRANS_RETRY_TIMEOUT, 1_000). --define(REMOVE_REPLICA_DELAY, {4_000, 2_000}). +-define(REMOVE_REPLICA_DELAY, {3_000, 2_000}). -endif. %% @@ -213,7 +213,7 @@ transition_handler(Shard, Trans, _State = #{db := DB}) -> %% Putting this transition handler on separate "track" so that it %% won't block any changes with higher priority (e.g. managing %% local replicas). - {_Track = unresp, Handler}; + {{unresp, Shard}, Handler}; false -> undefined end; diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index 6a2c36b30..9fc55d170 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -143,6 +143,12 @@ t_rebalance('end', Config) -> ok = emqx_cth_cluster:stop(?config(nodes, Config)). t_rebalance(Config) -> + %% This testcase verifies that the storage rebalancing works correctly: + %% 1. Join/leave operations are applied successfully. + %% 2. Message data survives the rebalancing. + %% 3. 
Shard cluster membership converges to the target replica allocation. + %% 4. Replication factor is respected. + NMsgs = 800, NClients = 5, Nodes = [N1, N2, N3, N4] = ?config(nodes, Config), @@ -257,6 +263,9 @@ t_join_leave_errors('end', Config) -> ok = emqx_cth_cluster:stop(?config(nodes, Config)). t_join_leave_errors(Config) -> + %% This testcase verifies that logical errors arising during handling of + %% join/leave operations are reported correctly. + [N1, N2] = ?config(nodes, Config), Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), @@ -317,6 +326,10 @@ t_rebalance_chaotic_converges('end', Config) -> ok = emqx_cth_cluster:stop(?config(nodes, Config)). t_rebalance_chaotic_converges(Config) -> + %% This testcase verifies that even a very chaotic sequence of join/leave + %% operations will still be handled consistently, and that the shard + %% allocation will converge to the target state. + NMsgs = 500, Nodes = [N1, N2, N3] = ?config(nodes, Config), @@ -365,12 +378,12 @@ t_rebalance_chaotic_converges(Config) -> %% Apply the sequence while also filling the storage with messages. TransitionMessages = lists:map( - fun({N, Transition, Site}) -> + fun({N, Operation, Site}) -> %% Apply the transition. - ?assertEqual(ok, ds_repl_meta(N, Transition, [?DB, Site])), + ?assertEqual(ok, ds_repl_meta(N, Operation, [?DB, Site])), %% Give some time for at least one transition to complete. Transitions = transitions(N, ?DB), - ct:pal("Transitions after ~p: ~p", [N, Transitions]), + ct:pal("Transitions after ~p: ~p", [Operation, Transitions]), ?retry(200, 10, ?assertNotEqual(Transitions, transitions(N, ?DB))), %% Fill the storage with messages. CID = integer_to_binary(erlang:system_time()), @@ -393,6 +406,65 @@ t_rebalance_chaotic_converges(Config) -> ?assertEqual(sample(20, Messages), sample(20, MessagesDB)), ?assertEqual(Messages, MessagesDB). 
+t_rebalance_offline_restarts(init, Config) -> + Apps = [appspec(emqx_durable_storage)], + Specs = emqx_cth_cluster:mk_nodespecs( + [ + {t_rebalance_offline_restarts1, #{apps => Apps}}, + {t_rebalance_offline_restarts2, #{apps => Apps}}, + {t_rebalance_offline_restarts3, #{apps => Apps}} + ], + #{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)} + ), + Nodes = emqx_cth_cluster:start(Specs), + [{nodes, Nodes}, {nodespecs, Specs} | Config]; +t_rebalance_offline_restarts('end', Config) -> + ok = emqx_cth_cluster:stop(?config(nodes, Config)). + +t_rebalance_offline_restarts(Config) -> + %% This testcase verifies that rebalancing progresses if nodes restart or + %% go offline and never come back. + + Nodes = [N1, N2, N3] = ?config(nodes, Config), + _Specs = [NS1, NS2, _] = ?config(nodespecs, Config), + + %% Initialize DB on all 3 nodes. + Opts = opts(#{n_shards => 8, n_sites => 3, replication_factor => 3}), + ?assertEqual( + [{ok, ok} || _ <- Nodes], + erpc:multicall(Nodes, emqx_ds, open_db, [?DB, Opts]) + ), + ?retry( + 500, + 10, + ?assertEqual([8 || _ <- Nodes], [n_shards_online(N, ?DB) || N <- Nodes]) + ), + + %% Find out which sites are there. + Sites = [S1, S2, S3] = [ds_repl_meta(N, this_site) || N <- Nodes], + ct:pal("Sites: ~p~n", [Sites]), + + %% Shut down N3 and then remove it from the DB. + ok = emqx_cth_cluster:stop_node(N3), + ?assertEqual(ok, ds_repl_meta(N1, leave_db_site, [?DB, S3])), + Transitions = transitions(N1, ?DB), + ct:pal("Transitions: ~p~n", [Transitions]), + + %% Wait until at least one transition completes. + ?block_until(#{?snk_kind := dsrepl_shard_transition_end}), + + %% Restart N1 and N2. + [N1] = emqx_cth_cluster:restart(NS1), + [N2] = emqx_cth_cluster:restart(NS2), + ?assertEqual( + [{ok, ok}, {ok, ok}], + erpc:multicall([N1, N2], emqx_ds, open_db, [?DB, Opts]) + ), + + %% Target state should still be reached eventually. 
+ ?retry(1000, 20, ?assertEqual([], transitions(N1, ?DB))), + ?assertEqual(lists:sort([S1, S2]), ds_repl_meta(N1, db_sites, [?DB])). + %% shard_server_info(Node, DB, Shard, Site, Info) -> From 3223797ae5930587523a15be1ce00e62012dbdab Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 21:28:20 +0200 Subject: [PATCH 101/234] fix(dsrepl): attempt leadership transfer before server removal This should make it much less likely to hit weird edge cases that lead to duplicate Raft log entries because of client retries upon receiving `shutdown` from the leader being removed. --- .../src/emqx_ds_replication_layer_shard.erl | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 8f87b69b4..2d19ec7ef 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -114,6 +114,13 @@ get_server_local_preferred(DB, Shard) -> pick_random(get_shard_servers(DB, Shard)) end. +lookup_leader(DB, Shard) -> + %% NOTE + %% Does not block, but the result may be outdated or even unknown when there's + %% no servers on the local node. + ClusterName = get_cluster_name(DB, Shard), + ra_leaderboard:lookup_leader(ClusterName). + pick_local(Servers) -> case lists:keyfind(node(), 2, Servers) of Local when is_tuple(Local) -> @@ -181,7 +188,22 @@ add_local_server(DB, Shard) -> -spec drop_local_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> ok | emqx_ds:error(_Reason). drop_local_server(DB, Shard) -> + ShardServers = shard_servers(DB, Shard), LocalServer = local_server(DB, Shard), + case lookup_leader(DB, Shard) of + LocalServer -> + %% NOTE + %% Trigger leadership transfer *and* force to wait until the new leader + %% is elected and updated in the leaderboard. 
This should help to avoid + %% edge cases where entries appended right before removal are duplicated + %% due to client retries. + %% Timeouts are ignored, it's a best effort attempt. + [Candidate | _] = lists:delete(LocalServer, ShardServers), + _ = ra:transfer_leadership(LocalServer, Candidate), + _ = wait_until(fun() -> lookup_leader(DB, Shard) == Candidate end); + _Another -> + ok + end, case remove_server(DB, Shard, LocalServer) of ok -> ra:force_delete_server(DB, LocalServer); @@ -351,3 +373,24 @@ memoize(Fun, Args) -> Result -> Result end. + +wait_until(Fun) -> + wait_until(Fun, 5_000, 250). + +wait_until(Fun, Timeout, Sleep) -> + Deadline = erlang:monotonic_time(millisecond) + Timeout, + loop_until(Fun, Deadline, Sleep). + +loop_until(Fun, Deadline, Sleep) -> + case Fun() of + true -> + ok; + false -> + case erlang:monotonic_time(millisecond) of + Now when Now < Deadline -> + timer:sleep(Sleep), + loop_until(Fun, Deadline, Sleep); + _ -> + timeout + end + end. From d12e907209786a0170d5026f04de01ae949f92f7 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 8 Apr 2024 22:44:34 +0200 Subject: [PATCH 102/234] fix(dsrepl): correctly handle ra membership change command results Before this change, results similar to `{error, {no_more_servers_to_try, [{error, nodedown}, {error, not_member}]}}` were considered retryable failures, which is incorrect. 
--- .../src/emqx_ds_replication_layer_shard.erl | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 2d19ec7ef..f4c0d3b01 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -173,13 +173,14 @@ add_local_server(DB, Shard) -> membership => voter } end, - case ra:add_member(ShardServers, ServerRecord, ?MEMBERSHIP_CHANGE_TIMEOUT) of + Timeout = ?MEMBERSHIP_CHANGE_TIMEOUT, + case ra_try_servers(ShardServers, fun ra:add_member/3, [ServerRecord, Timeout]) of {ok, _, _Leader} -> ok; {error, already_member} -> ok; - {error, Reason} -> - {error, recoverable, Reason} + Error -> + {error, recoverable, Error} end. %% @doc Remove a local server from the shard cluster and clean up on-disk data. @@ -219,13 +220,14 @@ drop_local_server(DB, Shard) -> ok | emqx_ds:error(_Reason). remove_server(DB, Shard, Server) -> ShardServers = shard_servers(DB, Shard), - case ra:remove_member(ShardServers, Server, ?MEMBERSHIP_CHANGE_TIMEOUT) of + Timeout = ?MEMBERSHIP_CHANGE_TIMEOUT, + case ra_try_servers(ShardServers, fun ra:remove_member/3, [Server, Timeout]) of {ok, _, _Leader} -> ok; {error, not_member} -> ok; - {error, Reason} -> - {error, recoverable, Reason} + Error -> + {error, recoverable, Error} end. -spec server_info @@ -272,6 +274,20 @@ member_readiness(#{status := Status, voter_status := #{membership := Membership} member_readiness(#{}) -> unknown. +%% + +ra_try_servers([Server | Rest], Fun, Args) -> + case erlang:apply(Fun, [Server | Args]) of + {ok, R, Leader} -> + {ok, R, Leader}; + {error, Reason} when Reason == noproc; Reason == nodedown -> + ra_try_servers(Rest, Fun, Args); + ErrorOrTimeout -> + ErrorOrTimeout + end; +ra_try_servers([], _Fun, _Args) -> + {error, servers_unreachable}. 
+ ra_overview(Server) -> case ra:member_overview(Server) of {ok, Overview, _Leader} -> From 698b8e6a05c600d0de4469bd99f9a69936ad73dc Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 9 Apr 2024 09:37:49 +0800 Subject: [PATCH 103/234] chore(dashboard): bump dashboard version to v1.8.1 & e1.6.1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1753dd4d8..2f01ad16e 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,8 @@ endif # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.8.0 -export EMQX_EE_DASHBOARD_VERSION ?= e1.6.0 +export EMQX_DASHBOARD_VERSION ?= v1.8.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.6.1 PROFILE ?= emqx REL_PROFILES := emqx emqx-enterprise From a1495689c0d890903353ff052f306ee76f133728 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Mon, 8 Apr 2024 10:21:00 +0800 Subject: [PATCH 104/234] fix: clean self node's cluster commit when leave cluster --- apps/emqx_conf/src/emqx_cluster_rpc.erl | 3 ++- changes/ce/fix-12843.en.md | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changes/ce/fix-12843.en.md diff --git a/apps/emqx_conf/src/emqx_cluster_rpc.erl b/apps/emqx_conf/src/emqx_cluster_rpc.erl index 39d471ed9..37f052f56 100644 --- a/apps/emqx_conf/src/emqx_cluster_rpc.erl +++ b/apps/emqx_conf/src/emqx_cluster_rpc.erl @@ -224,6 +224,7 @@ reset() -> gen_server:call(?MODULE, reset). status() -> transaction(fun ?MODULE:trans_status/0, []). +%% DO NOT delete this on_leave_clean/0, It's use when rpc before v560. on_leave_clean() -> on_leave_clean(node()). 
@@ -367,7 +368,7 @@ handle_call({fast_forward_to_commit, ToTnxId}, _From, State) -> NodeId = do_fast_forward_to_commit(ToTnxId, State), {reply, NodeId, State, catch_up(State)}; handle_call(on_leave, _From, State) -> - {atomic, ok} = transaction(fun ?MODULE:on_leave_clean/0, []), + {atomic, ok} = transaction(fun ?MODULE:on_leave_clean/1, [node()]), {reply, ok, State#{is_leaving := true}}; handle_call(_, _From, State) -> {reply, ok, State, catch_up(State)}. diff --git a/changes/ce/fix-12843.en.md b/changes/ce/fix-12843.en.md new file mode 100644 index 000000000..f0ba2af8c --- /dev/null +++ b/changes/ce/fix-12843.en.md @@ -0,0 +1,2 @@ +Fixed cluster_rpc_commit tnx_id was not properly cleanup after 'cluster leave' on replicator nodes, +The tnx_id of the core node will be deleted before, resulting in the failure of the core node update configuration. From 5579086220b77b2914c0d7f9844f83df022aa330 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 9 Apr 2024 14:15:18 +0800 Subject: [PATCH 105/234] chore: replicantor -> replicant --- changes/ce/fix-12843.en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/ce/fix-12843.en.md b/changes/ce/fix-12843.en.md index f0ba2af8c..38a46273f 100644 --- a/changes/ce/fix-12843.en.md +++ b/changes/ce/fix-12843.en.md @@ -1,2 +1,2 @@ -Fixed cluster_rpc_commit tnx_id was not properly cleanup after 'cluster leave' on replicator nodes, +Fixed cluster_rpc_commit tnx_id was not properly cleanup after 'cluster leave' on replicant nodes, The tnx_id of the core node will be deleted before, resulting in the failure of the core node update configuration. 
From 838113291931ae9cc06ea732b70e531340909269 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 9 Apr 2024 14:44:11 +0800 Subject: [PATCH 106/234] chore: bump emqx_conf to 0.1.36 --- apps/emqx_conf/src/emqx_conf.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_conf/src/emqx_conf.app.src b/apps/emqx_conf/src/emqx_conf.app.src index dedb0c3c6..0646bc255 100644 --- a/apps/emqx_conf/src/emqx_conf.app.src +++ b/apps/emqx_conf/src/emqx_conf.app.src @@ -1,6 +1,6 @@ {application, emqx_conf, [ {description, "EMQX configuration management"}, - {vsn, "0.1.35"}, + {vsn, "0.1.36"}, {registered, []}, {mod, {emqx_conf_app, []}}, {applications, [kernel, stdlib]}, From 53b78086ed810453900e5e740944ed6c42a4b91d Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 9 Apr 2024 09:34:05 +0200 Subject: [PATCH 107/234] chore: fix xref checks --- apps/emqx_utils/src/emqx_variform_parser.yrl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/emqx_utils/src/emqx_variform_parser.yrl b/apps/emqx_utils/src/emqx_variform_parser.yrl index 8a8a03a4d..508ef46d0 100644 --- a/apps/emqx_utils/src/emqx_variform_parser.yrl +++ b/apps/emqx_utils/src/emqx_variform_parser.yrl @@ -36,3 +36,8 @@ arg -> expr : '$1'. arg -> array : '$1'. arg -> number : {num, element(3, '$1')}. arg -> string : {str, element(3, '$1')}. + +Erlang code. + +%% mute xref warning +-export([return_error/2]). 
From 54d770d32edc296ff5acb425ad49a49303efb9e5 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Tue, 9 Apr 2024 10:03:37 +0200 Subject: [PATCH 108/234] chore: update codeowners --- .github/CODEOWNERS | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 23911f9a8..a9474f01a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,18 +1,29 @@ ## Default * @emqx/emqx-review-board +# emqx-review-board members +## HJianBo +## id +## ieQu1 +## qzhuyan +## savonarola +## terry-xiaoyu +## thalesmg +## zhongwencool +## zmstone + ## apps /apps/emqx/ @emqx/emqx-review-board @lafirest -/apps/emqx_connector/ @emqx/emqx-review-board -/apps/emqx_auth/ @emqx/emqx-review-board @JimMoen @savonarola +/apps/emqx_auth/ @emqx/emqx-review-board @JimMoen /apps/emqx_connector/ @emqx/emqx-review-board @JimMoen /apps/emqx_dashboard/ @emqx/emqx-review-board @JimMoen @lafirest /apps/emqx_dashboard_rbac/ @emqx/emqx-review-board @lafirest /apps/emqx_dashboard_sso/ @emqx/emqx-review-board @JimMoen @lafirest -/apps/emqx_exhook/ @emqx/emqx-review-board @JimMoen @HJianBo -/apps/emqx_ft/ @emqx/emqx-review-board @savonarola @keynslug +/apps/emqx_exhook/ @emqx/emqx-review-board @JimMoen +/apps/emqx_ft/ @emqx/emqx-review-board @keynslug /apps/emqx_gateway/ @emqx/emqx-review-board @lafirest -/apps/emqx_management/ @emqx/emqx-review-board @lafirest @sstrigler +/apps/emqx_management/ @emqx/emqx-review-board @lafirest +/apps/emqx_opentelemetry @emqx/emqx-review-board @SergeTupchiy /apps/emqx_plugins/ @emqx/emqx-review-board @JimMoen /apps/emqx_prometheus/ @emqx/emqx-review-board @JimMoen /apps/emqx_psk/ @emqx/emqx-review-board @lafirest @@ -20,7 +31,7 @@ /apps/emqx_rule_engine/ @emqx/emqx-review-board @kjellwinblad /apps/emqx_slow_subs/ @emqx/emqx-review-board @lafirest /apps/emqx_statsd/ @emqx/emqx-review-board @JimMoen -/apps/emqx_durable_storage/ @emqx/emqx-review-board @ieQu1 @keynslug +/apps/emqx_durable_storage/ 
@emqx/emqx-review-board @keynslug ## CI /deploy/ @emqx/emqx-review-board @Rory-Z From d393e963798a443887ed864d45d1e1efef00c820 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 9 Apr 2024 16:56:41 +0800 Subject: [PATCH 109/234] chore: Apply suggestions from code review Co-authored-by: ieQu1 <99872536+ieQu1@users.noreply.github.com> --- changes/ce/fix-12843.en.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changes/ce/fix-12843.en.md b/changes/ce/fix-12843.en.md index 38a46273f..000026c00 100644 --- a/changes/ce/fix-12843.en.md +++ b/changes/ce/fix-12843.en.md @@ -1,2 +1,2 @@ -Fixed cluster_rpc_commit tnx_id was not properly cleanup after 'cluster leave' on replicant nodes, -The tnx_id of the core node will be deleted before, resulting in the failure of the core node update configuration. +Fixed `cluster_rpc_commit` transaction ID cleanup procedure after `cluster leave` on replicant nodes. +Previously, the transaction id of the core node would be deleted prematurely, blocking configuration updates on the core node. From 47e0f3bb1f3b376b58d12af1ad5b828da51ec3de Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 9 Apr 2024 16:40:39 +0800 Subject: [PATCH 110/234] fix(mgmt): $queue shared topics format in mgmt topics api --- apps/emqx/include/emqx_mqtt.hrl | 1 - apps/emqx/test/emqx_shared_sub_SUITE.erl | 13 ++++++---- .../src/emqx_mgmt_api_topics.erl | 5 +++- .../test/emqx_mgmt_api_topics_SUITE.erl | 24 +++++++++++++++++++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/apps/emqx/include/emqx_mqtt.hrl b/apps/emqx/include/emqx_mqtt.hrl index 63e2799fd..09f7495ea 100644 --- a/apps/emqx/include/emqx_mqtt.hrl +++ b/apps/emqx/include/emqx_mqtt.hrl @@ -673,7 +673,6 @@ end). -define(SHARE, "$share"). -define(QUEUE, "$queue"). --define(SHARE(Group, Topic), emqx_topic:join([<>, Group, Topic])). -define(REDISPATCH_TO(GROUP, TOPIC), {GROUP, TOPIC}). 
diff --git a/apps/emqx/test/emqx_shared_sub_SUITE.erl b/apps/emqx/test/emqx_shared_sub_SUITE.erl index df713ac74..040b3d295 100644 --- a/apps/emqx/test/emqx_shared_sub_SUITE.erl +++ b/apps/emqx/test/emqx_shared_sub_SUITE.erl @@ -1004,9 +1004,9 @@ t_different_groups_same_topic(Config) when is_list(Config) -> GroupB = <<"bb">>, Topic = <<"t/1">>, - SharedTopicGroupA = ?SHARE(GroupA, Topic), + SharedTopicGroupA = format_share(GroupA, Topic), ?UPDATE_SUB_QOS(C, SharedTopicGroupA, ?QOS_2), - SharedTopicGroupB = ?SHARE(GroupB, Topic), + SharedTopicGroupB = format_share(GroupB, Topic), ?UPDATE_SUB_QOS(C, SharedTopicGroupB, ?QOS_2), ?retry( @@ -1050,11 +1050,11 @@ t_different_groups_update_subopts(Config) when is_list(Config) -> Topic = <<"t/1">>, GroupA = <<"aa">>, GroupB = <<"bb">>, - SharedTopicGroupA = ?SHARE(GroupA, Topic), - SharedTopicGroupB = ?SHARE(GroupB, Topic), + SharedTopicGroupA = format_share(GroupA, Topic), + SharedTopicGroupB = format_share(GroupB, Topic), Fun = fun(Group, QoS) -> - ?UPDATE_SUB_QOS(C, ?SHARE(Group, Topic), QoS), + ?UPDATE_SUB_QOS(C, format_share(Group, Topic), QoS), ?assertMatch( #{qos := QoS}, emqx_broker:get_subopts(ClientId, emqx_topic:make_shared_record(Group, Topic)) @@ -1153,6 +1153,9 @@ t_queue_subscription(Config) when is_list(Config) -> %% help functions %%-------------------------------------------------------------------- +format_share(Group, Topic) -> + emqx_topic:maybe_format_share(emqx_topic:make_shared_record(Group, Topic)). + kill_process(Pid) -> kill_process(Pid, fun(_) -> erlang:exit(Pid, kill) end). diff --git a/apps/emqx_management/src/emqx_mgmt_api_topics.erl b/apps/emqx_management/src/emqx_mgmt_api_topics.erl index 1cb12f8f3..ff935ce10 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_topics.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_topics.erl @@ -225,7 +225,10 @@ format_response_meta(Meta, _Query, #{hasnext := HasNext}) -> Meta#{hasnext => HasNext}. 
format(#route{topic = Topic, dest = {Group, Node}}) -> - #{topic => ?SHARE(Group, Topic), node => Node}; + #{ + topic => emqx_topic:maybe_format_share(emqx_topic:make_shared_record(Group, Topic)), + node => Node + }; format(#route{topic = Topic, dest = Node}) when is_atom(Node) -> #{topic => Topic, node => Node}; format(#route{topic = Topic, dest = SessionId}) when is_binary(SessionId) -> diff --git a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl index 55113c9e2..a8f912802 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl @@ -187,6 +187,30 @@ t_shared_topics(_Configs) -> ok = emqtt:stop(Client). +t_queue_topics(_Configs) -> + Node = atom_to_binary(node(), utf8), + RealTopic = <<"t/+">>, + Topic = <<"$queue/", RealTopic/binary>>, + + Client = client(?FUNCTION_NAME), + {ok, _, _} = emqtt:subscribe(Client, Topic), + {ok, _, _} = emqtt:subscribe(Client, RealTopic), + + %% exact match with shared topic + MatchData = request_json(get, ["topics"], [ + {"topic", Topic}, + {"node", atom_to_list(node())} + ]), + ?assertMatch( + #{ + <<"data">> := [#{<<"topic">> := Topic, <<"node">> := Node}], + <<"meta">> := #{<<"page">> := 1, <<"limit">> := 100, <<"count">> := 1} + }, + MatchData + ), + + ok = emqtt:stop(Client). + t_shared_topics_invalid(_Config) -> %% no real topic InvalidShareTopicFilter = <<"$share/group">>, From 03a9c46ca7d4bdb4d9e62e2d71166aa9e7fb4f7a Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 9 Apr 2024 16:53:20 +0800 Subject: [PATCH 111/234] fix(sys_topic): format shared topics --- apps/emqx/src/emqx_sys.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_sys.erl b/apps/emqx/src/emqx_sys.erl index cc8eec3af..f50e23235 100644 --- a/apps/emqx/src/emqx_sys.erl +++ b/apps/emqx/src/emqx_sys.erl @@ -22,6 +22,7 @@ -include("types.hrl"). -include("logger.hrl"). 
-include("emqx_hooks.hrl"). +-include("emqx_mqtt.hrl"). -export([ start_link/0, @@ -279,7 +280,7 @@ on_client_subscribed( clientid => ClientId, username => Username, protocol => Protocol, - topic => Topic, + topic => emqx_topic:maybe_format_share(Topic), subopts => SubOpts, ts => erlang:system_time(millisecond) }, @@ -298,7 +299,7 @@ on_client_unsubscribed( clientid => ClientId, username => Username, protocol => Protocol, - topic => Topic, + topic => emqx_topic:maybe_format_share(Topic), ts => erlang:system_time(millisecond) }, publish(unsubscribed, Payload). From 958748cf7ff2117cb049aa168d6de4ff47c05d79 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 9 Apr 2024 11:10:39 +0200 Subject: [PATCH 112/234] test: fix inter test suite problem --- apps/emqx_rule_engine/rebar.config | 8 +++++++ .../emqx_rule_engine_api_rule_apply_SUITE.erl | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/apps/emqx_rule_engine/rebar.config b/apps/emqx_rule_engine/rebar.config index 07c53d3e3..d51bffa20 100644 --- a/apps/emqx_rule_engine/rebar.config +++ b/apps/emqx_rule_engine/rebar.config @@ -5,6 +5,14 @@ {emqx_utils, {path, "../emqx_utils"}} ]}. +{profiles, [ + {test, [ + {deps, [ + {emqx_bridge_http, {path, "../emqx_bridge_http"}} + ]} + ]} +]}. 
+ {erl_opts, [ warn_unused_vars, warn_shadow_vars, diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 2b77f9c3d..576806464 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -30,15 +30,20 @@ all() -> init_per_suite(Config) -> application:load(emqx_conf), + AppsToStart = [ + emqx, + emqx_conf, + emqx_connector, + emqx_bridge, + emqx_bridge_http, + emqx_rule_engine + ], + %% I don't know why we need to stop the apps and then start them but if we + %% don't do this and other suites run before this suite the test cases will + %% fail as it seems like the connector silently refuses to start. + ok = emqx_cth_suite:stop(AppsToStart), Apps = emqx_cth_suite:start( - [ - emqx, - emqx_conf, - emqx_connector, - emqx_bridge_http, - emqx_bridge, - emqx_rule_engine - ], + AppsToStart, #{work_dir => emqx_cth_suite:work_dir(Config)} ), emqx_mgmt_api_test_util:init_suite(), From 3898950017f4dc8eee4dfbd23fa19a6931b84df2 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 9 Apr 2024 11:38:35 +0200 Subject: [PATCH 113/234] test: fix test by adding dependency --- apps/emqx/rebar.config | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 4e850b2cc..66600cebc 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -46,7 +46,8 @@ {meck, "0.9.2"}, {proper, "1.4.0"}, {bbmustache, "1.10.0"}, - {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.10.0"}}} + {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.10.0"}}}, + {emqx_rule_engine, {path, "../emqx_rule_engine"}} ]}, {extra_src_dirs, [ {"test", [recursive]}, @@ -58,7 +59,8 @@ {meck, "0.9.2"}, {proper, "1.4.0"}, {bbmustache, "1.10.0"}, - {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.9.7"}}} + {emqtt, {git, 
"https://github.com/emqx/emqtt", {tag, "1.9.7"}}}, + {emqx_rule_engine, {path, "../emqx_rule_engine"}} ]}, {extra_src_dirs, [{"test", [recursive]}]} ]} From a79df4ba690c2dbd83f740cf4fbbb4b5ea5ef1bb Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 9 Apr 2024 18:14:30 +0800 Subject: [PATCH 114/234] chore: add change log for #12855 --- changes/fix-12855.en.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changes/fix-12855.en.md diff --git a/changes/fix-12855.en.md b/changes/fix-12855.en.md new file mode 100644 index 000000000..422008243 --- /dev/null +++ b/changes/fix-12855.en.md @@ -0,0 +1,2 @@ +Fix when the client subscribes/unsubscribes to a shared topic, the system topic messages for Client subscribed/unsubscribed notification cannot be serialized correctly. +Fix the `$queue` shared topics format error in endpoint `/topics`. From 7bcd553786ff4825a37ff2662a86ecc567c0adfe Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 9 Apr 2024 14:08:46 +0200 Subject: [PATCH 115/234] test: move test case with rule_engine dep from emqx to emqx_rule_engine --- apps/emqx/rebar.config | 6 +- apps/emqx/test/emqx_trace_handler_SUITE.erl | 85 +------------ .../test/emqx_rule_engine_SUITE.erl | 115 +++++++++++++++++- 3 files changed, 119 insertions(+), 87 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 66600cebc..4e850b2cc 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -46,8 +46,7 @@ {meck, "0.9.2"}, {proper, "1.4.0"}, {bbmustache, "1.10.0"}, - {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.10.0"}}}, - {emqx_rule_engine, {path, "../emqx_rule_engine"}} + {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.10.0"}}} ]}, {extra_src_dirs, [ {"test", [recursive]}, @@ -59,8 +58,7 @@ {meck, "0.9.2"}, {proper, "1.4.0"}, {bbmustache, "1.10.0"}, - {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.9.7"}}}, - {emqx_rule_engine, {path, "../emqx_rule_engine"}} + {emqtt, {git, "https://github.com/emqx/emqtt", 
{tag, "1.9.7"}}} ]}, {extra_src_dirs, [{"test", [recursive]}]} ]} diff --git a/apps/emqx/test/emqx_trace_handler_SUITE.erl b/apps/emqx/test/emqx_trace_handler_SUITE.erl index 85a9c056b..59a472f3e 100644 --- a/apps/emqx/test/emqx_trace_handler_SUITE.erl +++ b/apps/emqx/test/emqx_trace_handler_SUITE.erl @@ -20,7 +20,6 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). --include_lib("snabbkaffe/include/test_macros.hrl"). -include_lib("common_test/include/ct.hrl"). -define(CLIENT, [ @@ -30,12 +29,11 @@ {password, <<"pass">>} ]). -all() -> - [t_trace_clientid, t_trace_topic, t_trace_ip_address, t_trace_clientid_utf8, t_trace_rule_id]. +all() -> [t_trace_clientid, t_trace_topic, t_trace_ip_address, t_trace_clientid_utf8]. init_per_suite(Config) -> Apps = emqx_cth_suite:start( - [emqx, emqx_rule_engine], + [emqx], #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{apps, Apps} | Config]. @@ -207,79 +205,6 @@ t_trace_topic(_Config) -> ?assertEqual([], emqx_trace_handler:running()), emqtt:disconnect(T). -create_rule(Name, SQL) -> - Rule = emqx_rule_engine_SUITE:make_simple_rule(Name, SQL), - {ok, _} = emqx_rule_engine:create_rule(Rule). 
- -t_trace_rule_id(_Config) -> - %% Start MQTT Client - {ok, T} = emqtt:start_link(?CLIENT), - emqtt:connect(T), - %% Create rules - create_rule( - <<"test_rule_id_1">>, - <<"select 1 as rule_number from \"rule_1_topic\"">> - ), - create_rule( - <<"test_rule_id_2">>, - <<"select 2 as rule_number from \"rule_2_topic\"">> - ), - %% Start tracing - ok = emqx_trace_handler:install( - "CLI-RULE-1", ruleid, <<"test_rule_id_1">>, all, "tmp/rule_trace_1.log" - ), - ok = emqx_trace_handler:install( - "CLI-RULE-2", ruleid, <<"test_rule_id_2">>, all, "tmp/rule_trace_2.log" - ), - emqx_trace:check(), - ok = filesync("CLI-RULE-1", ruleid), - ok = filesync("CLI-RULE-2", ruleid), - - %% Verify the tracing file exits - ?assert(filelib:is_regular("tmp/rule_trace_1.log")), - ?assert(filelib:is_regular("tmp/rule_trace_2.log")), - - %% Get current traces - ?assertMatch( - [ - #{ - type := ruleid, - filter := <<"test_rule_id_1">>, - level := debug, - dst := "tmp/rule_trace_1.log", - name := <<"CLI-RULE-1">> - }, - #{ - type := ruleid, - filter := <<"test_rule_id_2">>, - name := <<"CLI-RULE-2">>, - level := debug, - dst := "tmp/rule_trace_2.log" - } - ], - emqx_trace_handler:running() - ), - - %% Trigger rule - emqtt:publish(T, <<"rule_1_topic">>, <<"my_traced_message">>), - ?retry( - 100, - 5, - begin - ok = filesync("CLI-RULE-1", ruleid), - {ok, Bin} = file:read_file("tmp/rule_trace_1.log"), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"my_traced_message">>])) - end - ), - ok = filesync("CLI-RULE-2", ruleid), - ?assert(filelib:file_size("tmp/rule_trace_2.log") =:= 0), - - %% Stop tracing - ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-1">>), - ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-2">>), - ?assertEqual([], emqx_trace_handler:running()), - emqtt:disconnect(T). 
- t_trace_ip_address(_Config) -> {ok, T} = emqtt:start_link(?CLIENT), emqtt:connect(T), @@ -347,11 +272,11 @@ t_trace_ip_address(_Config) -> filesync(Name, Type) -> ct:sleep(50), - filesync(Name, Type, 5). + filesync(Name, Type, 3). %% sometime the handler process is not started yet. -filesync(Name, Type, 0) -> - ct:fail("Handler process not started ~p ~p", [Name, Type]); +filesync(_Name, _Type, 0) -> + ok; filesync(Name0, Type, Retry) -> Name = case is_binary(Name0) of diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl index 76cc23c0d..5040d15b3 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl @@ -43,7 +43,8 @@ all() -> {group, metrics}, {group, metrics_simple}, {group, metrics_fail}, - {group, metrics_fail_simple} + {group, metrics_fail_simple}, + {group, tracing} ]. suite() -> @@ -142,6 +143,9 @@ groups() -> {metrics_fail_simple, [], [ t_rule_metrics_sync_fail, t_rule_metrics_async_fail + ]}, + {tracing, [], [ + t_trace_rule_id ]} ]. @@ -154,13 +158,13 @@ init_per_suite(Config) -> emqx_rule_funcs_demo:module_info(), application:load(emqx_conf), ok = emqx_common_test_helpers:start_apps( - [emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge], + [emqx, emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge], fun set_special_configs/1 ), Config. end_per_suite(_Config) -> - emqx_common_test_helpers:stop_apps([emqx_conf, emqx_rule_engine]), + emqx_common_test_helpers:stop_apps([emqx, emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge]), ok. set_special_configs(emqx_auth) -> @@ -3632,6 +3636,111 @@ create_bridge(Type, Name, Config) -> {ok, _Bridge} = emqx_bridge:create(Type, Name, Config), emqx_bridge_resource:bridge_id(Type, Name). +create_rule(Name, SQL) -> + Rule = emqx_rule_engine_SUITE:make_simple_rule(Name, SQL), + {ok, _} = emqx_rule_engine:create_rule(Rule). 
+ +emqtt_client_config() -> + [ + {host, "localhost"}, + {clientid, <<"client">>}, + {username, <<"testuser">>}, + {password, <<"pass">>} + ]. + +filesync(Name, Type) -> + ct:sleep(50), + filesync(Name, Type, 5). + +%% sometime the handler process is not started yet. +filesync(Name, Type, 0) -> + ct:fail("Handler process not started ~p ~p", [Name, Type]); +filesync(Name0, Type, Retry) -> + Name = + case is_binary(Name0) of + true -> Name0; + false -> list_to_binary(Name0) + end, + try + Handler = binary_to_atom(<<"trace_", (atom_to_binary(Type))/binary, "_", Name/binary>>), + ok = logger_disk_log_h:filesync(Handler) + catch + E:R -> + ct:pal("Filesync error:~p ~p~n", [{Name, Type, Retry}, {E, R}]), + ct:sleep(100), + filesync(Name, Type, Retry - 1) + end. + +t_trace_rule_id(_Config) -> + %% Start MQTT Client + emqx_trace_SUITE:reload(), + {ok, T} = emqtt:start_link(emqtt_client_config()), + emqtt:connect(T), + %% Create rules + create_rule( + <<"test_rule_id_1">>, + <<"select 1 as rule_number from \"rule_1_topic\"">> + ), + create_rule( + <<"test_rule_id_2">>, + <<"select 2 as rule_number from \"rule_2_topic\"">> + ), + %% Start tracing + ok = emqx_trace_handler:install( + "CLI-RULE-1", ruleid, <<"test_rule_id_1">>, all, "tmp/rule_trace_1.log" + ), + ok = emqx_trace_handler:install( + "CLI-RULE-2", ruleid, <<"test_rule_id_2">>, all, "tmp/rule_trace_2.log" + ), + emqx_trace:check(), + ok = filesync("CLI-RULE-1", ruleid), + ok = filesync("CLI-RULE-2", ruleid), + + %% Verify the tracing file exits + ?assert(filelib:is_regular("tmp/rule_trace_1.log")), + ?assert(filelib:is_regular("tmp/rule_trace_2.log")), + + %% Get current traces + ?assertMatch( + [ + #{ + type := ruleid, + filter := <<"test_rule_id_1">>, + level := debug, + dst := "tmp/rule_trace_1.log", + name := <<"CLI-RULE-1">> + }, + #{ + type := ruleid, + filter := <<"test_rule_id_2">>, + name := <<"CLI-RULE-2">>, + level := debug, + dst := "tmp/rule_trace_2.log" + } + ], + emqx_trace_handler:running() + ), + + 
%% Trigger rule + emqtt:publish(T, <<"rule_1_topic">>, <<"my_traced_message">>), + ?retry( + 100, + 5, + begin + ok = filesync("CLI-RULE-1", ruleid), + {ok, Bin} = file:read_file("tmp/rule_trace_1.log"), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"my_traced_message">>])) + end + ), + ok = filesync("CLI-RULE-2", ruleid), + ?assert(filelib:file_size("tmp/rule_trace_2.log") =:= 0), + + %% Stop tracing + ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-1">>), + ok = emqx_trace_handler:uninstall(ruleid, <<"CLI-RULE-2">>), + ?assertEqual([], emqx_trace_handler:running()), + emqtt:disconnect(T). + %%------------------------------------------------------------------------------ %% Internal helpers %%------------------------------------------------------------------------------ From 43ff2e3a743f6c2f34e5d0c83897c572b3cf0af7 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 9 Apr 2024 16:17:26 +0200 Subject: [PATCH 116/234] docs: add change log entry --- changes/ce/feat-12827.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/feat-12827.en.md diff --git a/changes/ce/feat-12827.en.md b/changes/ce/feat-12827.en.md new file mode 100644 index 000000000..633a33d6b --- /dev/null +++ b/changes/ce/feat-12827.en.md @@ -0,0 +1 @@ +It is now possible to trace rules with a new Rule ID trace filter as well as with the Client ID filter. For testing purposes it is now also possible to use a new HTTP API endpoint (rules/:id/test) to artificially apply a rule and optionally stop its actions after they have been rendered. From 31142df5cfc375a744d41faf485b72873cd54bbd Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 9 Apr 2024 16:20:37 +0200 Subject: [PATCH 117/234] fix: default value of stop_action_after_template_rendering to true This commit changes the default value for the stop_action_after_template_rendering option of the apply rule HTTP API endpoint so that it is true instead of false. 
--- apps/emqx_rule_engine/src/emqx_rule_api_schema.erl | 2 +- rel/i18n/emqx_rule_api_schema.hocon | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index 20363e726..5e19b33ee 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -123,7 +123,7 @@ fields("rule_apply_test") -> #{ desc => ?DESC("stop_action_after_template_render"), - default => false + default => true } )} ]; diff --git a/rel/i18n/emqx_rule_api_schema.hocon b/rel/i18n/emqx_rule_api_schema.hocon index 7f684e8ef..25535d0ca 100644 --- a/rel/i18n/emqx_rule_api_schema.hocon +++ b/rel/i18n/emqx_rule_api_schema.hocon @@ -73,7 +73,7 @@ test_rule_environment.label: """Event Environment""" stop_action_after_template_render.desc: -"""Set this to true if the action should be stopped after its template has been rendered.""" +"""Set this to true if the action should be stopped after its template has been rendered (default is true).""" stop_action_after_template_render.label: """Stop Action After Template Rendering""" From 55179ccfeda75bbf31f9d38fe1a6c47f4f74f5fc Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 9 Apr 2024 19:04:38 +0300 Subject: [PATCH 118/234] chore: update ekka to 0.19.3 Included updates: - https://github.com/emqx/mria/pull/178 --- apps/emqx/rebar.config | 2 +- mix.exs | 2 +- rebar.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 70cf636e7..99b8a21a4 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, + {ekka, {git, 
"https://github.com/emqx/ekka", {tag, "0.19.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.42.1"}}}, {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, diff --git a/mix.exs b/mix.exs index 2ebbc7e66..87fd4d62e 100644 --- a/mix.exs +++ b/mix.exs @@ -55,7 +55,7 @@ defmodule EMQXUmbrella.MixProject do {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, {:esockd, github: "emqx/esockd", tag: "5.11.1", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-2", override: true}, - {:ekka, github: "emqx/ekka", tag: "0.19.2", override: true}, + {:ekka, github: "emqx/ekka", tag: "0.19.3", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "3.3.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.12", override: true}, {:minirest, github: "emqx/minirest", tag: "1.4.0", override: true}, diff --git a/rebar.config b/rebar.config index 537707f4a..b5f5f1d9a 100644 --- a/rebar.config +++ b/rebar.config @@ -83,7 +83,7 @@ {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-2"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.12"}}}, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.4.0"}}}, From fae9005f87d9b4925d8a3a19328e2fb368793f29 Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 9 Apr 2024 19:06:13 +0300 Subject: [PATCH 119/234] test(emqx_mgmt_cli): test that replicants do not join a left core node --- .../test/emqx_mgmt_cli_SUITE.erl | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git 
a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index c6f00bff0..b1d646b40 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -33,12 +33,12 @@ end_per_suite(_) -> emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). init_per_testcase(t_autocluster_leave = TC, Config) -> - [Core1, Core2, Core3, Repl] = + [Core1, Core2, Repl1, Repl2] = Nodes = [ t_autocluster_leave_core1, t_autocluster_leave_core2, - t_autocluster_leave_core3, - t_autocluster_leave_replicant + t_autocluster_leave_replicant1, + t_autocluster_leave_replicant2 ], NodeNames = [emqx_cth_cluster:node_name(N) || N <- Nodes], @@ -58,8 +58,8 @@ init_per_testcase(t_autocluster_leave = TC, Config) -> [ {Core1, #{role => core, apps => AppSpec}}, {Core2, #{role => core, apps => AppSpec}}, - {Core3, #{role => core, apps => AppSpec}}, - {Repl, #{role => replicant, apps => AppSpec}} + {Repl1, #{role => replicant, apps => AppSpec}}, + {Repl2, #{role => replicant, apps => AppSpec}} ], #{work_dir => emqx_cth_suite:work_dir(TC, Config)} ), @@ -307,7 +307,7 @@ t_admin(_Config) -> ok. t_autocluster_leave(Config) -> - [Core1, Core2, Core3, Repl] = Cluster = ?config(cluster, Config), + [Core1, Core2, Repl1, Repl2] = Cluster = ?config(cluster, Config), %% Mria membership updates are async, makes sense to wait a little timer:sleep(300), ClusterView = [lists:sort(rpc:call(N, emqx, running_nodes, [])) || N <- Cluster], @@ -317,24 +317,24 @@ t_autocluster_leave(Config) -> ?assertEqual(View1, View3), ?assertEqual(View1, View4), - rpc:call(Core3, emqx_mgmt_cli, cluster, [["leave"]]), + rpc:call(Core2, emqx_mgmt_cli, cluster, [["leave"]]), timer:sleep(1000), - %% Replicant node may still discover and join Core3 which is now split from [Core1, Core2], - %% but it's expected to choose a bigger cluster of [Core1, Core2].. 
- ?assertMatch([Core3], rpc:call(Core3, emqx, running_nodes, [])), + %% Replicant nodes can discover Core2 which is now split from [Core1, Core2], + %% but they are expected to ignore Core2, + %% since mria_lb must filter out core nodes that disabled discovery. + ?assertMatch([Core2], rpc:call(Core2, emqx, running_nodes, [])), ?assertEqual(undefined, rpc:call(Core1, erlang, whereis, [ekka_autocluster])), - ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core1, emqx, running_nodes, [])), - ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core2, emqx, running_nodes, [])), - ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Repl, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Repl1, Repl2]), rpc:call(Core1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Repl1, Repl2]), rpc:call(Repl1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Repl1, Repl2]), rpc:call(Repl2, emqx, running_nodes, [])), - rpc:call(Repl, emqx_mgmt_cli, cluster, [["leave"]]), + rpc:call(Repl1, emqx_mgmt_cli, cluster, [["leave"]]), timer:sleep(1000), - ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core1, emqx, running_nodes, [])), - ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core2, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Repl2]), rpc:call(Core1, emqx, running_nodes, [])), - rpc:call(Core3, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), - rpc:call(Repl, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), - %% core nodes will join and restart asyncly, may need more time to re-cluster + rpc:call(Core2, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + rpc:call(Repl1, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + %% nodes will join and restart asyncly, may need more time to re-cluster ?assertEqual( ok, emqx_common_test_helpers:wait_for( From e6330dddecea49a98cdadb1208bdb9604db4ab3d Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 10 Apr 2024 10:19:38 +0200 Subject: [PATCH 120/234] fix(variform): 
allow numbers to be numbers --- apps/emqx_utils/src/emqx_variform.erl | 8 +++-- apps/emqx_utils/src/emqx_variform_parser.yrl | 6 ++-- apps/emqx_utils/src/emqx_variform_scan.xrl | 7 +++-- apps/emqx_utils/test/emqx_variform_tests.erl | 33 ++++++++++++++++++++ 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 834d22750..0a26f7480 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -83,8 +83,10 @@ eval_as_string(Expr, Bindings, _Opts) -> eval({str, Str}, _Bindings) -> str(Str); -eval({num, Num}, _Bindings) -> - str(Num); +eval({integer, Num}, _Bindings) -> + Num; +eval({float, Num}, _Bindings) -> + Num; eval({array, Args}, Bindings) -> eval(Args, Bindings); eval({call, FuncNameStr, Args}, Bindings) -> @@ -150,7 +152,7 @@ resolve_func_name(FuncNameStr) -> resolve_var_value(VarName, Bindings) -> case emqx_template:lookup_var(split(VarName), Bindings) of {ok, Value} -> - str(Value); + Value; {error, _Reason} -> <<>> end. diff --git a/apps/emqx_utils/src/emqx_variform_parser.yrl b/apps/emqx_utils/src/emqx_variform_parser.yrl index 508ef46d0..45d92696b 100644 --- a/apps/emqx_utils/src/emqx_variform_parser.yrl +++ b/apps/emqx_utils/src/emqx_variform_parser.yrl @@ -7,7 +7,8 @@ Nonterminals Terminals identifier - number + integer + float string '(' ')' ',' '[' ']'. @@ -34,7 +35,8 @@ args -> args ',' arg : '$1' ++ ['$3']. %% Arguments can be expressions, arrays, numbers, or strings arg -> expr : '$1'. arg -> array : '$1'. -arg -> number : {num, element(3, '$1')}. +arg -> integer: {integer, element(3, '$1')}. +arg -> float: {float, element(3, '$1')}. arg -> string : {str, element(3, '$1')}. Erlang code. 
diff --git a/apps/emqx_utils/src/emqx_variform_scan.xrl b/apps/emqx_utils/src/emqx_variform_scan.xrl index 29a45ef92..63c9fba29 100644 --- a/apps/emqx_utils/src/emqx_variform_scan.xrl +++ b/apps/emqx_utils/src/emqx_variform_scan.xrl @@ -3,7 +3,8 @@ Definitions. IDENTIFIER = [a-zA-Z][a-zA-Z0-9_.]* SQ_STRING = \'[^\']*\' DQ_STRING = \"[^\"]*\" -NUMBER = [+-]?(\\d+\\.\\d+|[0-9]+) +INTEGER = [+-]?[0-9]+ +FLOAT = [+-]?\\d+\\.\\d+ LPAREN = \( RPAREN = \) LBRACKET = \[ @@ -12,12 +13,12 @@ COMMA = , WHITESPACE = [\s\t\n]+ Rules. -%% Match function names, variable names (with ${}), strings, numbers, and structural characters {WHITESPACE} : skip_token. {IDENTIFIER} : {token, {identifier, TokenLine, TokenChars}}. {SQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $')}}. {DQ_STRING} : {token, {string, TokenLine, unquote(TokenChars, $")}}. -{NUMBER} : {token, {number, TokenLine, TokenChars}}. +{INTEGER} : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. +{FLOAT} : {token, {float, TokenLine, list_to_float(TokenChars)}}. {LPAREN} : {token, {'(', TokenLine}}. {RPAREN} : {token, {')', TokenLine}}. {LBRACKET} : {token, {'[', TokenLine}}. 
diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl index da26a383d..72fbf2637 100644 --- a/apps/emqx_utils/test/emqx_variform_tests.erl +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -39,11 +39,44 @@ redner_test_() -> {"out of range nth index", fun() -> ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>})) end}, + {"string for nth index", fun() -> + ?assertEqual({ok, <<"a">>}, render("nth('1', tokens(var, ','))", #{var => <<"a">>})) + end}, {"not a index number for nth", fun() -> ?assertMatch( {error, #{reason := invalid_argument, func := nth, index := <<"notnum">>}}, render("nth('notnum', tokens(var, ','))", #{var => <<"a">>}) ) + end}, + {"substr", fun() -> + ?assertMatch( + {ok, <<"b">>}, + render("substr(var,1)", #{var => <<"ab">>}) + ) + end}, + {"result in integer", fun() -> + ?assertMatch( + {ok, <<"2">>}, + render("strlen(var)", #{var => <<"ab">>}) + ) + end}, + {"result in float", fun() -> + ?assertMatch( + {ok, <<"2.2">>}, + render("var", #{var => 2.2}) + ) + end}, + {"concat a number", fun() -> + ?assertMatch( + {ok, <<"2.2">>}, + render("concat(strlen(var),'.2')", #{var => <<"xy">>}) + ) + end}, + {"var is an array", fun() -> + ?assertMatch( + {ok, <<"y">>}, + render("nth(2,var)", #{var => [<<"x">>, <<"y">>]}) + ) end} ]. 
From 834bddadad753de48b8c26e76daf867e06fb6323 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 11 Apr 2024 09:39:17 +0200 Subject: [PATCH 121/234] test: delete flaky test for now --- .../test/emqx_bridge_greptimedb_SUITE.erl | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl index 0fd839b7c..96cf0d7c9 100644 --- a/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl +++ b/apps/emqx_bridge_greptimedb/test/emqx_bridge_greptimedb_SUITE.erl @@ -911,69 +911,6 @@ t_start_exception(Config) -> ), ok. -t_write_failure(Config) -> - ProxyName = ?config(proxy_name, Config), - ProxyPort = ?config(proxy_port, Config), - ProxyHost = ?config(proxy_host, Config), - QueryMode = ?config(query_mode, Config), - {ok, _} = create_bridge(Config), - ClientId = emqx_guid:to_hexstr(emqx_guid:gen()), - Payload = #{ - int_key => -123, - bool => true, - float_key => 24.5, - uint_key => 123 - }, - SentData = #{ - <<"clientid">> => ClientId, - <<"topic">> => atom_to_binary(?FUNCTION_NAME), - <<"timestamp">> => erlang:system_time(millisecond), - <<"payload">> => Payload - }, - ?check_trace( - emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> - case QueryMode of - sync -> - ?wait_async_action( - ?assertMatch( - {error, {resource_error, #{reason := timeout}}}, - send_message(Config, SentData) - ), - #{?snk_kind := handle_async_reply, action := nack}, - 1_000 - ); - async -> - ?wait_async_action( - ?assertEqual(ok, send_message(Config, SentData)), - #{?snk_kind := handle_async_reply}, - 1_000 - ) - end - end), - fun(Trace0) -> - case QueryMode of - sync -> - Trace = ?of_kind(handle_async_reply, Trace0), - ?assertMatch([_ | _], Trace), - [#{result := Result} | _] = Trace, - ?assert( - not emqx_bridge_greptimedb_connector:is_unrecoverable_error(Result), - #{got => Result} - ); - async -> - Trace = 
?of_kind(handle_async_reply, Trace0), - ?assertMatch([_ | _], Trace), - [#{result := Result} | _] = Trace, - ?assert( - not emqx_bridge_greptimedb_connector:is_unrecoverable_error(Result), - #{got => Result} - ) - end, - ok - end - ), - ok. - t_missing_field(Config) -> BatchSize = ?config(batch_size, Config), IsBatch = BatchSize > 1, From eac25194e570650fbeece52e69fa3a1bd25c9881 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 11 Apr 2024 13:39:30 +0200 Subject: [PATCH 122/234] refactor: resolve some old TODOs move dashboard schema generation code to the right module --- apps/emqx_conf/src/emqx_conf.erl | 69 ------------------- .../src/emqx_dashboard_schema_api.erl | 63 ++++++++++++++++- 2 files changed, 60 insertions(+), 72 deletions(-) diff --git a/apps/emqx_conf/src/emqx_conf.erl b/apps/emqx_conf/src/emqx_conf.erl index 122998eeb..0bd319503 100644 --- a/apps/emqx_conf/src/emqx_conf.erl +++ b/apps/emqx_conf/src/emqx_conf.erl @@ -31,13 +31,6 @@ -export([dump_schema/2, reformat_schema_dump/2]). -export([schema_module/0]). -%% TODO: move to emqx_dashboard when we stop building api schema at build time --export([ - hotconf_schema_json/0, - bridge_schema_json/0, - hocon_schema_to_spec/2 -]). - %% for rpc -export([get_node_and_config/1]). @@ -456,17 +449,6 @@ warn_bad_namespace(Namespace) -> ok end. -%% TODO: move this function to emqx_dashboard when we stop generating this JSON at build time. -hotconf_schema_json() -> - SchemaInfo = #{title => <<"EMQX Hot Conf API Schema">>, version => <<"0.1.0">>}, - gen_api_schema_json_iodata(emqx_mgmt_api_configs, SchemaInfo). - -%% TODO: move this function to emqx_dashboard when we stop generating this JSON at build time. -bridge_schema_json() -> - Version = <<"0.1.0">>, - SchemaInfo = #{title => <<"EMQX Data Bridge API Schema">>, version => Version}, - gen_api_schema_json_iodata(emqx_bridge_api, SchemaInfo). - %% @doc return the root schema module. -spec schema_module() -> module(). 
schema_module() -> @@ -506,57 +488,6 @@ make_desc_resolver(Lang) -> unicode:characters_to_binary(Desc) end. -gen_api_schema_json_iodata(SchemaMod, SchemaInfo) -> - emqx_dashboard_swagger:gen_api_schema_json_iodata( - SchemaMod, - SchemaInfo, - fun ?MODULE:hocon_schema_to_spec/2 - ). - --define(TO_REF(_N_, _F_), iolist_to_binary([to_bin(_N_), ".", to_bin(_F_)])). --define(TO_COMPONENTS_SCHEMA(_M_, _F_), - iolist_to_binary([ - <<"#/components/schemas/">>, - ?TO_REF(emqx_dashboard_swagger:namespace(_M_), _F_) - ]) -). - -hocon_schema_to_spec(?R_REF(Module, StructName), _LocalModule) -> - {#{<<"$ref">> => ?TO_COMPONENTS_SCHEMA(Module, StructName)}, [{Module, StructName}]}; -hocon_schema_to_spec(?REF(StructName), LocalModule) -> - {#{<<"$ref">> => ?TO_COMPONENTS_SCHEMA(LocalModule, StructName)}, [{LocalModule, StructName}]}; -hocon_schema_to_spec(Type, LocalModule) when ?IS_TYPEREFL(Type) -> - {typename_to_spec(typerefl:name(Type), LocalModule), []}; -hocon_schema_to_spec(?ARRAY(Item), LocalModule) -> - {Schema, Refs} = hocon_schema_to_spec(Item, LocalModule), - {#{type => array, items => Schema}, Refs}; -hocon_schema_to_spec(?ENUM(Items), _LocalModule) -> - {#{type => enum, symbols => Items}, []}; -hocon_schema_to_spec(?MAP(Name, Type), LocalModule) -> - {Schema, SubRefs} = hocon_schema_to_spec(Type, LocalModule), - { - #{ - <<"type">> => object, - <<"properties">> => #{<<"$", (to_bin(Name))/binary>> => Schema} - }, - SubRefs - }; -hocon_schema_to_spec(?UNION(Types, _DisplayName), LocalModule) -> - {OneOf, Refs} = lists:foldl( - fun(Type, {Acc, RefsAcc}) -> - {Schema, SubRefs} = hocon_schema_to_spec(Type, LocalModule), - {[Schema | Acc], SubRefs ++ RefsAcc} - end, - {[], []}, - hoconsc:union_members(Types) - ), - {#{<<"oneOf">> => OneOf}, Refs}; -hocon_schema_to_spec(Atom, _LocalModule) when is_atom(Atom) -> - {#{type => enum, symbols => [Atom]}, []}. - -typename_to_spec(TypeStr, Module) -> - emqx_conf_schema_types:readable_dashboard(Module, TypeStr). 
- join_format(Snippets) -> case [S || S <- Snippets, S =/= undefined] of [] -> diff --git a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl index 9b5c45e71..41f516c7a 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl @@ -33,6 +33,14 @@ -define(TAGS, [<<"dashboard">>]). -define(BAD_REQUEST, 'BAD_REQUEST'). +-define(TO_REF(_N_, _F_), iolist_to_binary([to_bin(_N_), ".", to_bin(_F_)])). +-define(TO_COMPONENTS_SCHEMA(_M_, _F_), + iolist_to_binary([ + <<"#/components/schemas/">>, + ?TO_REF(emqx_dashboard_swagger:namespace(_M_), _F_) + ]) +). + %%-------------------------------------------------------------------- %% minirest API and schema %%-------------------------------------------------------------------- @@ -77,14 +85,22 @@ get_schema(get, _) -> {400, ?BAD_REQUEST, <<"unknown">>}. gen_schema(hotconf) -> - emqx_conf:hotconf_schema_json(); + hotconf_schema_json(); gen_schema(bridges) -> - emqx_conf:bridge_schema_json(); + bridge_schema_json(); gen_schema(actions) -> actions_schema_json(); gen_schema(connectors) -> connectors_schema_json(). +hotconf_schema_json() -> + SchemaInfo = #{title => <<"EMQX Hot Conf API Schema">>, version => <<"0.1.0">>}, + gen_api_schema_json_iodata(emqx_mgmt_api_configs, SchemaInfo). + +bridge_schema_json() -> + SchemaInfo = #{title => <<"EMQX Data Bridge API Schema">>, version => <<"0.1.0">>}, + gen_api_schema_json_iodata(emqx_bridge_api, SchemaInfo). + actions_schema_json() -> SchemaInfo = #{title => <<"EMQX Data Actions API Schema">>, version => <<"0.1.0">>}, %% Note: this will be moved to `emqx_actions' application in the future. @@ -98,5 +114,46 @@ gen_api_schema_json_iodata(SchemaMod, SchemaInfo) -> emqx_dashboard_swagger:gen_api_schema_json_iodata( SchemaMod, SchemaInfo, - fun emqx_conf:hocon_schema_to_spec/2 + fun hocon_schema_to_spec/2 ). 
+ +hocon_schema_to_spec(?R_REF(Module, StructName), _LocalModule) -> + {#{<<"$ref">> => ?TO_COMPONENTS_SCHEMA(Module, StructName)}, [{Module, StructName}]}; +hocon_schema_to_spec(?REF(StructName), LocalModule) -> + {#{<<"$ref">> => ?TO_COMPONENTS_SCHEMA(LocalModule, StructName)}, [{LocalModule, StructName}]}; +hocon_schema_to_spec(Type, LocalModule) when ?IS_TYPEREFL(Type) -> + {typename_to_spec(typerefl:name(Type), LocalModule), []}; +hocon_schema_to_spec(?ARRAY(Item), LocalModule) -> + {Schema, Refs} = hocon_schema_to_spec(Item, LocalModule), + {#{type => array, items => Schema}, Refs}; +hocon_schema_to_spec(?ENUM(Items), _LocalModule) -> + {#{type => enum, symbols => Items}, []}; +hocon_schema_to_spec(?MAP(Name, Type), LocalModule) -> + {Schema, SubRefs} = hocon_schema_to_spec(Type, LocalModule), + { + #{ + <<"type">> => object, + <<"properties">> => #{<<"$", (to_bin(Name))/binary>> => Schema} + }, + SubRefs + }; +hocon_schema_to_spec(?UNION(Types, _DisplayName), LocalModule) -> + {OneOf, Refs} = lists:foldl( + fun(Type, {Acc, RefsAcc}) -> + {Schema, SubRefs} = hocon_schema_to_spec(Type, LocalModule), + {[Schema | Acc], SubRefs ++ RefsAcc} + end, + {[], []}, + hoconsc:union_members(Types) + ), + {#{<<"oneOf">> => OneOf}, Refs}; +hocon_schema_to_spec(Atom, _LocalModule) when is_atom(Atom) -> + {#{type => enum, symbols => [Atom]}, []}. + +typename_to_spec(TypeStr, Module) -> + emqx_conf_schema_types:readable_dashboard(Module, TypeStr). + +to_bin(List) when is_list(List) -> iolist_to_binary(List); +to_bin(Boolean) when is_boolean(Boolean) -> Boolean; +to_bin(Atom) when is_atom(Atom) -> atom_to_binary(Atom, utf8); +to_bin(X) -> X. 
From 3b7cade6715514508f0f05dd8267a5dec30906ee Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 11 Apr 2024 13:49:40 +0200 Subject: [PATCH 123/234] chore: 5.6.1-beta.1 --- apps/emqx/include/emqx_release.hrl | 4 ++-- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- deploy/charts/emqx/Chart.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 9679510d9..2637a0270 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.6.0"). +-define(EMQX_RELEASE_CE, "5.6.1-beta.1"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.6.0"). +-define(EMQX_RELEASE_EE, "5.6.1-beta.1"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index 0fd47100b..573277cac 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.0 +version: 5.6.1-beta.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.0 +appVersion: 5.6.1-beta.1 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index b1ae7ff66..e771499b6 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.0 +version: 5.6.1-beta.1 # This is the version number of the application being deployed. 
This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.0 +appVersion: 5.6.1-beta.1 From afc87ddc9e7d8533e065416b656923870a877d25 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 11 Apr 2024 14:05:32 +0200 Subject: [PATCH 124/234] refactor: do not generate i18n msgid in dashboard schema json dashboard has its own mind now i18n is only used to generate docs: 1. runtime swagger spec at /api-docs 2. build-time config schema both result in the resolved i18n text, but not msgid --- .../src/emqx_dashboard_swagger.erl | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index 8cad67695..88c6e62eb 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -81,7 +81,7 @@ ]) ). --define(SPECIAL_LANG_MSGID, <<"$msgid">>). +-define(NO_I18N, undefined). -define(MAX_ROW_LIMIT, 10000). -define(DEFAULT_ROW, 100). @@ -267,7 +267,7 @@ gen_api_schema_json_iodata(SchemaMod, SchemaInfo, Converter) -> SchemaMod, #{ schema_converter => Converter, - i18n_lang => ?SPECIAL_LANG_MSGID + i18n_lang => ?NO_I18N } ), ApiSpec = lists:foldl( @@ -672,10 +672,10 @@ trans_description(Spec, Hocon, Options) -> ?DESC(_, _) = Struct -> get_i18n(<<"desc">>, Struct, undefined, Options); Text -> to_bin(Text) end, - case Desc of - undefined -> + case Desc =:= undefined of + true -> Spec; - Desc -> + false -> Desc1 = binary:replace(Desc, [<<"\n">>], <<"
">>, [global]), Spec#{description => Desc1} end. @@ -683,8 +683,8 @@ trans_description(Spec, Hocon, Options) -> get_i18n(Tag, ?DESC(Namespace, Id), Default, Options) -> Lang = get_lang(Options), case Lang of - ?SPECIAL_LANG_MSGID -> - make_msgid(Namespace, Id, Tag); + ?NO_I18N -> + undefined; _ -> get_i18n_text(Lang, Namespace, Id, Tag, Default) end. @@ -697,14 +697,6 @@ get_i18n_text(Lang, Namespace, Id, Tag, Default) -> Text end. -%% Format:$msgid:Namespace.Id.Tag -%% e.g. $msgid:emqx_schema.key.desc -%% $msgid:emqx_schema.key.label -%% if needed, the consumer of this schema JSON can use this msgid to -%% resolve the text in the i18n database. -make_msgid(Namespace, Id, Tag) -> - iolist_to_binary(["$msgid:", to_bin(Namespace), ".", to_bin(Id), ".", Tag]). - %% So far i18n_lang in options is only used at build time. %% At runtime, it's still the global config which controls the language. get_lang(#{i18n_lang := Lang}) -> Lang; @@ -716,7 +708,12 @@ trans_label(Spec, Hocon, Default, Options) -> ?DESC(_, _) = Struct -> get_i18n(<<"label">>, Struct, Default, Options); _ -> Default end, - Spec#{label => Label}. + case Label =:= undefined of + true -> + Spec; + false -> + Spec#{label => Label} + end. 
desc_struct(Hocon) -> R = From dfd13b4ab54510471c1f78b813b404d9efac3093 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 11 Apr 2024 15:12:01 +0200 Subject: [PATCH 125/234] test: fix dashboard schema json test cases --- apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl | 2 +- apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl index 2f20099ae..54f6f9efc 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_tests.erl @@ -357,7 +357,7 @@ kafka_consumer_hocon() -> %% assert compatibility bridge_schema_json_test() -> - JSON = iolist_to_binary(emqx_conf:bridge_schema_json()), + JSON = iolist_to_binary(emqx_dashboard_schema_api:bridge_schema_json()), Map = emqx_utils_json:decode(JSON), Path = [<<"components">>, <<"schemas">>, <<"bridge_kafka.post_producer">>, <<"properties">>], ?assertMatch(#{<<"kafka">> := _}, emqx_utils_maps:deep_get(Path, Map)). diff --git a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl index 41f516c7a..4a708cd78 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl @@ -30,6 +30,9 @@ -export([get_schema/2]). +%% for test +-export([bridge_schema_json/0]). + -define(TAGS, [<<"dashboard">>]). -define(BAD_REQUEST, 'BAD_REQUEST'). 
From b27fc0da2645150141b660b4b9878ac63b23cdc3 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 11 Apr 2024 15:24:41 +0200 Subject: [PATCH 126/234] test(emqx_machine): ensure node is down before testing open ports --- apps/emqx_machine/test/emqx_machine_SUITE.erl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/emqx_machine/test/emqx_machine_SUITE.erl b/apps/emqx_machine/test/emqx_machine_SUITE.erl index d8bd01c00..d9301aba4 100644 --- a/apps/emqx_machine/test/emqx_machine_SUITE.erl +++ b/apps/emqx_machine/test/emqx_machine_SUITE.erl @@ -144,7 +144,13 @@ t_open_ports_check(Config) -> ?assertEqual(ok, erpc:call(Core2, emqx_machine, open_ports_check, [])), ?assertEqual(ok, erpc:call(Replicant, emqx_machine, open_ports_check, [])), + true = erlang:monitor_node(Core2, true), ok = emqx_cth_cluster:stop_node(Core2), + receive + {nodedown, Core2} -> ok + after 10000 -> + ct:fail("nodedown message not received after 10 seconds.") + end, ?assertEqual(ok, erpc:call(Replicant, emqx_machine, open_ports_check, [])), Results = erpc:call(Core1, emqx_machine, open_ports_check, []), From 9998940aa22626a263d39a7b2e97b851d4b53b2e Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 11 Apr 2024 16:48:43 +0200 Subject: [PATCH 127/234] fix(trace): several improvements thanks to comments from @zmstone --- apps/emqx/src/emqx_trace/emqx_trace.erl | 12 +-- .../src/emqx_bridge_http_connector.erl | 73 +++++++------------ apps/emqx_rule_engine/rebar.config | 3 +- .../src/emqx_rule_api_schema.erl | 9 --- .../src/emqx_rule_engine.app.src | 4 +- .../src/emqx_rule_runtime.erl | 38 +++++++--- .../src/emqx_rule_sqltester.erl | 44 +++++------ .../test/emqx_rule_engine_SUITE.erl | 4 +- rel/i18n/emqx_rule_api_schema.hocon | 6 -- 9 files changed, 87 insertions(+), 106 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 408644128..4e182f300 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ 
b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -88,16 +88,18 @@ unsubscribe(Topic, SubOpts) -> ?TRACE("UNSUBSCRIBE", "unsubscribe", #{topic => Topic, sub_opts => SubOpts}). rendered_action_template(ActionID, RenderResult) -> - Msg = io_lib:format("action_template_rendered(~s)", [ActionID]), + Msg = lists:flatten(io_lib:format("action_template_rendered(~ts)", [ActionID])), TraceResult = ?TRACE("QUERY_RENDER", Msg, RenderResult), case logger:get_process_metadata() of #{stop_action_after_render := true} -> %% We throw an unrecoverable error to stop action before the %% resource is called/modified - StopMsg = io_lib:format( - "action_stopped_after_render(~s): " - "Action stopped after template render due to test setting.", - [ActionID] + StopMsg = lists:flatten( + io_lib:format( + "action_stopped_after_render(~ts): " + "Action stopped after template render due to test setting.", + [ActionID] + ) ), MsgBin = iolist_to_binary(StopMsg), error({unrecoverable_error, MsgBin}); diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index 46b3d5e1f..19b7ef875 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -371,29 +371,7 @@ on_query( } ), NRequest = formalize_request(Method, BasePath, Request), - case NRequest of - {Path, Headers} -> - emqx_trace:rendered_action_template( - InstId, - #{ - path => Path, - method => Method, - headers => Headers, - timeout => Timeout - } - ); - {Path, Headers, Body} -> - emqx_trace:rendered_action_template( - InstId, - #{ - path => Path, - method => Method, - headers => Headers, - timeout => Timeout, - body => Body - } - ) - end, + trace_rendered_action_template(InstId, Method, NRequest, Timeout), Worker = resolve_pool_worker(State, KeyOrNum), Result0 = ehttpc:request( Worker, @@ -503,29 +481,7 @@ on_query_async( } ), NRequest = formalize_request(Method, BasePath, Request), - case NRequest 
of - {Path, Headers} -> - emqx_trace:rendered_action_template( - InstId, - #{ - path => Path, - method => Method, - headers => Headers, - timeout => Timeout - } - ); - {Path, Headers, Body} -> - emqx_trace:rendered_action_template( - InstId, - #{ - path => Path, - method => Method, - headers => Headers, - timeout => Timeout, - body => Body - } - ) - end, + trace_rendered_action_template(InstId, Method, NRequest, Timeout), MaxAttempts = maps:get(max_attempts, State, 3), Context = #{ attempt => 1, @@ -545,6 +501,31 @@ on_query_async( ), {ok, Worker}. +trace_rendered_action_template(InstId, Method, NRequest, Timeout) -> + case NRequest of + {Path, Headers} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => Headers, + timeout => Timeout + } + ); + {Path, Headers, Body} -> + emqx_trace:rendered_action_template( + InstId, + #{ + path => Path, + method => Method, + headers => emqx_utils_redact:redact_headers(Headers), + timeout => Timeout, + body => Body + } + ) + end. + resolve_pool_worker(State, undefined) -> resolve_pool_worker(State, self()); resolve_pool_worker(#{pool_name := PoolName} = State, Key) -> diff --git a/apps/emqx_rule_engine/rebar.config b/apps/emqx_rule_engine/rebar.config index d51bffa20..0f00f15c6 100644 --- a/apps/emqx_rule_engine/rebar.config +++ b/apps/emqx_rule_engine/rebar.config @@ -2,7 +2,8 @@ {deps, [ {emqx, {path, "../emqx"}}, - {emqx_utils, {path, "../emqx_utils"}} + {emqx_utils, {path, "../emqx_utils"}}, + {emqx_modules, {path, "../emqx_modules"}} ]}. 
{profiles, [ diff --git a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl index 5e19b33ee..f4685222c 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_api_schema.erl @@ -108,15 +108,6 @@ fields("rule_test") -> fields("rule_apply_test") -> [ rule_input_message_context(), - {"environment", - sc( - typerefl:map(), - #{ - desc => - ?DESC("test_rule_environment"), - default => #{} - } - )}, {"stop_action_after_template_rendering", sc( typerefl:boolean(), diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src index 1768141ae..1fed922dd 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src @@ -17,7 +17,9 @@ %% rule_engine should wait for bridge connector start, %% it's will check action/connector ref's exist. emqx_bridge, - emqx_connector + emqx_connector, + %% Needed to start the tracing functionality + emqx_modules ]}, {mod, {emqx_rule_engine_app, []}}, {env, []}, diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index f90b5a974..d4cde213d 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -420,13 +420,13 @@ handle_action(RuleId, ActId, Selected, Envs) -> end. -define(IS_RES_DOWN(R), R == stopped; R == not_connected; R == not_found; R == unhealthy_target). 
-do_handle_action(_RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Selected, _Envs) -> +do_handle_action(RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Selected, _Envs) -> trace_action_bridge("BRIDGE", Action, "bridge_action", #{}, debug), - TraceCtx = do_handle_action_get_trace_context(Action), - ReplyTo = {fun ?MODULE:inc_action_metrics/2, [TraceCtx], #{reply_dropped => true}}, + {TraceCtx, IncCtx} = do_handle_action_get_trace_inc_metrics_context(RuleId, Action), + ReplyTo = {fun ?MODULE:inc_action_metrics/2, [IncCtx], #{reply_dropped => true}}, case emqx_bridge:send_message(BridgeType, BridgeName, ResId, Selected, #{ - reply_to => ReplyTo, trace_ctx => maps:remove(action_id, TraceCtx) + reply_to => ReplyTo, trace_ctx => TraceCtx }) of {error, Reason} when Reason == bridge_not_found; Reason == bridge_stopped -> @@ -437,20 +437,20 @@ do_handle_action(_RuleId, {bridge, BridgeType, BridgeName, ResId} = Action, Sele Result end; do_handle_action( - _RuleId, + RuleId, {bridge_v2, BridgeType, BridgeName} = Action, Selected, _Envs ) -> trace_action_bridge("BRIDGE", Action, "bridge_action", #{}, debug), - TraceCtx = do_handle_action_get_trace_context(Action), - ReplyTo = {fun ?MODULE:inc_action_metrics/2, [TraceCtx], #{reply_dropped => true}}, + {TraceCtx, IncCtx} = do_handle_action_get_trace_inc_metrics_context(RuleId, Action), + ReplyTo = {fun ?MODULE:inc_action_metrics/2, [IncCtx], #{reply_dropped => true}}, case emqx_bridge_v2:send_message( BridgeType, BridgeName, Selected, - #{reply_to => ReplyTo, trace_ctx => maps:remove(action_id, TraceCtx)} + #{reply_to => ReplyTo, trace_ctx => TraceCtx} ) of {error, Reason} when Reason == bridge_not_found; Reason == bridge_stopped -> @@ -460,17 +460,31 @@ do_handle_action( Result -> Result end; -do_handle_action(_RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) -> +do_handle_action(RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) -> trace_action(Action, "call_action_function"), %% 
the function can also throw 'out_of_service' Args = maps:get(args, Action, []), Result = Mod:Func(Selected, Envs, Args), - TraceCtx = do_handle_action_get_trace_context(Action), - inc_action_metrics(TraceCtx, Result), + {_, IncCtx} = do_handle_action_get_trace_inc_metrics_context(RuleId, Action), + inc_action_metrics(IncCtx, Result), trace_action(Action, "call_action_function_result", #{result => Result}, debug), Result. -do_handle_action_get_trace_context(Action) -> +do_handle_action_get_trace_inc_metrics_context(RuleID, Action) -> + case emqx_trace:list() of + [] -> + %% As a performance/memory optimization, we don't create any trace + %% context if there are no trace patterns. + {undefined, #{ + rule_id => RuleID, + action_id => Action + }}; + _List -> + Ctx = do_handle_action_get_trace_inc_metrics_context_unconditionally(Action), + {maps:remove(action_id, Ctx), Ctx} + end. + +do_handle_action_get_trace_inc_metrics_context_unconditionally(Action) -> Metadata = logger:get_process_metadata(), StopAfterRender = maps:get(stop_action_after_render, Metadata, false), case Metadata of diff --git a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl index 342a8d9f9..83f29eef3 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl @@ -28,7 +28,6 @@ apply_rule( RuleId, #{ context := Context, - environment := Env, stop_action_after_template_rendering := StopAfterRender } ) -> @@ -39,30 +38,32 @@ apply_rule( true -> %% test if the topic matches the topic filters in the rule case emqx_topic:match_any(InTopic, EventTopics) of - true -> do_apply_matched_rule(Rule, Context, Env, StopAfterRender); - false -> {error, nomatch} + true -> + do_apply_matched_rule( + Rule, + Context, + StopAfterRender + ); + false -> + {error, nomatch} end; false -> case lists:member(InTopic, EventTopics) of true -> %% the rule is for both publish and events, test it directly - 
do_apply_matched_rule(Rule, Context, Env, StopAfterRender); + do_apply_matched_rule(Rule, Context, StopAfterRender); false -> {error, nomatch} end end. -do_apply_matched_rule(Rule, Context, Env, StopAfterRender) -> +do_apply_matched_rule(Rule, Context, StopAfterRender) -> update_process_trace_metadata(StopAfterRender), - Env1 = - case Env of - M when map_size(M) =:= 0 -> - %% Use the default environment if no environment is provided - default_apply_rule_environment(); - _ -> - Env - end, - ApplyRuleRes = emqx_rule_runtime:apply_rule(Rule, Context, Env1), + ApplyRuleRes = emqx_rule_runtime:apply_rule( + Rule, + Context, + apply_rule_environment() + ), reset_trace_process_metadata(StopAfterRender), ApplyRuleRes. @@ -80,16 +81,11 @@ reset_trace_process_metadata(true = _StopAfterRender) -> reset_trace_process_metadata(false = _StopAfterRender) -> ok. -default_apply_rule_environment() -> - #{ - headers => #{ - protocol => mqtt, - username => undefined, - peerhost => {127, 0, 0, 1}, - proto_ver => 5, - properties => #{} - } - }. +%% At the time of writing the environment passed to the apply rule function is +%% not used at all for normal actions. When it is used for custom functions it +%% is first merged with the context so there does not seem to be any need to +%% set this to anything else then the empty map. +apply_rule_environment() -> #{}. -spec test(#{sql := binary(), context := map()}) -> {ok, map() | list()} | {error, term()}. 
test(#{sql := Sql, context := Context}) -> diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl index 5040d15b3..b0ca00a0e 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_SUITE.erl @@ -158,13 +158,13 @@ init_per_suite(Config) -> emqx_rule_funcs_demo:module_info(), application:load(emqx_conf), ok = emqx_common_test_helpers:start_apps( - [emqx, emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge], + [emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge], fun set_special_configs/1 ), Config. end_per_suite(_Config) -> - emqx_common_test_helpers:stop_apps([emqx, emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge]), + emqx_common_test_helpers:stop_apps([emqx_conf, emqx_rule_engine, emqx_auth, emqx_bridge]), ok. set_special_configs(emqx_auth) -> diff --git a/rel/i18n/emqx_rule_api_schema.hocon b/rel/i18n/emqx_rule_api_schema.hocon index 25535d0ca..68c6a560d 100644 --- a/rel/i18n/emqx_rule_api_schema.hocon +++ b/rel/i18n/emqx_rule_api_schema.hocon @@ -66,12 +66,6 @@ test_context.desc: test_context.label: """Event Conetxt""" -test_rule_environment.desc: -"""The environment that will be passed to the rule when it is applied. 
A default environment will be used if no environment is given.""" - -test_rule_environment.label: -"""Event Environment""" - stop_action_after_template_render.desc: """Set this to true if the action should be stopped after its template has been rendered (default is true).""" From aa950f97fa6ed07be927c3e99239a18b82ada85a Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 11:22:46 +0200 Subject: [PATCH 128/234] test: fix tests with missing application --- .../test/emqx_bridge_rabbitmq_test_utils.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/emqx_bridge_rabbitmq/test/emqx_bridge_rabbitmq_test_utils.erl b/apps/emqx_bridge_rabbitmq/test/emqx_bridge_rabbitmq_test_utils.erl index 47df47976..2110a0520 100644 --- a/apps/emqx_bridge_rabbitmq/test/emqx_bridge_rabbitmq_test_utils.erl +++ b/apps/emqx_bridge_rabbitmq/test/emqx_bridge_rabbitmq_test_utils.erl @@ -52,7 +52,7 @@ init_per_group(_Group, Config) -> common_init_per_group(Opts) -> emqx_common_test_helpers:render_and_load_app_config(emqx_conf), ok = emqx_common_test_helpers:start_apps([ - emqx_conf, emqx_bridge, emqx_bridge_rabbitmq, emqx_rule_engine + emqx_conf, emqx_bridge, emqx_bridge_rabbitmq, emqx_rule_engine, emqx_modules ]), ok = emqx_connector_test_helpers:start_apps([emqx_resource]), {ok, _} = application:ensure_all_started(emqx_connector), @@ -116,7 +116,9 @@ end_per_group(_Group, Config) -> } = get_channel_connection(Config), amqp_channel:call(Channel, #'queue.purge'{queue = rabbit_mq_queue()}), emqx_mgmt_api_test_util:end_suite(), - ok = emqx_common_test_helpers:stop_apps([emqx_conf, emqx_bridge_rabbitmq, emqx_rule_engine]), + ok = emqx_common_test_helpers:stop_apps([ + emqx_conf, emqx_bridge_rabbitmq, emqx_rule_engine, emqx_modules + ]), ok = emqx_connector_test_helpers:stop_apps([emqx_resource]), _ = application:stop(emqx_connector), _ = application:stop(emqx_bridge), From f444c6fc328765bf5e5739d5e6ea636aa2654cbe Mon Sep 17 00:00:00 2001 From: Kjell Winblad 
Date: Fri, 12 Apr 2024 11:38:01 +0200 Subject: [PATCH 129/234] fix: pass stop_action_after_render=true in trace meta data Even if there is no trace we still need to pass stop_action_after_render=true in the trace meta data so that the action will be stopped. --- .../src/emqx_rule_runtime.erl | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index d4cde213d..7181fb59b 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -471,23 +471,35 @@ do_handle_action(RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) - Result. do_handle_action_get_trace_inc_metrics_context(RuleID, Action) -> - case emqx_trace:list() of - [] -> + case {emqx_trace:list(), logger:get_process_metadata()} of + {[], #{stop_action_after_render := true}} -> + %% Even if there is no trace we still need to pass + %% stop_action_after_render in the trace meta data so that the + %% action will be stopped. + { + #{ + stop_action_after_render => true + }, + #{ + rule_id => RuleID, + action_id => Action + } + }; + {[], _} -> %% As a performance/memory optimization, we don't create any trace %% context if there are no trace patterns. {undefined, #{ rule_id => RuleID, action_id => Action }}; - _List -> - Ctx = do_handle_action_get_trace_inc_metrics_context_unconditionally(Action), + {_List, TraceMeta} -> + Ctx = do_handle_action_get_trace_inc_metrics_context_unconditionally(Action, TraceMeta), {maps:remove(action_id, Ctx), Ctx} end. 
-do_handle_action_get_trace_inc_metrics_context_unconditionally(Action) -> - Metadata = logger:get_process_metadata(), - StopAfterRender = maps:get(stop_action_after_render, Metadata, false), - case Metadata of +do_handle_action_get_trace_inc_metrics_context_unconditionally(Action, TraceMeta) -> + StopAfterRender = maps:get(stop_action_after_render, TraceMeta, false), + case TraceMeta of #{ rule_id := RuleID, clientid := ClientID From ed5409fb6a6fa7449a2b983094fb9e8ccc554c06 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 11:54:14 +0200 Subject: [PATCH 130/234] docs(trace): add emqx_ctl documentation for the new ruleid trace --- apps/emqx_management/src/emqx_mgmt_cli.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index ddbc60d5c..36013791c 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -512,7 +512,9 @@ trace(_) -> {"trace stop topic ", "Stop tracing for a topic on local node"}, {"trace start ip_address [] ", "Traces for a client ip on local node"}, - {"trace stop ip_address ", "Stop tracing for a client ip on local node"} + {"trace stop ip_address ", "Stop tracing for a client ip on local node"}, + {"trace start ruleid [] ", "Traces for a rule ID on local node"}, + {"trace stop ruleid ", "Stop tracing for a rule ID on local node"} ]). trace_on(Name, Type, Filter, Level, LogFile) -> @@ -583,6 +585,7 @@ traces(_) -> "and will end after seconds. The default value for is " ?DEFAULT_TRACE_DURATION " seconds."}, + {"traces start ruleid []", "Traces for a rule ID in cluster"}, {"traces stop ", "Stop trace in cluster"}, {"traces delete ", "Delete trace in cluster"} ]). 
From 500d4feddaae56b823962416269164b0ed0524c9 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Fri, 12 Apr 2024 14:37:10 +0300 Subject: [PATCH 131/234] fix(rebalance): fix start order of rebalance applications --- .../src/emqx_eviction_agent.app.src | 3 +- .../src/emqx_node_rebalance.app.src | 7 +- .../src/emqx_node_rebalance_api.erl | 108 +++++++++--------- .../test/emqx_node_rebalance_SUITE.erl | 2 +- .../test/emqx_node_rebalance_agent_SUITE.erl | 4 +- .../test/emqx_node_rebalance_api_SUITE.erl | 3 +- .../test/emqx_node_rebalance_cli_SUITE.erl | 2 +- .../emqx_node_rebalance_evacuation_SUITE.erl | 2 +- .../test/emqx_node_rebalance_purge_SUITE.erl | 1 - .../test/emqx_node_rebalance_status_SUITE.erl | 1 - changes/ee/fix-12871.en.md | 1 + mix.exs | 4 +- rebar.config.erl | 2 + 13 files changed, 73 insertions(+), 67 deletions(-) create mode 100644 changes/ee/fix-12871.en.md diff --git a/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src b/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src index 10a464f26..7e692bf9c 100644 --- a/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src +++ b/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src @@ -9,7 +9,8 @@ {applications, [ kernel, stdlib, - emqx_ctl + emqx_ctl, + emqx ]}, {mod, {emqx_eviction_agent_app, []}}, {env, []}, diff --git a/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src b/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src index beb5f2abb..e8967c556 100644 --- a/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src +++ b/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src @@ -1,6 +1,6 @@ {application, emqx_node_rebalance, [ {description, "EMQX Node Rebalance"}, - {vsn, "5.0.7"}, + {vsn, "5.0.8"}, {registered, [ emqx_node_rebalance_sup, emqx_node_rebalance, @@ -10,7 +10,10 @@ ]}, {applications, [ kernel, - stdlib + stdlib, + emqx, + emqx_ctl, + emqx_eviction_agent ]}, {mod, {emqx_node_rebalance_app, []}}, {env, []}, diff --git 
a/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl b/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl index a054cfe1f..35461ee5b 100644 --- a/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl +++ b/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl @@ -423,7 +423,7 @@ param_node() -> fields(rebalance_start) -> [ - {"wait_health_check", + {wait_health_check, mk( emqx_schema:timeout_duration_s(), #{ @@ -431,7 +431,7 @@ fields(rebalance_start) -> required => false } )}, - {"conn_evict_rate", + {conn_evict_rate, mk( pos_integer(), #{ @@ -439,7 +439,7 @@ fields(rebalance_start) -> required => false } )}, - {"sess_evict_rate", + {sess_evict_rate, mk( pos_integer(), #{ @@ -447,7 +447,7 @@ fields(rebalance_start) -> required => false } )}, - {"abs_conn_threshold", + {abs_conn_threshold, mk( pos_integer(), #{ @@ -455,7 +455,7 @@ fields(rebalance_start) -> required => false } )}, - {"rel_conn_threshold", + {rel_conn_threshold, mk( number(), #{ @@ -464,7 +464,7 @@ fields(rebalance_start) -> validator => [fun(Value) -> Value > 1.0 end] } )}, - {"abs_sess_threshold", + {abs_sess_threshold, mk( pos_integer(), #{ @@ -472,7 +472,7 @@ fields(rebalance_start) -> required => false } )}, - {"rel_sess_threshold", + {rel_sess_threshold, mk( number(), #{ @@ -481,7 +481,7 @@ fields(rebalance_start) -> validator => [fun(Value) -> Value > 1.0 end] } )}, - {"wait_takeover", + {wait_takeover, mk( emqx_schema:timeout_duration_s(), #{ @@ -489,7 +489,7 @@ fields(rebalance_start) -> required => false } )}, - {"nodes", + {nodes, mk( list(binary()), #{ @@ -501,7 +501,7 @@ fields(rebalance_start) -> ]; fields(rebalance_evacuation_start) -> [ - {"wait_health_check", + {wait_health_check, mk( emqx_schema:timeout_duration_s(), #{ @@ -509,7 +509,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"conn_evict_rate", + {conn_evict_rate, mk( pos_integer(), #{ @@ -517,7 +517,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - 
{"sess_evict_rate", + {sess_evict_rate, mk( pos_integer(), #{ @@ -525,7 +525,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"redirect_to", + {redirect_to, mk( binary(), #{ @@ -533,7 +533,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"wait_takeover", + {wait_takeover, mk( emqx_schema:timeout_duration_s(), #{ @@ -541,7 +541,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"migrate_to", + {migrate_to, mk( nonempty_list(binary()), #{ @@ -552,7 +552,7 @@ fields(rebalance_evacuation_start) -> ]; fields(purge_start) -> [ - {"purge_rate", + {purge_rate, mk( pos_integer(), #{ @@ -563,7 +563,7 @@ fields(purge_start) -> ]; fields(local_status_disabled) -> [ - {"status", + {status, mk( disabled, #{ @@ -574,7 +574,7 @@ fields(local_status_disabled) -> ]; fields(local_status_enabled) -> [ - {"status", + {status, mk( enabled, #{ @@ -582,7 +582,7 @@ fields(local_status_enabled) -> required => true } )}, - {"process", + {process, mk( hoconsc:enum([rebalance, evacuation]), #{ @@ -590,7 +590,7 @@ fields(local_status_enabled) -> required => true } )}, - {"state", + {state, mk( atom(), #{ @@ -598,7 +598,7 @@ fields(local_status_enabled) -> required => true } )}, - {"coordinator_node", + {coordinator_node, mk( binary(), #{ @@ -606,7 +606,7 @@ fields(local_status_enabled) -> required => false } )}, - {"connection_eviction_rate", + {connection_eviction_rate, mk( pos_integer(), #{ @@ -614,7 +614,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_eviction_rate", + {session_eviction_rate, mk( pos_integer(), #{ @@ -622,7 +622,7 @@ fields(local_status_enabled) -> required => false } )}, - {"connection_goal", + {connection_goal, mk( non_neg_integer(), #{ @@ -630,7 +630,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_goal", + {session_goal, mk( non_neg_integer(), #{ @@ -638,7 +638,7 @@ fields(local_status_enabled) -> required => false } )}, - {"disconnected_session_goal", + 
{disconnected_session_goal, mk( non_neg_integer(), #{ @@ -646,7 +646,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_recipients", + {session_recipients, mk( list(binary()), #{ @@ -654,7 +654,7 @@ fields(local_status_enabled) -> required => false } )}, - {"recipients", + {recipients, mk( list(binary()), #{ @@ -662,7 +662,7 @@ fields(local_status_enabled) -> required => false } )}, - {"stats", + {stats, mk( ref(status_stats), #{ @@ -673,7 +673,7 @@ fields(local_status_enabled) -> ]; fields(status_stats) -> [ - {"initial_connected", + {initial_connected, mk( non_neg_integer(), #{ @@ -681,7 +681,7 @@ fields(status_stats) -> required => true } )}, - {"current_connected", + {current_connected, mk( non_neg_integer(), #{ @@ -689,7 +689,7 @@ fields(status_stats) -> required => true } )}, - {"initial_sessions", + {initial_sessions, mk( non_neg_integer(), #{ @@ -697,7 +697,7 @@ fields(status_stats) -> required => true } )}, - {"current_sessions", + {current_sessions, mk( non_neg_integer(), #{ @@ -705,7 +705,7 @@ fields(status_stats) -> required => true } )}, - {"current_disconnected_sessions", + {current_disconnected_sessions, mk( non_neg_integer(), #{ @@ -716,11 +716,11 @@ fields(status_stats) -> ]; fields(global_coordinator_status) -> without( - ["status", "process", "session_goal", "session_recipients", "stats"], + [status, process, session_goal, session_recipients, stats], fields(local_status_enabled) ) ++ [ - {"donors", + {donors, mk( list(binary()), #{ @@ -728,7 +728,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"donor_conn_avg", + {donor_conn_avg, mk( non_neg_integer(), #{ @@ -736,7 +736,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"donor_sess_avg", + {donor_sess_avg, mk( non_neg_integer(), #{ @@ -744,7 +744,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"node", + {node, mk( binary(), #{ @@ -754,9 +754,9 @@ fields(global_coordinator_status) -> )} ]; 
fields(global_evacuation_status) -> - without(["status", "process"], fields(local_status_enabled)) ++ + without([status, process], fields(local_status_enabled)) ++ [ - {"node", + {node, mk( binary(), #{ @@ -768,19 +768,19 @@ fields(global_evacuation_status) -> fields(global_purge_status) -> without( [ - "status", - "process", - "connection_eviction_rate", - "session_eviction_rate", - "connection_goal", - "disconnected_session_goal", - "session_recipients", - "recipients" + status, + process, + connection_eviction_rate, + session_eviction_rate, + connection_goal, + disconnected_session_goal, + session_recipients, + recipients ], fields(local_status_enabled) ) ++ [ - {"purge_rate", + {purge_rate, mk( pos_integer(), #{ @@ -788,7 +788,7 @@ fields(global_purge_status) -> required => false } )}, - {"node", + {node, mk( binary(), #{ @@ -799,7 +799,7 @@ fields(global_purge_status) -> ]; fields(global_status) -> [ - {"evacuations", + {evacuations, mk( hoconsc:array(ref(global_evacuation_status)), #{ @@ -807,7 +807,7 @@ fields(global_status) -> required => false } )}, - {"purges", + {purges, mk( hoconsc:array(ref(global_purge_status)), #{ @@ -815,7 +815,7 @@ fields(global_status) -> required => false } )}, - {"rebalances", + {rebalances, mk( hoconsc:array(ref(global_coordinator_status)), #{ diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl index 4f0fbe3c4..04a74bf28 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl @@ -48,7 +48,7 @@ init_per_testcase(Case, Config) -> ClusterNodes = start_cluster( Config, NodeNames, - [emqx, emqx_eviction_agent, emqx_node_rebalance] + [emqx, emqx_node_rebalance] ), ok = snabbkaffe:start_trace(), [{cluster_nodes, ClusterNodes} | Config]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl index ac5f809bf..bd15b6475 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl @@ -38,7 +38,7 @@ groups() -> ]. init_per_suite(Config) -> - Apps = emqx_cth_suite:start([emqx, emqx_eviction_agent, emqx_node_rebalance], #{ + Apps = emqx_cth_suite:start([emqx, emqx_node_rebalance], #{ work_dir => ?config(priv_dir, Config) }), [{apps, Apps} | Config]. @@ -60,7 +60,7 @@ init_per_testcase(Case, Config) -> ClusterNodes = emqx_cth_cluster:start( [ {case_specific_node_name(?MODULE, Case), #{ - apps => [emqx, emqx_eviction_agent, emqx_node_rebalance] + apps => [emqx, emqx_node_rebalance] }} ], #{work_dir => emqx_cth_suite:work_dir(Case, Config)} diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl index a652dea0a..06e119532 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl @@ -29,7 +29,7 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - Apps = emqx_cth_suite:start([emqx, emqx_eviction_agent, emqx_node_rebalance], #{ + Apps = emqx_cth_suite:start([emqx, emqx_node_rebalance], #{ work_dir => ?config(priv_dir, Config) }), [{apps, Apps} | Config]. @@ -548,7 +548,6 @@ app_specs() -> #{enable => true} } }}, - emqx_eviction_agent, emqx_node_rebalance ]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl index 55542d320..3980b4a45 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl @@ -15,7 +15,7 @@ [emqtt_connect_many/2, stop_many/1, case_specific_node_name/3] ). --define(START_APPS, [emqx, emqx_eviction_agent, emqx_node_rebalance]). +-define(START_APPS, [emqx, emqx_node_rebalance]). all() -> emqx_common_test_helpers:all(?MODULE). diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl index 4c0d13788..d27f6d6d3 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl @@ -70,7 +70,7 @@ init_per_testcase(Case, Config) -> case_specific_node_name(?MODULE, Case, '_recipient') ] end, - ClusterNodes = start_cluster(Config, NodeNames, [emqx, emqx_eviction_agent, emqx_node_rebalance]), + ClusterNodes = start_cluster(Config, NodeNames, [emqx, emqx_node_rebalance]), ok = snabbkaffe:start_trace(), [{cluster_nodes, ClusterNodes} | Config]. diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl index 31844c5d0..0daeac106 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl @@ -117,7 +117,6 @@ app_specs() -> config => #{delayed => #{enable => true}} }}, - emqx_eviction_agent, emqx_node_rebalance ]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl index 888e63beb..6a7f20c4e 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl @@ -32,7 +32,6 @@ init_per_suite(Config) -> Apps = [ emqx_conf, emqx, - emqx_eviction_agent, emqx_node_rebalance ], Cluster = [ diff --git a/changes/ee/fix-12871.en.md b/changes/ee/fix-12871.en.md new file mode 100644 index 000000000..5b7520645 --- /dev/null +++ b/changes/ee/fix-12871.en.md @@ -0,0 +1 @@ +Fix startup process of evacuated node. Previously, if a node was evacuated and stoped without stopping evacuation, it would not start back. diff --git a/mix.exs b/mix.exs index 552cc75c9..0666601fc 100644 --- a/mix.exs +++ b/mix.exs @@ -332,7 +332,9 @@ defmodule EMQXUmbrella.MixProject do :emqx_s3, :emqx_opentelemetry, :emqx_durable_storage, - :rabbit_common + :rabbit_common, + :emqx_eviction_agent, + :emqx_node_rebalance ], steps: steps, strip_beams: false diff --git a/rebar.config.erl b/rebar.config.erl index a81d162a9..35f76b187 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -116,6 +116,8 @@ is_community_umbrella_app("apps/emqx_gateway_ocpp") -> false; is_community_umbrella_app("apps/emqx_gateway_jt808") -> false; is_community_umbrella_app("apps/emqx_bridge_syskeeper") -> false; is_community_umbrella_app("apps/emqx_message_validation") -> false; +is_community_umbrella_app("apps/emqx_eviction_agent") -> false; +is_community_umbrella_app("apps/emqx_node_rebalance") -> false; is_community_umbrella_app(_) -> true. 
%% BUILD_WITHOUT_JQ From 6777f04780cae222403bd371bd9357a1eaa8c08f Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 15:21:25 +0200 Subject: [PATCH 132/234] fix: iolist_to_binrary -> unicode:characters_to_binary It is not always safe to use iolist_to_binrary on the output of an io_lib:format call with the ~ts placeholder. --- apps/emqx/src/emqx_trace/emqx_trace.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 4e182f300..17580745a 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -101,7 +101,7 @@ rendered_action_template(ActionID, RenderResult) -> [ActionID] ) ), - MsgBin = iolist_to_binary(StopMsg), + MsgBin = unicode:characters_to_binary(StopMsg), error({unrecoverable_error, MsgBin}); _ -> ok From b4198185bc4b12f61e19beed00887e0aab7d0065 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 15:27:03 +0200 Subject: [PATCH 133/234] fix(http connector): redact sensitive information from headers --- apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index 19b7ef875..ec75922a7 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -509,7 +509,7 @@ trace_rendered_action_template(InstId, Method, NRequest, Timeout) -> #{ path => Path, method => Method, - headers => Headers, + headers => emqx_utils_redact:redact_headers(Headers), timeout => Timeout } ); From 2577224bc7c901fe5bca60a95ddbbc2d270a1573 Mon Sep 17 00:00:00 2001 From: zmstone Date: Sat, 13 Apr 2024 00:59:56 +0200 Subject: [PATCH 134/234] fix(swagger): do not generate dummy descriptions --- apps/emqx_dashboard/src/emqx_dashboard_swagger.erl | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index 88c6e62eb..dc188426e 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -661,7 +661,7 @@ trans_desc(Init, Hocon, Func, Name, Options) -> Spec1 = trans_label(Spec0, Hocon, Name, Options), case Spec1 of #{description := _} -> Spec1; - _ -> Spec1#{description => <>} + _ -> Spec1 end end. From da5b01aa46b555de6e823525938fa1c1bceccbc9 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 10 Apr 2024 11:24:17 +0200 Subject: [PATCH 135/234] refactor(client_attr): allow more than one initial extraction --- apps/emqx/src/emqx_channel.erl | 145 +++++++++++------- apps/emqx/src/emqx_schema.erl | 4 +- apps/emqx/test/emqx_config_SUITE.erl | 2 +- apps/emqx/test/emqx_listeners_SUITE.erl | 2 +- .../test/emqx_authz/emqx_authz_SUITE.erl | 2 +- changes/ce/feat-12750.en.md | 2 +- rel/i18n/emqx_schema.hocon | 3 +- 7 files changed, 100 insertions(+), 60 deletions(-) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index 27babfcc9..a0fbae441 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -269,8 +269,7 @@ init( }, Zone ), - AttrExtractionConfig = get_mqtt_conf(Zone, client_attrs_init), - ClientInfo = initialize_client_attrs_from_cert(AttrExtractionConfig, ClientInfo0, Peercert), + ClientInfo = initialize_client_attrs_from_cert(ClientInfo0, Peercert), {NClientInfo, NConnInfo} = take_ws_cookie(ClientInfo, ConnInfo), #channel{ conninfo = NConnInfo, @@ -1575,7 +1574,7 @@ enrich_client(ConnPkt, Channel = #channel{clientinfo = ClientInfo}) -> fun maybe_username_as_clientid/2, fun maybe_assign_clientid/2, %% attr init should happen after clientid and username assign - fun maybe_set_client_initial_attr/2 + fun maybe_set_client_initial_attrs/2 ], ConnPkt, ClientInfo @@ -1587,7 +1586,17 @@ 
enrich_client(ConnPkt, Channel = #channel{clientinfo = ClientInfo}) -> {error, ReasonCode, Channel#channel{clientinfo = NClientInfo}} end. -initialize_client_attrs_from_cert( +initialize_client_attrs_from_cert(#{zone := Zone} = ClientInfo, Peercert) -> + Inits = get_client_attrs_init_config(Zone), + lists:foldl( + fun(Init, Acc) -> + do_initialize_client_attrs_from_cert(Init, Acc, Peercert) + end, + ClientInfo, + Inits + ). + +do_initialize_client_attrs_from_cert( #{ extract_from := From, extract_regexp := Regexp, @@ -1596,21 +1605,24 @@ initialize_client_attrs_from_cert( ClientInfo, Peercert ) when From =:= cn orelse From =:= dn -> - case extract_client_attr_from_cert(From, Regexp, Peercert) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_cert", - extracted_as => AttrName, - extracted_value => Value - } - ), - ClientInfo#{client_attrs => #{AttrName => Value}}; - _ -> - ClientInfo#{client_attrs => #{}} - end; -initialize_client_attrs_from_cert(_, ClientInfo, _Peercert) -> + Attrs0 = maps:get(client_attrs, ClientInfo, #{}), + Attrs = + case extract_client_attr_from_cert(From, Regexp, Peercert) of + {ok, Value} -> + ?SLOG( + debug, + #{ + msg => "client_attr_init_from_cert", + extracted_as => AttrName, + extracted_value => Value + } + ), + Attrs0#{AttrName => Value}; + _ -> + Attrs0 + end, + ClientInfo#{client_attrs => Attrs}; +do_initialize_client_attrs_from_cert(_, ClientInfo, _Peercert) -> ClientInfo. extract_client_attr_from_cert(cn, Regexp, Peercert) -> @@ -1668,27 +1680,51 @@ maybe_assign_clientid(#mqtt_packet_connect{clientid = <<>>}, ClientInfo) -> maybe_assign_clientid(#mqtt_packet_connect{clientid = ClientId}, ClientInfo) -> {ok, ClientInfo#{clientid => ClientId}}. 
-maybe_set_client_initial_attr(ConnPkt, #{zone := Zone} = ClientInfo0) -> - Config = get_mqtt_conf(Zone, client_attrs_init), - ClientInfo = initialize_client_attrs_from_user_property(Config, ConnPkt, ClientInfo0), - Attrs = maps:get(client_attrs, ClientInfo, #{}), - case extract_attr_from_clientinfo(Config, ClientInfo) of - {ok, Value} -> - #{extract_as := Name} = Config, - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_clientinfo", - extracted_as => Name, - extracted_value => Value - } - ), - {ok, ClientInfo#{client_attrs => Attrs#{Name => Value}}}; - _ -> - {ok, ClientInfo} +get_client_attrs_init_config(Zone) -> + case get_mqtt_conf(Zone, client_attrs_init, []) of + L when is_list(L) -> L; + M when is_map(M) -> [M] end. -initialize_client_attrs_from_user_property( +maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo0) -> + Inits = get_client_attrs_init_config(Zone), + ClientInfo = initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo0), + {ok, initialize_client_attrs_from_clientinfo(Inits, ClientInfo)}. + +initialize_client_attrs_from_clientinfo(Inits, ClientInfo) -> + lists:foldl( + fun(Init, Acc) -> + Attrs = maps:get(client_attrs, ClientInfo, #{}), + case extract_attr_from_clientinfo(Init, ClientInfo) of + {ok, Value} -> + #{extract_as := Name} = Init, + ?SLOG( + debug, + #{ + msg => "client_attr_init_from_clientinfo", + extracted_as => Name, + extracted_value => Value + } + ), + Acc#{client_attrs => Attrs#{Name => Value}}; + _ -> + Acc + end + end, + ClientInfo, + Inits + ). + +initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo) -> + lists:foldl( + fun(Init, Acc) -> + do_initialize_client_attrs_from_user_property(Init, ConnPkt, Acc) + end, + ClientInfo, + Inits + ). 
+ +do_initialize_client_attrs_from_user_property( #{ extract_from := user_property, extract_as := PropertyKey @@ -1696,21 +1732,24 @@ initialize_client_attrs_from_user_property( ConnPkt, ClientInfo ) -> - case extract_client_attr_from_user_property(ConnPkt, PropertyKey) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_user_property", - extracted_as => PropertyKey, - extracted_value => Value - } - ), - ClientInfo#{client_attrs => #{PropertyKey => Value}}; - _ -> - ClientInfo - end; -initialize_client_attrs_from_user_property(_, _ConnInfo, ClientInfo) -> + Attrs0 = maps:get(client_attrs, ClientInfo, #{}), + Attrs = + case extract_client_attr_from_user_property(ConnPkt, PropertyKey) of + {ok, Value} -> + ?SLOG( + debug, + #{ + msg => "client_attr_init_from_user_property", + extracted_as => PropertyKey, + extracted_value => Value + } + ), + Attrs0#{PropertyKey => Value}; + _ -> + Attrs0 + end, + ClientInfo#{client_attrs => Attrs}; +do_initialize_client_attrs_from_user_property(_, _ConnPkt, ClientInfo) -> ClientInfo. 
extract_client_attr_from_user_property( diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 427df5db0..ef1ba9999 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -3552,9 +3552,9 @@ mqtt_general() -> )}, {"client_attrs_init", sc( - hoconsc:union([disabled, ref("client_attrs_init")]), + hoconsc:union([hoconsc:array(ref("client_attrs_init")), ref("client_attrs_init")]), #{ - default => disabled, + default => [], desc => ?DESC("client_attrs_init") } )} diff --git a/apps/emqx/test/emqx_config_SUITE.erl b/apps/emqx/test/emqx_config_SUITE.erl index b3e60f793..a9b4a8328 100644 --- a/apps/emqx/test/emqx_config_SUITE.erl +++ b/apps/emqx/test/emqx_config_SUITE.erl @@ -454,7 +454,7 @@ zone_global_defaults() -> upgrade_qos => false, use_username_as_clientid => false, wildcard_subscription => true, - client_attrs_init => disabled + client_attrs_init => [] }, overload_protection => #{ diff --git a/apps/emqx/test/emqx_listeners_SUITE.erl b/apps/emqx/test/emqx_listeners_SUITE.erl index acd7656d7..d49b5f893 100644 --- a/apps/emqx/test/emqx_listeners_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_SUITE.erl @@ -170,7 +170,7 @@ t_client_attr_as_mountpoint(_Config) -> ?assertMatch([_], emqx_router:match_routes(MatchTopic)), emqtt:stop(Client) end), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], disabled), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], []), ok. 
t_current_conns_tcp(_Config) -> diff --git a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl index c88dcc244..37ac27a9b 100644 --- a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl @@ -572,7 +572,7 @@ t_alias_prefix(_Config) -> ?assertMatch({ok, _, [?RC_NOT_AUTHORIZED]}, emqtt:subscribe(C, SubTopicNotAllowed)), unlink(C), emqtt:stop(C), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], disalbed), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], []), ok. %% client is allowed by ACL to publish to its LWT topic, is connected, diff --git a/changes/ce/feat-12750.en.md b/changes/ce/feat-12750.en.md index bd7375168..d0a70e6fc 100644 --- a/changes/ce/feat-12750.en.md +++ b/changes/ce/feat-12750.en.md @@ -7,7 +7,7 @@ an MQTT connection. ### Initialization of `client_attrs` -- The `client_attrs` field can be initially populated based on the configuration from one of the +- The `client_attrs` fields can be initially populated based on the configuration from one of the following sources: - `cn`: The common name from the TLS client's certificate. - `dn`: The distinguished name from the TLS client's certificate, that is, the certificate "Subject". diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index 0bd8c74d5..90fbfeefc 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -1575,7 +1575,8 @@ client_attrs_init { label: "Client Attributes Initialization" desc: """~ Specify how to initialize client attributes. - One initial client attribute can be initialized as `client_attrs.NAME`, + This config accepts one initialization rule, or a list of rules. + Client attributes can be initialized as `client_attrs.NAME`, where `NAME` is the name of the attribute specified in the config `extract_as`. 
The initialized client attribute will be stored in the `client_attrs` property with the specified name, and can be used as a placeholder in a template for authentication and authorization. From b76b6fbe636cfc14acac5d9c9e909daaa31327e8 Mon Sep 17 00:00:00 2001 From: zmstone Date: Sat, 13 Apr 2024 00:11:57 +0200 Subject: [PATCH 136/234] feat(variform): initialize client_attrs with variform Moved regular expression extraction as a variform function. --- apps/emqx/src/emqx_channel.erl | 155 +++-------------- apps/emqx/src/emqx_schema.erl | 34 +++- apps/emqx/test/emqx_client_SUITE.erl | 24 +-- apps/emqx/test/emqx_listeners_SUITE.erl | 12 +- .../test/emqx_authz/emqx_authz_SUITE.erl | 14 +- apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 62 +++---- apps/emqx_utils/src/emqx_variform.erl | 159 +++++++++++++----- ...variform_str.erl => emqx_variform_bif.erl} | 39 ++++- .../test/emqx_variform_bif_tests.erl | 59 +++++++ apps/emqx_utils/test/emqx_variform_tests.erl | 50 ++++-- .../ce/{feat-12750.en.md => feat-12872.en.md} | 4 +- rel/i18n/emqx_schema.hocon | 51 +++--- scripts/spellcheck/dicts/emqx.txt | 1 + 13 files changed, 375 insertions(+), 289 deletions(-) rename apps/emqx_utils/src/{emqx_variform_str.erl => emqx_variform_bif.erl} (90%) create mode 100644 apps/emqx_utils/test/emqx_variform_bif_tests.erl rename changes/ce/{feat-12750.en.md => feat-12872.en.md} (93%) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index a0fbae441..efb5133bc 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -251,7 +251,7 @@ init( MP -> MP end, ListenerId = emqx_listeners:listener_id(Type, Listener), - ClientInfo0 = set_peercert_infos( + ClientInfo = set_peercert_infos( Peercert, #{ zone => Zone, @@ -269,7 +269,6 @@ init( }, Zone ), - ClientInfo = initialize_client_attrs_from_cert(ClientInfo0, Peercert), {NClientInfo, NConnInfo} = take_ws_cookie(ClientInfo, ConnInfo), #channel{ conninfo = NConnInfo, @@ -1586,60 +1585,6 @@ 
enrich_client(ConnPkt, Channel = #channel{clientinfo = ClientInfo}) -> {error, ReasonCode, Channel#channel{clientinfo = NClientInfo}} end. -initialize_client_attrs_from_cert(#{zone := Zone} = ClientInfo, Peercert) -> - Inits = get_client_attrs_init_config(Zone), - lists:foldl( - fun(Init, Acc) -> - do_initialize_client_attrs_from_cert(Init, Acc, Peercert) - end, - ClientInfo, - Inits - ). - -do_initialize_client_attrs_from_cert( - #{ - extract_from := From, - extract_regexp := Regexp, - extract_as := AttrName - }, - ClientInfo, - Peercert -) when From =:= cn orelse From =:= dn -> - Attrs0 = maps:get(client_attrs, ClientInfo, #{}), - Attrs = - case extract_client_attr_from_cert(From, Regexp, Peercert) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_cert", - extracted_as => AttrName, - extracted_value => Value - } - ), - Attrs0#{AttrName => Value}; - _ -> - Attrs0 - end, - ClientInfo#{client_attrs => Attrs}; -do_initialize_client_attrs_from_cert(_, ClientInfo, _Peercert) -> - ClientInfo. - -extract_client_attr_from_cert(cn, Regexp, Peercert) -> - CN = esockd_peercert:common_name(Peercert), - re_extract(CN, Regexp); -extract_client_attr_from_cert(dn, Regexp, Peercert) -> - DN = esockd_peercert:subject(Peercert), - re_extract(DN, Regexp). - -re_extract(Str, Regexp) when is_binary(Str) -> - case re:run(Str, Regexp, [{capture, all_but_first, list}]) of - {match, [_ | _] = List} -> {ok, iolist_to_binary(List)}; - _ -> nomatch - end; -re_extract(_NotStr, _Regexp) -> - ignored. - set_username( #mqtt_packet_connect{username = Username}, ClientInfo = #{username := undefined} @@ -1681,33 +1626,36 @@ maybe_assign_clientid(#mqtt_packet_connect{clientid = ClientId}, ClientInfo) -> {ok, ClientInfo#{clientid => ClientId}}. get_client_attrs_init_config(Zone) -> - case get_mqtt_conf(Zone, client_attrs_init, []) of - L when is_list(L) -> L; - M when is_map(M) -> [M] - end. + get_mqtt_conf(Zone, client_attrs_init, []). 
-maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo0) -> +maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo) -> Inits = get_client_attrs_init_config(Zone), - ClientInfo = initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo0), - {ok, initialize_client_attrs_from_clientinfo(Inits, ClientInfo)}. + UserProperty = get_user_property_as_map(ConnPkt), + {ok, initialize_client_attrs(Inits, ClientInfo#{user_property => UserProperty})}. -initialize_client_attrs_from_clientinfo(Inits, ClientInfo) -> +initialize_client_attrs(Inits, ClientInfo) -> lists:foldl( - fun(Init, Acc) -> + fun(#{expression := Variform, set_as_attr := Name}, Acc) -> Attrs = maps:get(client_attrs, ClientInfo, #{}), - case extract_attr_from_clientinfo(Init, ClientInfo) of + case emqx_variform:render(Variform, ClientInfo) of {ok, Value} -> - #{extract_as := Name} = Init, ?SLOG( debug, #{ - msg => "client_attr_init_from_clientinfo", - extracted_as => Name, - extracted_value => Value + msg => "client_attr_initialized", + set_as_attr => Name, + attr_value => Value } ), Acc#{client_attrs => Attrs#{Name => Value}}; - _ -> + {error, Reason} -> + ?SLOG( + warning, + #{ + msg => "client_attr_initialization_failed", + reason => Reason + } + ), Acc end end, @@ -1715,67 +1663,12 @@ initialize_client_attrs_from_clientinfo(Inits, ClientInfo) -> Inits ). -initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo) -> - lists:foldl( - fun(Init, Acc) -> - do_initialize_client_attrs_from_user_property(Init, ConnPkt, Acc) - end, - ClientInfo, - Inits - ). 
- -do_initialize_client_attrs_from_user_property( - #{ - extract_from := user_property, - extract_as := PropertyKey - }, - ConnPkt, - ClientInfo -) -> - Attrs0 = maps:get(client_attrs, ClientInfo, #{}), - Attrs = - case extract_client_attr_from_user_property(ConnPkt, PropertyKey) of - {ok, Value} -> - ?SLOG( - debug, - #{ - msg => "client_attr_init_from_user_property", - extracted_as => PropertyKey, - extracted_value => Value - } - ), - Attrs0#{PropertyKey => Value}; - _ -> - Attrs0 - end, - ClientInfo#{client_attrs => Attrs}; -do_initialize_client_attrs_from_user_property(_, _ConnPkt, ClientInfo) -> - ClientInfo. - -extract_client_attr_from_user_property( - #mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}, PropertyKey -) -> - case lists:keyfind(PropertyKey, 1, UserProperty) of - {_, Value} -> - {ok, Value}; - _ -> - not_found - end; -extract_client_attr_from_user_property(_ConnPkt, _PropertyKey) -> - ignored. - -extract_attr_from_clientinfo(#{extract_from := clientid, extract_regexp := Regexp}, #{ - clientid := ClientId -}) -> - re_extract(ClientId, Regexp); -extract_attr_from_clientinfo(#{extract_from := username, extract_regexp := Regexp}, #{ - username := Username -}) when - Username =/= undefined +get_user_property_as_map(#mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}) when + is_list(UserProperty) -> - re_extract(Username, Regexp); -extract_attr_from_clientinfo(_Config, _CLientInfo) -> - ignored. + maps:from_list(UserProperty); +get_user_property_as_map(_) -> + #{}. 
fix_mountpoint(#{mountpoint := undefined} = ClientInfo) -> ClientInfo; diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index ef1ba9999..1dab4f42f 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1734,20 +1734,38 @@ fields(durable_storage) -> emqx_ds_schema:schema(); fields("client_attrs_init") -> [ - {extract_from, + {expression, sc( - hoconsc:enum([clientid, username, cn, dn, user_property]), - #{desc => ?DESC("client_attrs_init_extract_from")} + typerefl:alias("string", any()), + #{ + desc => ?DESC("client_attrs_init_expression"), + converter => fun compile_variform/2 + } )}, - {extract_regexp, sc(binary(), #{desc => ?DESC("client_attrs_init_extract_regexp")})}, - {extract_as, + {set_as_attr, sc(binary(), #{ - default => <<"alias">>, - desc => ?DESC("client_attrs_init_extract_as"), + desc => ?DESC("client_attrs_init_set_as_attr"), validator => fun restricted_string/1 })} ]. +compile_variform(undefined, _Opts) -> + undefined; +compile_variform(Expression, #{make_serializable := true}) -> + case is_binary(Expression) of + true -> + Expression; + false -> + emqx_variform:decompile(Expression) + end; +compile_variform(Expression, _Opts) -> + case emqx_variform:compile(Expression) of + {ok, Compiled} -> + Compiled; + {error, Reason} -> + throw(#{expression => Expression, reason => Reason}) + end. 
+ restricted_string(Str) -> case emqx_utils:is_restricted_str(Str) of true -> ok; @@ -3552,7 +3570,7 @@ mqtt_general() -> )}, {"client_attrs_init", sc( - hoconsc:union([hoconsc:array(ref("client_attrs_init")), ref("client_attrs_init")]), + hoconsc:array(ref("client_attrs_init")), #{ default => [], desc => ?DESC("client_attrs_init") diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index ba38d92ff..f0afe6195 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -395,13 +395,14 @@ t_certdn_as_alias(_) -> test_cert_extraction_as_alias(Which) -> %% extract the first two chars - Re = <<"^(..).*$">>, ClientId = iolist_to_binary(["ClientIdFor_", atom_to_list(Which)]), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => Which, - extract_regexp => Re, - extract_as => <<"alias">> - }), + {ok, Compiled} = emqx_variform:compile("substr(" ++ atom_to_list(Which) ++ ",0,2)"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"alias">> + } + ]), SslConf = emqx_common_test_helpers:client_mtls('tlsv1.2'), {ok, Client} = emqtt:start_link([ {clientid, ClientId}, {port, 8883}, {ssl, true}, {ssl_opts, SslConf} @@ -416,10 +417,13 @@ test_cert_extraction_as_alias(Which) -> t_client_attr_from_user_property(_Config) -> ClientId = atom_to_binary(?FUNCTION_NAME), - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => user_property, - extract_as => <<"group">> - }), + {ok, Compiled} = emqx_variform:compile("user_property.group"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"group">> + } + ]), SslConf = emqx_common_test_helpers:client_mtls('tlsv1.3'), {ok, Client} = emqtt:start_link([ {clientid, ClientId}, diff --git a/apps/emqx/test/emqx_listeners_SUITE.erl b/apps/emqx/test/emqx_listeners_SUITE.erl index 
d49b5f893..ba84699c6 100644 --- a/apps/emqx/test/emqx_listeners_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_SUITE.erl @@ -150,11 +150,13 @@ t_client_attr_as_mountpoint(_Config) -> <<"limiter">> => #{}, <<"mountpoint">> => <<"groups/${client_attrs.ns}/">> }, - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => clientid, - extract_regexp => <<"^(.+)-.+$">>, - extract_as => <<"ns">> - }), + {ok, Compiled} = emqx_variform:compile("nth(1,tokens(clientid,'-'))"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"ns">> + } + ]), emqx_logger:set_log_level(debug), with_listener(tcp, attr_as_moutpoint, ListenerConf, fun() -> {ok, Client} = emqtt:start_link(#{ diff --git a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl index 37ac27a9b..70dd0bbb6 100644 --- a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl @@ -557,12 +557,14 @@ t_publish_last_will_testament_denied_topic(_Config) -> t_alias_prefix(_Config) -> {ok, _} = emqx_authz:update(?CMD_REPLACE, [?SOURCE_FILE_CLIENT_ATTR]), - ExtractSuffix = <<"^.*-(.*)$">>, - emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{ - extract_from => clientid, - extract_regexp => ExtractSuffix, - extract_as => <<"alias">> - }), + %% '^.*-(.*)$': extract the suffix after the last '-' + {ok, Compiled} = emqx_variform:compile("concat(regex_extract(clientid,'^.*-(.*)$'))"), + emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [ + #{ + expression => Compiled, + set_as_attr => <<"alias">> + } + ]), ClientId = <<"org1-name2">>, SubTopic = <<"name2/#">>, SubTopicNotAllowed = <<"name3/#">>, diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index 6a719c3f1..414a3d620 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ 
b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -771,66 +771,66 @@ is_array(_) -> false. %% String Funcs %%------------------------------------------------------------------------------ -coalesce(List) -> emqx_variform_str:coalesce(List). +coalesce(List) -> emqx_variform_bif:coalesce(List). -coalesce(A, B) -> emqx_variform_str:coalesce(A, B). +coalesce(A, B) -> emqx_variform_bif:coalesce(A, B). -lower(S) -> emqx_variform_str:lower(S). +lower(S) -> emqx_variform_bif:lower(S). -ltrim(S) -> emqx_variform_str:ltrim(S). +ltrim(S) -> emqx_variform_bif:ltrim(S). -reverse(S) -> emqx_variform_str:reverse(S). +reverse(S) -> emqx_variform_bif:reverse(S). -rtrim(S) -> emqx_variform_str:rtrim(S). +rtrim(S) -> emqx_variform_bif:rtrim(S). -strlen(S) -> emqx_variform_str:strlen(S). +strlen(S) -> emqx_variform_bif:strlen(S). -substr(S, Start) -> emqx_variform_str:substr(S, Start). +substr(S, Start) -> emqx_variform_bif:substr(S, Start). -substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length). +substr(S, Start, Length) -> emqx_variform_bif:substr(S, Start, Length). -trim(S) -> emqx_variform_str:trim(S). +trim(S) -> emqx_variform_bif:trim(S). -upper(S) -> emqx_variform_str:upper(S). +upper(S) -> emqx_variform_bif:upper(S). -split(S, P) -> emqx_variform_str:split(S, P). +split(S, P) -> emqx_variform_bif:split(S, P). -split(S, P, Position) -> emqx_variform_str:split(S, P, Position). +split(S, P, Position) -> emqx_variform_bif:split(S, P, Position). -tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators). +tokens(S, Separators) -> emqx_variform_bif:tokens(S, Separators). -tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF). +tokens(S, Separators, NoCRLF) -> emqx_variform_bif:tokens(S, Separators, NoCRLF). -concat(S1, S2) -> emqx_variform_str:concat(S1, S2). +concat(S1, S2) -> emqx_variform_bif:concat(S1, S2). -concat(List) -> emqx_variform_str:concat(List). +concat(List) -> emqx_variform_bif:concat(List). 
-sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args). +sprintf_s(Format, Args) -> emqx_variform_bif:sprintf_s(Format, Args). -pad(S, Len) -> emqx_variform_str:pad(S, Len). +pad(S, Len) -> emqx_variform_bif:pad(S, Len). -pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position). +pad(S, Len, Position) -> emqx_variform_bif:pad(S, Len, Position). -pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char). +pad(S, Len, Position, Char) -> emqx_variform_bif:pad(S, Len, Position, Char). -replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr). +replace(SrcStr, Pattern, RepStr) -> emqx_variform_bif:replace(SrcStr, Pattern, RepStr). replace(SrcStr, Pattern, RepStr, Position) -> - emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position). + emqx_variform_bif:replace(SrcStr, Pattern, RepStr, Position). -regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE). +regex_match(Str, RE) -> emqx_variform_bif:regex_match(Str, RE). -regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr). +regex_replace(SrcStr, RE, RepStr) -> emqx_variform_bif:regex_replace(SrcStr, RE, RepStr). -ascii(Char) -> emqx_variform_str:ascii(Char). +ascii(Char) -> emqx_variform_bif:ascii(Char). -find(S, P) -> emqx_variform_str:find(S, P). +find(S, P) -> emqx_variform_bif:find(S, P). -find(S, P, Position) -> emqx_variform_str:find(S, P, Position). +find(S, P, Position) -> emqx_variform_bif:find(S, P, Position). -join_to_string(Str) -> emqx_variform_str:join_to_string(Str). +join_to_string(Str) -> emqx_variform_bif:join_to_string(Str). -join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List). +join_to_string(Sep, List) -> emqx_variform_bif:join_to_string(Sep, List). join_to_sql_values_string(List) -> QuotedList = @@ -878,7 +878,7 @@ jq(FilterProgram, JSONBin) -> ]) ). -unescape(Str) -> emqx_variform_str:unescape(Str). +unescape(Str) -> emqx_variform_bif:unescape(Str). 
%%------------------------------------------------------------------------------ %% Array Funcs diff --git a/apps/emqx_utils/src/emqx_variform.erl b/apps/emqx_utils/src/emqx_variform.erl index 0a26f7480..09a673851 100644 --- a/apps/emqx_utils/src/emqx_variform.erl +++ b/apps/emqx_utils/src/emqx_variform.erl @@ -28,14 +28,35 @@ erase_allowed_module/1, erase_allowed_modules/1 ]). + -export([render/2, render/3]). +-export([compile/1, decompile/1]). + +-export_type([compiled/0]). + +-type compiled() :: #{expr := string(), form := term()}. +-define(BIF_MOD, emqx_variform_bif). +-define(IS_ALLOWED_MOD(M), + (M =:= ?BIF_MOD orelse + M =:= lists orelse + M =:= maps) +). + +-define(COALESCE_BADARG, + throw(#{ + reason => coalesce_badarg, + explain => + "must be an array, or a call to a function which returns an array, " + "for example: coalesce([a,b,c]) or coalesce(tokens(var,','))" + }) +). %% @doc Render a variform expression with bindings. %% A variform expression is a template string which supports variable substitution %% and function calls. %% %% The function calls are in the form of `module.function(arg1, arg2, ...)` where `module` -%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str` module. +%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_bif` module. %% Both module and function must be existing atoms, and only whitelisted functions are allowed. %% %% A function arg can be a constant string or a number. @@ -49,18 +70,54 @@ %% %% For unresolved variables, empty string (but not "undefined") is used. %% In case of runtime exeption, an error is returned. +%% In case of unbound variable is referenced, error is returned. -spec render(string(), map()) -> {ok, binary()} | {error, term()}. render(Expression, Bindings) -> render(Expression, Bindings, #{}). 
-render(Expression, Bindings, Opts) when is_binary(Expression) -> - render(unicode:characters_to_list(Expression), Bindings, Opts); +render(#{form := Form}, Bindings, Opts) -> + eval_as_string(Form, Bindings, Opts); render(Expression, Bindings, Opts) -> + case compile(Expression) of + {ok, Compiled} -> + render(Compiled, Bindings, Opts); + {error, Reason} -> + {error, Reason} + end. + +eval_as_string(Expr, Bindings, _Opts) -> + try + {ok, return_str(eval(Expr, Bindings, #{}))} + catch + throw:Reason -> + {error, Reason}; + C:E:S -> + {error, #{exception => C, reason => E, stack_trace => S}} + end. + +%% Force the expression to return binary string. +return_str(Str) when is_binary(Str) -> Str; +return_str(Num) when is_integer(Num) -> integer_to_binary(Num); +return_str(Num) when is_float(Num) -> float_to_binary(Num, [{decimals, 10}, compact]); +return_str(Other) -> + throw(#{ + reason => bad_return, + expected => string, + got => Other + }). + +%% @doc Compile varifom expression. +-spec compile(string() | binary() | compiled()) -> {ok, compiled()} | {error, any()}. +compile(#{form := _} = Compiled) -> + {ok, Compiled}; +compile(Expression) when is_binary(Expression) -> + compile(unicode:characters_to_list(Expression)); +compile(Expression) -> case emqx_variform_scan:string(Expression) of {ok, Tokens, _Line} -> case emqx_variform_parser:parse(Tokens) of - {ok, Expr} -> - eval_as_string(Expr, Bindings, Opts); + {ok, Form} -> + {ok, #{expr => Expression, form => Form}}; {error, {_, emqx_variform_parser, Msg}} -> %% syntax error {error, lists:flatten(Msg)}; @@ -71,40 +128,59 @@ render(Expression, Bindings, Opts) -> {error, Reason} end. -eval_as_string(Expr, Bindings, _Opts) -> - try - {ok, str(eval(Expr, Bindings))} - catch - throw:Reason -> - {error, Reason}; - C:E:S -> - {error, #{exception => C, reason => E, stack_trace => S}} - end. +decompile(#{expr := Expression}) -> + Expression; +decompile(Expression) -> + Expression. 
-eval({str, Str}, _Bindings) -> - str(Str); -eval({integer, Num}, _Bindings) -> +eval({str, Str}, _Bindings, _Opts) -> + unicode:characters_to_binary(Str); +eval({integer, Num}, _Bindings, _Opts) -> Num; -eval({float, Num}, _Bindings) -> +eval({float, Num}, _Bindings, _Opts) -> Num; -eval({array, Args}, Bindings) -> - eval(Args, Bindings); -eval({call, FuncNameStr, Args}, Bindings) -> +eval({array, Args}, Bindings, Opts) -> + eval_loop(Args, Bindings, Opts); +eval({call, FuncNameStr, Args}, Bindings, Opts) -> {Mod, Fun} = resolve_func_name(FuncNameStr), ok = assert_func_exported(Mod, Fun, length(Args)), - call(Mod, Fun, eval(Args, Bindings)); -eval({var, VarName}, Bindings) -> - resolve_var_value(VarName, Bindings); -eval([Arg | Args], Bindings) -> - [eval(Arg, Bindings) | eval(Args, Bindings)]; -eval([], _Bindings) -> - []. + case {Mod, Fun} of + {?BIF_MOD, coalesce} -> + eval_coalesce(Args, Bindings, Opts); + _ -> + call(Mod, Fun, eval_loop(Args, Bindings, Opts)) + end; +eval({var, VarName}, Bindings, Opts) -> + resolve_var_value(VarName, Bindings, Opts). + +eval_loop([], _, _) -> []; +eval_loop([H | T], Bindings, Opts) -> [eval(H, Bindings, Opts) | eval_loop(T, Bindings, Opts)]. + +%% coalesce treats var_unbound exception as empty string '' +eval_coalesce([{array, Args}], Bindings, Opts) -> + NewArgs = [lists:map(fun(Arg) -> try_eval(Arg, Bindings, Opts) end, Args)], + call(?BIF_MOD, coalesce, NewArgs); +eval_coalesce([Arg], Bindings, Opts) -> + case try_eval(Arg, Bindings, Opts) of + List when is_list(List) -> + call(?BIF_MOD, coalesce, List); + <<>> -> + <<>>; + _ -> + ?COALESCE_BADARG + end; +eval_coalesce(_Args, _Bindings, _Opts) -> + ?COALESCE_BADARG. + +try_eval(Arg, Bindings, Opts) -> + try + eval(Arg, Bindings, Opts) + catch + throw:#{reason := var_unbound} -> + <<>> + end. %% Some functions accept arbitrary number of arguments but implemented as /1. 
-call(emqx_variform_str, concat, Args) -> - str(emqx_variform_str:concat(Args)); -call(emqx_variform_str, coalesce, Args) -> - str(emqx_variform_str:coalesce(Args)); call(Mod, Fun, Args) -> erlang:apply(Mod, Fun, Args). @@ -144,23 +220,23 @@ resolve_func_name(FuncNameStr) -> function => Fun }) end, - {emqx_variform_str, FuncName}; + {?BIF_MOD, FuncName}; _ -> throw(#{reason => invalid_function_reference, function => FuncNameStr}) end. -resolve_var_value(VarName, Bindings) -> +%% _Opts can be extended in the future. For example, unbound var as 'undfeined' +resolve_var_value(VarName, Bindings, _Opts) -> case emqx_template:lookup_var(split(VarName), Bindings) of {ok, Value} -> Value; {error, _Reason} -> - <<>> + throw(#{ + var_name => VarName, + reason => var_unbound + }) end. -assert_func_exported(emqx_variform_str, concat, _Arity) -> - ok; -assert_func_exported(emqx_variform_str, coalesce, _Arity) -> - ok; assert_func_exported(Mod, Fun, Arity) -> ok = try_load(Mod), case erlang:function_exported(Mod, Fun, Arity) of @@ -187,7 +263,7 @@ try_load(Mod) -> ok end. -assert_module_allowed(emqx_variform_str) -> +assert_module_allowed(Mod) when ?IS_ALLOWED_MOD(Mod) -> ok; assert_module_allowed(Mod) -> Allowed = get_allowed_modules(), @@ -220,8 +296,5 @@ erase_allowed_modules(Modules) when is_list(Modules) -> get_allowed_modules() -> persistent_term:get({emqx_variform, allowed_modules}, []). -str(Value) -> - emqx_utils_conv:bin(Value). - split(VarName) -> lists:map(fun erlang:iolist_to_binary/1, string:tokens(VarName, ".")). diff --git a/apps/emqx_utils/src/emqx_variform_str.erl b/apps/emqx_utils/src/emqx_variform_bif.erl similarity index 90% rename from apps/emqx_utils/src/emqx_variform_str.erl rename to apps/emqx_utils/src/emqx_variform_bif.erl index a53e1e216..fe5cb2369 100644 --- a/apps/emqx_utils/src/emqx_variform_str.erl +++ b/apps/emqx_utils/src/emqx_variform_bif.erl @@ -14,13 +14,11 @@ %% limitations under the License. 
%%-------------------------------------------------------------------- -%% Predefined functions string templating --module(emqx_variform_str). +%% Predefined functions for variform expressions. +-module(emqx_variform_bif). %% String Funcs -export([ - coalesce/1, - coalesce/2, lower/1, ltrim/1, ltrim/2, @@ -47,15 +45,22 @@ replace/4, regex_match/2, regex_replace/3, + regex_extract/2, ascii/1, find/2, find/3, join_to_string/1, join_to_string/2, unescape/1, - nth/2 + any_to_str/1 ]). +%% Array functions +-export([nth/2]). + +%% Control functions +-export([coalesce/1, coalesce/2]). + -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). %%------------------------------------------------------------------------------ @@ -143,8 +148,10 @@ tokens(S, Separators, <<"nocrlf">>) -> concat(S1, S2) -> concat([S1, S2]). +%% @doc Concatenate a list of strings. +%% NOTE: it converts non-string elements to Erlang term literals for backward compatibility concat(List) -> - unicode:characters_to_binary(lists:map(fun str/1, List), unicode). + unicode:characters_to_binary(lists:map(fun any_to_str/1, List), unicode). sprintf_s(Format, Args) when is_list(Args) -> erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)). @@ -190,6 +197,22 @@ regex_match(Str, RE) -> regex_replace(SrcStr, RE, RepStr) -> re:replace(SrcStr, RE, RepStr, [global, {return, binary}]). +%% @doc Searches the string Str for patterns specified by Regexp. +%% If matches are found, it returns a list of all captured groups from these matches. +%% If no matches are found or there are no groups captured, it returns an empty list. +%% This function can be used to extract parts of a string based on a regular expression, +%% excluding the complete match itself. 
+%% Examples: +%% ("Number: 12345", "(\\d+)") -> [<<"12345">>] +%% ("Hello, world!", "(\\w+)") -> [<<"Hello">>, <<"world">>] +%% ("No numbers here!", "(\\d+)") -> [] +%% ("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})") -> [<<"2021">>, <<"05">>, <<"20">>] +regex_extract(Str, Regexp) -> + case re:run(Str, Regexp, [{capture, all_but_first, list}]) of + {match, [_ | _] = L} -> lists:map(fun erlang:iolist_to_binary/1, L); + _ -> [] + end. + ascii(Char) when is_binary(Char) -> [FirstC | _] = binary_to_list(Char), FirstC. @@ -212,7 +235,7 @@ join_to_string(List) when is_list(List) -> join_to_string(<<", ">>, List). join_to_string(Sep, List) when is_list(List), is_binary(Sep) -> - iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])). + iolist_to_binary(lists:join(Sep, [any_to_str(Item) || Item <- List])). unescape(Bin) when is_binary(Bin) -> UnicodeList = unicode:characters_to_list(Bin, utf8), @@ -364,5 +387,5 @@ is_hex_digit(_) -> false. %% Data Type Conversion Funcs %%------------------------------------------------------------------------------ -str(Data) -> +any_to_str(Data) -> emqx_utils_conv:bin(Data). diff --git a/apps/emqx_utils/test/emqx_variform_bif_tests.erl b/apps/emqx_utils/test/emqx_variform_bif_tests.erl new file mode 100644 index 000000000..b74f6fcac --- /dev/null +++ b/apps/emqx_utils/test/emqx_variform_bif_tests.erl @@ -0,0 +1,59 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Most of the functions are tested as rule-engine string funcs +-module(emqx_variform_bif_tests). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). + +regex_extract_test_() -> + [ + ?_assertEqual([<<"12345">>], regex_extract("Order number: 12345", "(\\d+)")), + ?_assertEqual( + [<<"Hello">>, <<"world">>], regex_extract("Hello, world!", "(\\w+).*\s(\\w+)") + ), + ?_assertEqual([], regex_extract("No numbers here!", "(\\d+)")), + ?_assertEqual( + [<<"2021">>, <<"05">>, <<"20">>], + regex_extract("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})") + ), + ?_assertEqual([<<"Hello">>], regex_extract("Hello, world!", "(Hello)")), + ?_assertEqual( + [<<"12">>, <<"34">>], regex_extract("Items: 12, Price: 34", "(\\d+).*\s(\\d+)") + ), + ?_assertEqual( + [<<"john.doe@example.com">>], + regex_extract("Contact: john.doe@example.com", "([\\w\\.]+@[\\w\\.]+)") + ), + ?_assertEqual([], regex_extract("Just some text, nothing more.", "([A-Z]\\d{3})")), + ?_assertEqual( + [<<"admin">>, <<"1234">>], + regex_extract("User: admin, Pass: 1234", "User: (\\w+), Pass: (\\d+)") + ), + ?_assertEqual([], regex_extract("", "(\\d+)")), + ?_assertEqual([], regex_extract("$$$###!!!", "(\\d+)")), + ?_assertEqual([<<"23.1">>], regex_extract("Erlang 23.1 version", "(\\d+\\.\\d+)")), + ?_assertEqual( + [<<"192.168.1.1">>], + regex_extract("Server IP: 192.168.1.1 at port 8080", "(\\d+\\.\\d+\\.\\d+\\.\\d+)") + ) + ]. + +regex_extract(Str, RegEx) -> + emqx_variform_bif:regex_extract(Str, RegEx). 
diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl index 72fbf2637..91da471c9 100644 --- a/apps/emqx_utils/test/emqx_variform_tests.erl +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -27,14 +27,16 @@ redner_test_() -> [ {"direct var reference", fun() -> ?assertEqual({ok, <<"1">>}, render("a", #{a => 1})) end}, {"concat strings", fun() -> - ?assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{})) + ?assertEqual({ok, <<"a,b">>}, render("concat(['a',',','b'])", #{})) + end}, + {"concat empty string", fun() -> + ?assertEqual({ok, <<"">>}, render("concat([''])", #{})) end}, - {"concat empty string", fun() -> ?assertEqual({ok, <<"">>}, render("concat('')", #{})) end}, {"tokens 1st", fun() -> ?assertEqual({ok, <<"a">>}, render("nth(1,tokens(var, ','))", #{var => <<"a,b">>})) end}, - {"unknown var as empty str", fun() -> - ?assertEqual({ok, <<>>}, render("var", #{})) + {"unknown var return error", fun() -> + ?assertMatch({error, #{reason := var_unbound}}, render("var", #{})) end}, {"out of range nth index", fun() -> ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>})) @@ -97,7 +99,7 @@ unknown_func_test_() -> {"unknown function in a known module", fun() -> ?assertMatch( {error, #{reason := unknown_variform_function}}, - render("emqx_variform_str.nonexistingatom__(a)", #{}) + render("emqx_variform_bif.nonexistingatom__(a)", #{}) ) end}, {"invalid func reference", fun() -> @@ -133,19 +135,39 @@ inject_allowed_module_test() -> coalesce_test_() -> [ - {"coalesce first", fun() -> - ?assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{})) + {"first", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce(['a','b'])", #{})) end}, - {"coalesce second", fun() -> - ?assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{})) + {"second", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(['', 'b'])", #{})) end}, - {"coalesce first var", fun() -> - ?assertEqual({ok, <<"a">>}, 
render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>})) + {"first var", fun() -> + ?assertEqual({ok, <<"a">>}, render("coalesce([a,b])", #{a => <<"a">>, b => <<"b">>})) end}, - {"coalesce second var", fun() -> - ?assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>})) + {"second var", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce([a,b])", #{b => <<"b">>})) end}, - {"coalesce empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a,b)", #{})) end} + {"empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce([a,b])", #{})) end}, + {"arg from other func", fun() -> + ?assertEqual({ok, <<"b">>}, render("coalesce(tokens(a,','))", #{a => <<",,b,c">>})) + end}, + {"var unbound", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a)", #{})) end}, + {"var unbound in call", fun() -> + ?assertEqual({ok, <<>>}, render("coalesce(concat(a))", #{})) + end}, + {"var unbound in calls", fun() -> + ?assertEqual({ok, <<"c">>}, render("coalesce([any_to_str(a),any_to_str(b),'c'])", #{})) + end}, + {"badarg", fun() -> + ?assertMatch( + {error, #{reason := coalesce_badarg}}, render("coalesce(a,b)", #{a => 1, b => 2}) + ) + end}, + {"badarg from return", fun() -> + ?assertMatch( + {error, #{reason := coalesce_badarg}}, render("coalesce(any_to_str(a))", #{a => 1}) + ) + end} ]. syntax_error_test_() -> diff --git a/changes/ce/feat-12750.en.md b/changes/ce/feat-12872.en.md similarity index 93% rename from changes/ce/feat-12750.en.md rename to changes/ce/feat-12872.en.md index d0a70e6fc..dfc799bb2 100644 --- a/changes/ce/feat-12750.en.md +++ b/changes/ce/feat-12872.en.md @@ -7,8 +7,8 @@ an MQTT connection. ### Initialization of `client_attrs` -- The `client_attrs` fields can be initially populated based on the configuration from one of the - following sources: +- The `client_attrs` fields can be initially populated from one of the + following `clientinfo` fields: - `cn`: The common name from the TLS client's certificate. 
- `dn`: The distinguished name from the TLS client's certificate, that is, the certificate "Subject". - `clientid`: The MQTT client ID provided by the client. diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index 90fbfeefc..0868bf93c 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -1575,48 +1575,37 @@ client_attrs_init { label: "Client Attributes Initialization" desc: """~ Specify how to initialize client attributes. - This config accepts one initialization rule, or a list of rules. - Client attributes can be initialized as `client_attrs.NAME`, - where `NAME` is the name of the attribute specified in the config `extract_as`. + Each client attribute can be initialized as `client_attrs.{NAME}`, + where `{NAME}` is the name of the attribute specified in the config field `set_as_attr`. The initialized client attribute will be stored in the `client_attrs` property with the specified name, and can be used as a placeholder in a template for authentication and authorization. - For example, use `${client_attrs.alias}` to render an HTTP POST body when `extract_as = alias`, + For example, use `${client_attrs.alias}` to render an HTTP POST body when `set_as_attr = alias`, or render listener config `moutpoint = devices/${client_attrs.alias}/` to initialize a per-client topic namespace.""" } -client_attrs_init_extract_from { - label: "Client Property to Extract Attribute" - desc: """~ - Specify from which client property the client attribute should be extracted. - - Supported values: - - `clientid`: Extract from the client ID. - - `username`: Extract from the username. - - `cn`: Extract from the Common Name (CN) field of the client certificate. - - `dn`: Extract from the Distinguished Name (DN) field of the client certificate. - - `user_property`: Extract from the user property sent in the MQTT v5 `CONNECT` packet. - In this case, `extract_regexp` is not applicable, and `extract_as` should be the user property key. 
- - NOTE: this extraction happens **after** `clientid` or `username` is initialized - from `peer_cert_as_clientid` or `peer_cert_as_username` config.""" -} - -client_attrs_init_extract_regexp { +client_attrs_init_expression { label: "Client Attribute Extraction Regular Expression" desc: """~ - The regular expression to extract a client attribute from the client property specified by `client_attrs_init.extract_from` config. - The expression should match the entire client property value, and capturing groups are concatenated to make the client attribute. - For example if the client attribute is the first part of the client ID delimited by a dash, the regular expression would be `^(.+?)-.*$`. - Note that failure to match the regular expression will result in the client attribute being absent but not an empty string. - Note also that currently only printable ASCII characters are allowed as input for the regular expression extraction.""" + A one line expression to evaluate a set of predefined string functions (like in the rule engine SQL statements). + The expression accepts direct variable reference, or one function call with nested calls for its arguments, + but it does not provide variable binding or user-defined functions and pre-bound variables. + For example, to extract the prefix of client ID delimited by a dot: `nth(1, tokens(clientid, '.'))`. + + The variables pre-bound variables are: + - `cn`: Client's TLS certificate common name. + - `dn`: Client's TLS certificate distinguished name (the subject). + - `clientid`: MQTT Client ID. + - `username`: MQTT Client's username. + - `user_property.{NAME}`: User properties in the CONNECT packet. + + You can read more about variform expressions in EMQX docs.""" } -client_attrs_init_extract_as { +client_attrs_init_set_as_attr { label: "Name The Extracted Attribute" desc: """~ - The name of the client attribute extracted from the client property specified by `client_attrs_init.extract_from` config. 
- The extracted attribute will be stored in the `client_attrs` property with this name. - In case `extract_from = user_property`, this should be the key of the user property.""" + The name of the client attribute extracted from the client data. + The extracted attribute will be stored in the `client_attrs` property with this name.""" } } diff --git a/scripts/spellcheck/dicts/emqx.txt b/scripts/spellcheck/dicts/emqx.txt index 7e8fed96f..c7c266434 100644 --- a/scripts/spellcheck/dicts/emqx.txt +++ b/scripts/spellcheck/dicts/emqx.txt @@ -259,6 +259,7 @@ uplink url utc util +variform ver vm vsn From 8ed397d4fa9bf67d5a8d60c88f74ccebcedaf3a3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Apr 2024 14:46:55 +0800 Subject: [PATCH 137/234] fix(influx): literal number values in tag set --- .../src/emqx_bridge_influxdb_connector.erl | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl index e17b9b3fe..94419c7d9 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl @@ -59,6 +59,9 @@ -define(DEFAULT_TIMESTAMP_TMPL, "${timestamp}"). +-define(set_tag, set_tag). +-define(set_field, set_field). 
+ -define(IS_HTTP_ERROR(STATUS_CODE), (is_integer(STATUS_CODE) andalso (STATUS_CODE < 200 orelse STATUS_CODE >= 300)) @@ -710,8 +713,8 @@ line_to_point( precision := Precision } = Item ) -> - {_, EncodedTags} = maps:fold(fun maps_config_to_data/3, {Data, #{}}, Tags), - {_, EncodedFields} = maps:fold(fun maps_config_to_data/3, {Data, #{}}, Fields), + {_, EncodedTags, _} = maps:fold(fun maps_config_to_data/3, {Data, #{}, ?set_tag}, Tags), + {_, EncodedFields, _} = maps:fold(fun maps_config_to_data/3, {Data, #{}, ?set_field}, Fields), maps:without([precision], Item#{ measurement => emqx_placeholder:proc_tmpl(Measurement, Data), tags => EncodedTags, @@ -727,34 +730,43 @@ time_unit(ms) -> millisecond; time_unit(us) -> microsecond; time_unit(ns) -> nanosecond. -maps_config_to_data(K, V, {Data, Res}) -> +maps_config_to_data(K, V, {Data, Res, SetType}) -> KTransOptions = #{return => rawlist, var_trans => fun key_filter/1}, VTransOptions = #{return => rawlist, var_trans => fun data_filter/1}, NK = emqx_placeholder:proc_tmpl(K, Data, KTransOptions), NV = proc_quoted(V, Data, VTransOptions), case {NK, NV} of {[undefined], _} -> - {Data, Res}; + {Data, Res, SetType}; %% undefined value in normal format [undefined] or int/uint format [undefined, <<"i">>] {_, [undefined | _]} -> - {Data, Res}; + {Data, Res, SetType}; {_, {quoted, [undefined | _]}} -> - {Data, Res}; + {Data, Res, SetType}; _ -> - {Data, Res#{ - list_to_binary(NK) => value_type(NV, tmpl_type(V)) - }} + NRes = Res#{ + list_to_binary(NK) => value_type(NV, #{ + tmpl_type => tmpl_type(V), set_type => SetType + }) + }, + {Data, NRes, SetType} end. 
+value_type([Number], #{set_type := ?set_tag}) when is_number(Number) -> + %% all `tag` values are treated as string + %% See also: https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/#tag-set + emqx_utils_conv:bin(Number); +value_type([Str], #{set_type := ?set_tag}) when is_binary(Str) -> + Str; value_type({quoted, ValList}, _) -> {string_list, ValList}; -value_type([Int, <<"i">>], mixed) when is_integer(Int) -> +value_type([Int, <<"i">>], #{tmpl_type := mixed}) when is_integer(Int) -> {int, Int}; -value_type([UInt, <<"u">>], mixed) when is_integer(UInt) -> +value_type([UInt, <<"u">>], #{tmpl_type := mixed}) when is_integer(UInt) -> {uint, UInt}; %% write `1`, `1.0`, `-1.0` all as float %% see also: https://docs.influxdata.com/influxdb/v2.7/reference/syntax/line-protocol/#float -value_type([Number], _) when is_number(Number) -> +value_type([Number], #{set_type := ?set_field}) when is_number(Number) -> {float, Number}; value_type([<<"t">>], _) -> 't'; @@ -776,9 +788,9 @@ value_type([<<"FALSE">>], _) -> 'FALSE'; value_type([<<"False">>], _) -> 'False'; -value_type([Str], variable) when is_binary(Str) -> +value_type([Str], #{tmpl_type := variable}) when is_binary(Str) -> Str; -value_type([Str], literal) when is_binary(Str) -> +value_type([Str], #{tmpl_type := literal, set_type := ?set_field}) when is_binary(Str) -> %% if Str is a literal string suffixed with `i` or `u`, we should convert it to int/uint. %% otherwise, we should convert it to float. 
NumStr = binary:part(Str, 0, byte_size(Str) - 1), From 084e920c6eab60a53eda552756cb28a6569909ea Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Apr 2024 17:45:42 +0800 Subject: [PATCH 138/234] test(influx): literal values or variable in tag set --- .../test/emqx_bridge_influxdb_SUITE.erl | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl b/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl index e30e8b361..3d50282ab 100644 --- a/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl +++ b/apps/emqx_bridge_influxdb/test/emqx_bridge_influxdb_SUITE.erl @@ -864,6 +864,53 @@ t_any_num_as_float(Config) -> TimeReturned = pad_zero(TimeReturned0), ?assertEqual(TsStr, TimeReturned). +t_tag_set_use_literal_value(Config) -> + QueryMode = ?config(query_mode, Config), + Const = erlang:system_time(nanosecond), + ConstBin = integer_to_binary(Const), + TsStr = iolist_to_binary( + calendar:system_time_to_rfc3339(Const, [{unit, nanosecond}, {offset, "Z"}]) + ), + ?assertMatch( + {ok, _}, + create_bridge( + Config, + #{ + <<"write_syntax">> => + <<"mqtt,clientid=${clientid},tag_key1=100,tag_key2=123.4,tag_key3=66i,tag_key4=${payload.float_dp}", + " ", + "field_key1=100.1,field_key2=100i,field_key3=${payload.float_dp},bar=5i", + " ", ConstBin/binary>> + } + ) + ), + ClientId = emqx_guid:to_hexstr(emqx_guid:gen()), + Payload = #{ + %% with decimal point + float_dp => 123.4 + }, + SentData = #{ + <<"clientid">> => ClientId, + <<"topic">> => atom_to_binary(?FUNCTION_NAME), + <<"payload">> => Payload, + <<"timestamp">> => erlang:system_time(millisecond) + }, + case QueryMode of + sync -> + ?assertMatch({ok, 204, _}, send_message(Config, SentData)), + ok; + async -> + ?assertMatch(ok, send_message(Config, SentData)) + end, + %% sleep is still need even in sync mode, or we would get an empty result sometimes + ct:sleep(1500), + PersistedData = query_by_clientid(ClientId, Config), + 
Expected = #{field_key1 => <<"100.1">>, field_key2 => <<"100">>, field_key3 => <<"123.4">>}, + assert_persisted_data(ClientId, Expected, PersistedData), + TimeReturned0 = maps:get(<<"_time">>, maps:get(<<"field_key1">>, PersistedData)), + TimeReturned = pad_zero(TimeReturned0), + ?assertEqual(TsStr, TimeReturned). + t_bad_timestamp(Config) -> InfluxDBType = ?config(influxdb_type, Config), InfluxDBName = ?config(influxdb_name, Config), From 13ab9f098781985748a21ecb5bfc1981f0da63a7 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Apr 2024 18:08:05 +0800 Subject: [PATCH 139/234] docs: add changelog entry for #12880 --- changes/fix-12880.en.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changes/fix-12880.en.md diff --git a/changes/fix-12880.en.md b/changes/fix-12880.en.md new file mode 100644 index 000000000..7d7a53777 --- /dev/null +++ b/changes/fix-12880.en.md @@ -0,0 +1,3 @@ +Fixed the issue where serialization failed when the value in the tag set used a literal value (int or float) in the influxdb action configuration. + +Which Tag Set value's type is always `String`. See also: [Line Protocol - Tag Set](https://docs.influxdata.com/influxdb/v2/reference/syntax/line-protocol/#tag-set) From c4d1360b96b18925151c45b61691ab8c5608b340 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 15 Apr 2024 16:42:29 +0200 Subject: [PATCH 140/234] fix(dsrepl): trigger election for new ra servers unconditionallly Otherwise we might end up in a situation when there's no member online yet at the time of the election trigger, and the election will never happen. 
--- .../src/emqx_ds_replication_layer_shard.erl | 38 ++++++++----------- .../test/emqx_ds_replication_SUITE.erl | 4 +- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index f4c0d3b01..20d9ef481 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -341,29 +341,21 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> log_init_args => LogOpts }) end, - case Servers of - [LocalServer | _] -> - %% TODO - %% Not super robust, but we probably don't expect nodes to be down - %% when we bring up a fresh consensus group. Triggering election - %% is not really required otherwise. - %% TODO - %% Ensure that doing that on node restart does not disrupt consensus. - %% Edit: looks like it doesn't, this could actually be quite useful - %% to "steal" leadership from nodes that have too much leader load. - %% TODO - %% It doesn't really work that way. There's `ra:transfer_leadership/2` - %% for that. - try - ra:trigger_election(LocalServer, _Timeout = 1_000) - catch - %% TODO - %% Tolerating exceptions because server might be occupied with log - %% replay for a while. - exit:{timeout, _} when not Bootstrap -> - ok - end; - _ -> + %% NOTE + %% Triggering election is necessary when a new consensus group is being brought up. + %% TODO + %% It's probably a good idea to rebalance leaders across the cluster from time to + %% time. There's `ra:transfer_leadership/2` for that. + try Bootstrap andalso ra:trigger_election(LocalServer, _Timeout = 1_000) of + false -> + ok; + ok -> + ok + catch + %% TODO + %% Tolerating exceptions because server might be occupied with log replay for + %% a while. + exit:{timeout, _} when not Bootstrap -> ok end. 
diff --git a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl index 9fc55d170..3b0e37c7f 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl @@ -435,8 +435,8 @@ t_rebalance_offline_restarts(Config) -> erpc:multicall(Nodes, emqx_ds, open_db, [?DB, Opts]) ), ?retry( - 500, - 10, + 1000, + 5, ?assertEqual([8 || _ <- Nodes], [n_shards_online(N, ?DB) || N <- Nodes]) ), From 89f42f117198645b8f08d59b1abad83bd21e7668 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 15 Apr 2024 16:43:52 +0200 Subject: [PATCH 141/234] fix(dsrepl): make placeholder shard process permanent under supervisor --- apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index 195db7c34..2dd9ae332 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -232,7 +232,7 @@ shard_replication_spec(DB, Shard, Opts) -> #{ id => {Shard, replication}, start => {emqx_ds_replication_layer_shard, start_link, [DB, Shard, Opts]}, - restart => transient, + restart => permanent, type => worker }. 
From f80d078de348670eae93e5372616a1ed2e681e87 Mon Sep 17 00:00:00 2001 From: zmstone Date: Sun, 14 Apr 2024 15:17:43 +0200 Subject: [PATCH 142/234] feat(variform): Add more functions --- apps/emqx_rule_engine/src/emqx_rule_funcs.erl | 26 +--- apps/emqx_utils/src/emqx_variform_bif.erl | 134 ++++++++++++++++++ .../test/emqx_variform_bif_tests.erl | 15 ++ apps/emqx_utils/test/emqx_variform_tests.erl | 36 +++++ 4 files changed, 192 insertions(+), 19 deletions(-) diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index 414a3d620..4e28efb5f 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -202,7 +202,8 @@ -export([ md5/1, sha/1, - sha256/1 + sha256/1, + hash/2 ]). %% zip Funcs @@ -710,24 +711,11 @@ map(Map = #{}) -> map(Data) -> error(badarg, [Data]). -bin2hexstr(Bin) when is_binary(Bin) -> - emqx_utils:bin_to_hexstr(Bin, upper); -%% If Bin is a bitstring which is not divisible by 8, we pad it and then do the -%% conversion -bin2hexstr(Bin) when is_bitstring(Bin), (8 - (bit_size(Bin) rem 8)) >= 4 -> - PadSize = 8 - (bit_size(Bin) rem 8), - Padding = <<0:PadSize>>, - BinToConvert = <>, - <<_FirstByte:8, HexStr/binary>> = emqx_utils:bin_to_hexstr(BinToConvert, upper), - HexStr; -bin2hexstr(Bin) when is_bitstring(Bin) -> - PadSize = 8 - (bit_size(Bin) rem 8), - Padding = <<0:PadSize>>, - BinToConvert = <>, - emqx_utils:bin_to_hexstr(BinToConvert, upper). +bin2hexstr(Bin) -> + emqx_variform_bif:bin2hexstr(Bin). -hexstr2bin(Str) when is_binary(Str) -> - emqx_utils:hexstr_to_bin(Str). +hexstr2bin(Str) -> + emqx_variform_bif:hexstr2bin(Str). %%------------------------------------------------------------------------------ %% NULL Funcs @@ -1001,7 +989,7 @@ sha256(S) when is_binary(S) -> hash(sha256, S). hash(Type, Data) -> - emqx_utils:bin_to_hexstr(crypto:hash(Type, Data), lower). + emqx_variform_bif:hash(Type, Data). 
%%------------------------------------------------------------------------------ %% gzip Funcs diff --git a/apps/emqx_utils/src/emqx_variform_bif.erl b/apps/emqx_utils/src/emqx_variform_bif.erl index fe5cb2369..5c598efbd 100644 --- a/apps/emqx_utils/src/emqx_variform_bif.erl +++ b/apps/emqx_utils/src/emqx_variform_bif.erl @@ -61,6 +61,21 @@ %% Control functions -export([coalesce/1, coalesce/2]). +%% Random functions +-export([rand_str/1, rand_int/1]). + +%% Schema-less encod/decode +-export([ + bin2hexstr/1, + hexstr2bin/1, + int2hexstr/1, + base64_encode/1, + base64_decode/1 +]). + +%% Hash functions +-export([hash/2, hash_to_range/3, map_to_range/3]). + -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)). %%------------------------------------------------------------------------------ @@ -389,3 +404,122 @@ is_hex_digit(_) -> false. any_to_str(Data) -> emqx_utils_conv:bin(Data). + +%%------------------------------------------------------------------------------ +%% Random functions +%%------------------------------------------------------------------------------ + +%% @doc Make a random string with urlsafe-base64 charset. +rand_str(Length) when is_integer(Length) andalso Length > 0 -> + RawBytes = erlang:ceil((Length * 3) / 4), + RandomData = rand:bytes(RawBytes), + urlsafe(binary:part(base64_encode(RandomData), 0, Length)); +rand_str(_) -> + throw(#{reason => badarg, function => ?FUNCTION_NAME}). + +%% @doc Make a random integer in the range `[1, N]`. +rand_int(N) when is_integer(N) andalso N >= 1 -> + rand:uniform(N); +rand_int(N) -> + throw(#{reason => badarg, function => ?FUNCTION_NAME, expected => "positive integer", got => N}). + +%% TODO: call base64:encode(Bin, #{mode => urlsafe, padding => false}) +%% when oldest OTP to support is 26 or newer. +urlsafe(Str0) -> + Str = replace(Str0, <<"+">>, <<"-">>), + replace(Str, <<"/">>, <<"_">>). 
+ +%%------------------------------------------------------------------------------ +%% Data encoding +%%------------------------------------------------------------------------------ + +%% @doc Encode an integer to hex string. e.g. 15 as 'f' +int2hexstr(Int) -> + erlang:integer_to_binary(Int, 16). + +%% @doc Encode bytes in hex string format. +bin2hexstr(Bin) when is_binary(Bin) -> + emqx_utils:bin_to_hexstr(Bin, upper); +%% If Bin is a bitstring which is not divisible by 8, we pad it and then do the +%% conversion +bin2hexstr(Bin) when is_bitstring(Bin), (8 - (bit_size(Bin) rem 8)) >= 4 -> + PadSize = 8 - (bit_size(Bin) rem 8), + Padding = <<0:PadSize>>, + BinToConvert = <>, + <<_FirstByte:8, HexStr/binary>> = emqx_utils:bin_to_hexstr(BinToConvert, upper), + HexStr; +bin2hexstr(Bin) when is_bitstring(Bin) -> + PadSize = 8 - (bit_size(Bin) rem 8), + Padding = <<0:PadSize>>, + BinToConvert = <>, + emqx_utils:bin_to_hexstr(BinToConvert, upper). + +%% @doc Decode hex string into its original bytes. +hexstr2bin(Str) when is_binary(Str) -> + emqx_utils:hexstr_to_bin(Str). + +%% @doc Encode any bytes to base64. +base64_encode(Bin) -> + base64:encode(Bin). + +%% @doc Decode base64 encoded string. +base64_decode(Bin) -> + base64:decode(Bin). + +%%------------------------------------------------------------------------------ +%% Hash functions +%%------------------------------------------------------------------------------ + +%% @doc Hash with all available algorithm provided by crypto module. +%% Return hex format string. 
+%% - md4 | md5 +%% - sha (sha1) +%% - sha224 | sha256 | sha384 | sha512 +%% - sha3_224 | sha3_256 | sha3_384 | sha3_512 +%% - shake128 | shake256 +%% - blake2b | blake2s +hash(<<"sha1">>, Bin) -> + hash(sha, Bin); +hash(Algorithm, Bin) when is_binary(Algorithm) -> + Type = + try + binary_to_existing_atom(Algorithm) + catch + _:_ -> + throw(#{ + reason => unknown_hash_algorithm, + algorithm => Algorithm + }) + end, + hash(Type, Bin); +hash(Type, Bin) when is_atom(Type) -> + %% lower is for backward compatibility + emqx_utils:bin_to_hexstr(crypto:hash(Type, Bin), lower). + +%% @doc Hash binary data to an integer within a specified range [Min, Max] +hash_to_range(Bin, Min, Max) when + is_binary(Bin) andalso + size(Bin) > 0 andalso + is_integer(Min) andalso + is_integer(Max) andalso + Min =< Max +-> + Hash = hash(sha256, Bin), + HashNum = binary_to_integer(Hash, 16), + map_to_range(HashNum, Min, Max); +hash_to_range(_, _, _) -> + throw(#{reason => badarg, function => ?FUNCTION_NAME}). + +map_to_range(Bin, Min, Max) when is_binary(Bin) andalso size(Bin) > 0 -> + HashNum = binary:decode_unsigned(Bin), + map_to_range(HashNum, Min, Max); +map_to_range(Int, Min, Max) when + is_integer(Int) andalso + is_integer(Min) andalso + is_integer(Max) andalso + Min =< Max +-> + Range = Max - Min + 1, + Min + (Int rem Range); +map_to_range(_, _, _) -> + throw(#{reason => badarg, function => ?FUNCTION_NAME}). diff --git a/apps/emqx_utils/test/emqx_variform_bif_tests.erl b/apps/emqx_utils/test/emqx_variform_bif_tests.erl index b74f6fcac..92144ff43 100644 --- a/apps/emqx_utils/test/emqx_variform_bif_tests.erl +++ b/apps/emqx_utils/test/emqx_variform_bif_tests.erl @@ -57,3 +57,18 @@ regex_extract_test_() -> regex_extract(Str, RegEx) -> emqx_variform_bif:regex_extract(Str, RegEx). + +rand_str_test() -> + ?assertEqual(3, size(emqx_variform_bif:rand_str(3))), + ?assertThrow(#{reason := badarg}, size(emqx_variform_bif:rand_str(0))). 
+ +rand_int_test() -> + N = emqx_variform_bif:rand_int(10), + ?assert(N =< 10 andalso N >= 1), + ?assertThrow(#{reason := badarg}, emqx_variform_bif:rand_int(0)), + ?assertThrow(#{reason := badarg}, emqx_variform_bif:rand_int(-1)). + +base64_encode_decode_test() -> + RandBytes = crypto:strong_rand_bytes(100), + Encoded = emqx_variform_bif:base64_encode(RandBytes), + ?assertEqual(RandBytes, emqx_variform_bif:base64_decode(Encoded)). diff --git a/apps/emqx_utils/test/emqx_variform_tests.erl b/apps/emqx_utils/test/emqx_variform_tests.erl index 91da471c9..5f9a13326 100644 --- a/apps/emqx_utils/test/emqx_variform_tests.erl +++ b/apps/emqx_utils/test/emqx_variform_tests.erl @@ -182,3 +182,39 @@ syntax_error_test_() -> render(Expression, Bindings) -> emqx_variform:render(Expression, Bindings). + +hash_pick_test() -> + lists:foreach( + fun(_) -> + {ok, Res} = render("nth(hash_to_range(rand_str(10),1,5),[1,2,3,4,5])", #{}), + ?assert(Res >= <<"1">> andalso Res =< <<"5">>) + end, + lists:seq(1, 100) + ). + +map_to_range_pick_test() -> + lists:foreach( + fun(_) -> + {ok, Res} = render("nth(map_to_range(rand_str(10),1,5),[1,2,3,4,5])", #{}), + ?assert(Res >= <<"1">> andalso Res =< <<"5">>) + end, + lists:seq(1, 100) + ). + +-define(ASSERT_BADARG(FUNC, ARGS), + ?_assertEqual( + {error, #{reason => badarg, function => FUNC}}, + render(atom_to_list(FUNC) ++ ARGS, #{}) + ) +). + +to_range_badarg_test_() -> + [ + ?ASSERT_BADARG(hash_to_range, "(1,1,2)"), + ?ASSERT_BADARG(hash_to_range, "('',1,2)"), + ?ASSERT_BADARG(hash_to_range, "('a','1',2)"), + ?ASSERT_BADARG(hash_to_range, "('a',2,1)"), + ?ASSERT_BADARG(map_to_range, "('',1,2)"), + ?ASSERT_BADARG(map_to_range, "('a','1',2)"), + ?ASSERT_BADARG(map_to_range, "('a',2,1)") + ]. 
From 7f9a311988b3d3c8b42bd7e3f9c528c126288e7b Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 15 Apr 2024 20:08:56 +0200 Subject: [PATCH 143/234] fix(rocketmq action): all actions used only one topic This commit makes sure that a set of rocketmq workers are started for each topic. Before this commit all actions for a rocketmq connector used the same workers which all were bound to a single topic so all messages got delivered to that topic regardless of the action configuration. We should have automatic tests to check that the messages actually go to different topics but this needs to go into another PR since we have to deliver the fix fast and the rocketmq library does not support reading from topics. Fixes: https://emqx.atlassian.net/browse/EEC-1006 --- .../rocketmq/conf/plain_acl.yml | 1 + .../src/emqx_bridge_rocketmq.app.src | 2 +- .../src/emqx_bridge_rocketmq_connector.erl | 9 +++- .../test/emqx_bridge_rocketmq_SUITE.erl | 54 +++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/.ci/docker-compose-file/rocketmq/conf/plain_acl.yml b/.ci/docker-compose-file/rocketmq/conf/plain_acl.yml index e2c41a87f..e78e47fe5 100644 --- a/.ci/docker-compose-file/rocketmq/conf/plain_acl.yml +++ b/.ci/docker-compose-file/rocketmq/conf/plain_acl.yml @@ -9,3 +9,4 @@ accounts: defaultGroupPerm: PUB|SUB topicPerms: - TopicTest=PUB|SUB + - Topic2=PUB|SUB diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.app.src b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.app.src index 564e36a88..1f001218c 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.app.src +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_rocketmq, [ {description, "EMQX Enterprise RocketMQ Bridge"}, - {vsn, "0.1.5"}, + {vsn, "0.1.6"}, {registered, []}, {applications, [kernel, stdlib, emqx_resource, rocketmq]}, {env, [{emqx_action_info_modules, [emqx_bridge_rocketmq_action_info]}]}, diff --git 
a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl index 1af520a93..bd5154df5 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl @@ -139,13 +139,14 @@ on_add_channel( ChannelId, ChannelConfig ) -> - {ok, ChannelState} = create_channel_state(ChannelConfig, ACLInfo), + {ok, ChannelState} = create_channel_state(ChannelId, ChannelConfig, ACLInfo), NewInstalledChannels = maps:put(ChannelId, ChannelState, InstalledChannels), %% Update state NewState = OldState#{installed_channels => NewInstalledChannels}, {ok, NewState}. create_channel_state( + ChannelId, #{parameters := Conf} = _ChannelConfig, ACLInfo ) -> @@ -154,7 +155,7 @@ create_channel_state( sync_timeout := SyncTimeout } = Conf, TopicTks = emqx_placeholder:preproc_tmpl(Topic), - ProducerOpts = make_producer_opts(Conf, ACLInfo), + ProducerOpts = make_producer_opts(ChannelId, Conf, ACLInfo), Templates = parse_template(Conf), State = #{ topic => Topic, @@ -349,6 +350,7 @@ is_sensitive_key(_) -> false. make_producer_opts( + ChannelId, #{ send_buffer := SendBuff, refresh_interval := RefreshInterval @@ -356,6 +358,9 @@ make_producer_opts( ACLInfo ) -> #{ + %% TODO: the name needs to be an atom but this may cause atom leak so we + %% should figure out a way to avoid this + name => binary_to_atom(ChannelId), tcp_opts => [{sndbuf, SendBuff}], ref_topic_route_interval => RefreshInterval, acl_info => emqx_secret:wrap(ACLInfo) diff --git a/apps/emqx_bridge_rocketmq/test/emqx_bridge_rocketmq_SUITE.erl b/apps/emqx_bridge_rocketmq/test/emqx_bridge_rocketmq_SUITE.erl index a056ae3d2..7af6c7eea 100644 --- a/apps/emqx_bridge_rocketmq/test/emqx_bridge_rocketmq_SUITE.erl +++ b/apps/emqx_bridge_rocketmq/test/emqx_bridge_rocketmq_SUITE.erl @@ -263,6 +263,60 @@ t_setup_via_http_api_and_publish(Config) -> ), ok. 
+t_setup_two_actions_via_http_api_and_publish(Config) -> + BridgeType = ?GET_CONFIG(rocketmq_bridge_type, Config), + Name = ?GET_CONFIG(rocketmq_name, Config), + RocketMQConf = ?GET_CONFIG(rocketmq_config, Config), + RocketMQConf2 = RocketMQConf#{ + <<"name">> => Name, + <<"type">> => BridgeType + }, + ?assertMatch( + {ok, _}, + create_bridge_http(RocketMQConf2) + ), + {ok, #{raw_config := ActionConf}} = emqx_bridge_v2:lookup(actions, BridgeType, Name), + Topic2 = <<"Topic2">>, + ActionConf2 = emqx_utils_maps:deep_force_put( + [<<"parameters">>, <<"topic">>], ActionConf, Topic2 + ), + Action2Name = atom_to_binary(?FUNCTION_NAME), + {ok, _} = emqx_bridge_v2:create(BridgeType, Action2Name, ActionConf2), + SentData = #{payload => ?PAYLOAD}, + ?check_trace( + begin + ?wait_async_action( + ?assertEqual(ok, send_message(Config, SentData)), + #{?snk_kind := rocketmq_connector_query_return}, + 10_000 + ), + ok + end, + fun(Trace0) -> + Trace = ?of_kind(rocketmq_connector_query_return, Trace0), + ?assertMatch([#{result := ok}], Trace), + ok + end + ), + Config2 = proplists:delete(rocketmq_name, Config), + Config3 = [{rocketmq_name, Action2Name} | Config2], + ?check_trace( + begin + ?wait_async_action( + ?assertEqual(ok, send_message(Config3, SentData)), + #{?snk_kind := rocketmq_connector_query_return}, + 10_000 + ), + ok + end, + fun(Trace0) -> + Trace = ?of_kind(rocketmq_connector_query_return, Trace0), + ?assertMatch([#{result := ok}], Trace), + ok + end + ), + ok. 
+ t_get_status(Config) -> ?assertMatch( {ok, _}, From 2fe36776b5cd15c4328e00c0beb593018ab1da82 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 15 Apr 2024 20:21:16 +0200 Subject: [PATCH 144/234] docs: add change log entry --- changes/ee/fix-12882.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/fix-12882.en.md diff --git a/changes/ee/fix-12882.en.md b/changes/ee/fix-12882.en.md new file mode 100644 index 000000000..804665fef --- /dev/null +++ b/changes/ee/fix-12882.en.md @@ -0,0 +1 @@ +The RocketMQ action has been fixed so that the topic configiuration works correctly. If more than one action used a single connector before this fix, all actions messages got delivered to the topic that was used first. From 1fe92bddd032fece0c10a45994166a659af058af Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 15 Apr 2024 20:37:23 +0200 Subject: [PATCH 145/234] fix(rocketmq action): make sure that topic template is respected --- .../src/emqx_bridge_rocketmq_connector.erl | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl index bd5154df5..6a7b8d4bc 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl @@ -139,14 +139,13 @@ on_add_channel( ChannelId, ChannelConfig ) -> - {ok, ChannelState} = create_channel_state(ChannelId, ChannelConfig, ACLInfo), + {ok, ChannelState} = create_channel_state(ChannelConfig, ACLInfo), NewInstalledChannels = maps:put(ChannelId, ChannelState, InstalledChannels), %% Update state NewState = OldState#{installed_channels => NewInstalledChannels}, {ok, NewState}. 
create_channel_state( - ChannelId, #{parameters := Conf} = _ChannelConfig, ACLInfo ) -> @@ -155,7 +154,7 @@ create_channel_state( sync_timeout := SyncTimeout } = Conf, TopicTks = emqx_placeholder:preproc_tmpl(Topic), - ProducerOpts = make_producer_opts(ChannelId, Conf, ACLInfo), + ProducerOpts = make_producer_opts(Conf, ACLInfo), Templates = parse_template(Conf), State = #{ topic => Topic, @@ -203,7 +202,7 @@ on_stop(InstanceId, _State) -> ({_, client_id, ClientId}) -> destory_producers_map(ClientId), ok = rocketmq:stop_and_delete_supervised_client(ClientId); - ({_, _Topic, Producer}) -> + ({_, _ChannelId, Producer}) -> _ = rocketmq:stop_and_delete_supervised_producers(Producer) end, emqx_resource:get_allocated_resources_list(InstanceId) @@ -259,7 +258,7 @@ do_query( Data = apply_template(Query, Templates), Result = safe_do_produce( - InstanceId, QueryFunc, ClientId, TopicKey, Data, ProducerOpts, RequestTimeout + ChannelId, InstanceId, QueryFunc, ClientId, TopicKey, Data, ProducerOpts, RequestTimeout ), case Result of {error, Reason} -> @@ -285,9 +284,11 @@ do_query( get_channel_id({ChannelId, _}) -> ChannelId; get_channel_id([{ChannelId, _} | _]) -> ChannelId. -safe_do_produce(InstanceId, QueryFunc, ClientId, TopicKey, Data, ProducerOpts, RequestTimeout) -> +safe_do_produce( + ChannelId, InstanceId, QueryFunc, ClientId, TopicKey, Data, ProducerOpts, RequestTimeout +) -> try - Producers = get_producers(InstanceId, ClientId, TopicKey, ProducerOpts), + Producers = get_producers(ChannelId, InstanceId, ClientId, TopicKey, ProducerOpts), produce(InstanceId, QueryFunc, Producers, Data, RequestTimeout) catch _Type:Reason -> @@ -350,7 +351,6 @@ is_sensitive_key(_) -> false. 
make_producer_opts( - ChannelId, #{ send_buffer := SendBuff, refresh_interval := RefreshInterval @@ -358,9 +358,6 @@ make_producer_opts( ACLInfo ) -> #{ - %% TODO: the name needs to be an atom but this may cause atom leak so we - %% should figure out a way to avoid this - name => binary_to_atom(ChannelId), tcp_opts => [{sndbuf, SendBuff}], ref_topic_route_interval => RefreshInterval, acl_info => emqx_secret:wrap(ACLInfo) @@ -396,16 +393,19 @@ destory_producers_map(ClientId) -> ets:delete(Tid) end. -get_producers(InstanceId, ClientId, Topic, ProducerOpts) -> - case ets:lookup(ClientId, Topic) of +get_producers(ChannelId, InstanceId, ClientId, Topic, ProducerOpts) -> + case ets:lookup(ClientId, ChannelId) of [{_, Producers}] -> Producers; _ -> - ProducerGroup = iolist_to_binary([atom_to_list(ClientId), "_", Topic]), + ProducerGroup = ChannelId, + %% TODO: the name needs to be an atom but this may cause atom leak so we + %% should figure out a way to avoid this + ProducerOpts2 = ProducerOpts#{name => binary_to_atom(ChannelId)}, {ok, Producers} = rocketmq:ensure_supervised_producers( - ClientId, ProducerGroup, Topic, ProducerOpts + ClientId, ProducerGroup, Topic, ProducerOpts2 ), - ok = emqx_resource:allocate_resource(InstanceId, Topic, Producers), - ets:insert(ClientId, {Topic, Producers}), + ok = emqx_resource:allocate_resource(InstanceId, ChannelId, Producers), + ets:insert(ClientId, {ChannelId, Producers}), Producers end. From f4818717926765ad5f2846d7ee17b3f6daa16846 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 15 Apr 2024 20:52:08 +0200 Subject: [PATCH 146/234] fix(rocketmq action): we need one producer group per channel and topic We need one producer group per channel and topic because we can have several topics per channel due to templating. 
--- .../src/emqx_bridge_rocketmq_connector.erl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl index 6a7b8d4bc..011d4074f 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl @@ -202,7 +202,7 @@ on_stop(InstanceId, _State) -> ({_, client_id, ClientId}) -> destory_producers_map(ClientId), ok = rocketmq:stop_and_delete_supervised_client(ClientId); - ({_, _ChannelId, Producer}) -> + ({_, _ProducerGroup, Producer}) -> _ = rocketmq:stop_and_delete_supervised_producers(Producer) end, emqx_resource:get_allocated_resources_list(InstanceId) @@ -394,18 +394,20 @@ destory_producers_map(ClientId) -> end. get_producers(ChannelId, InstanceId, ClientId, Topic, ProducerOpts) -> - case ets:lookup(ClientId, ChannelId) of + %% The topic need to be included in the name since we can have multiple + %% topics per channel due to templating. + ProducerGroup = iolist_to_binary([ChannelId, "_", Topic]), + case ets:lookup(ClientId, ProducerGroup) of [{_, Producers}] -> Producers; _ -> - ProducerGroup = ChannelId, %% TODO: the name needs to be an atom but this may cause atom leak so we %% should figure out a way to avoid this - ProducerOpts2 = ProducerOpts#{name => binary_to_atom(ChannelId)}, + ProducerOpts2 = ProducerOpts#{name => binary_to_atom(ProducerGroup)}, {ok, Producers} = rocketmq:ensure_supervised_producers( ClientId, ProducerGroup, Topic, ProducerOpts2 ), - ok = emqx_resource:allocate_resource(InstanceId, ChannelId, Producers), - ets:insert(ClientId, {ChannelId, Producers}), + ok = emqx_resource:allocate_resource(InstanceId, ProducerGroup, Producers), + ets:insert(ClientId, {ProducerGroup, Producers}), Producers end. 
From a0ffe5e7ae66cdecaea8d4726f4c97e695d5a338 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Mon, 15 Apr 2024 22:05:05 +0200 Subject: [PATCH 147/234] chore: 5.6.1-rc.1 --- apps/emqx/include/emqx_release.hrl | 4 ++-- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- deploy/charts/emqx/Chart.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 2637a0270..dec05f866 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.6.1-beta.1"). +-define(EMQX_RELEASE_CE, "5.6.1-rc.1"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.6.1-beta.1"). +-define(EMQX_RELEASE_EE, "5.6.1-rc.1"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index 573277cac..9077d7adb 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.1-beta.1 +version: 5.6.1-rc.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.1-beta.1 +appVersion: 5.6.1-rc.1 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index e771499b6..50377d827 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.1-beta.1 +version: 5.6.1-rc.1 # This is the version number of the application being deployed. 
This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.1-beta.1 +appVersion: 5.6.1-rc.1 From 5d7b2e2ce69ab9b4e01f2b2ac7882d1c277f8f60 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 15 Apr 2024 16:58:46 +0200 Subject: [PATCH 148/234] fix(dsrepl): attempt leadership transfer on terminate In addition to on removal. The reasoning is basically the same: try to avoid situations when log entries are replicated (or will be considered replicated when the new leader is elected) but the leader terminates before replying to the client. To be clear: this is a stupid solution. Something much more robust is needed. --- .../src/emqx_ds_builtin_db_sup.erl | 1 + .../src/emqx_ds_replication_layer_shard.erl | 58 ++++++++++++------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl index 2dd9ae332..ef1600500 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl @@ -232,6 +232,7 @@ shard_replication_spec(DB, Shard, Opts) -> #{ id => {Shard, replication}, start => {emqx_ds_replication_layer_shard, start_link, [DB, Shard, Opts]}, + shutdown => 10_000, restart => permanent, type => worker }. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl index 20d9ef481..e0e70596a 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl @@ -189,22 +189,9 @@ add_local_server(DB, Shard) -> -spec drop_local_server(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> ok | emqx_ds:error(_Reason). drop_local_server(DB, Shard) -> - ShardServers = shard_servers(DB, Shard), + %% NOTE: Timeouts are ignored, it's a best effort attempt. 
+ _ = prep_stop_server(DB, Shard), LocalServer = local_server(DB, Shard), - case lookup_leader(DB, Shard) of - LocalServer -> - %% NOTE - %% Trigger leadership transfer *and* force to wait until the new leader - %% is elected and updated in the leaderboard. This should help to avoid - %% edge cases where entries appended right before removal are duplicated - %% due to client retries. - %% Timeouts are ignored, it's a best effort attempt. - [Candidate | _] = lists:delete(LocalServer, ShardServers), - _ = ra:transfer_leadership(LocalServer, Candidate), - _ = wait_until(fun() -> lookup_leader(DB, Shard) == Candidate end); - _Another -> - ok - end, case remove_server(DB, Shard, LocalServer) of ok -> ra:force_delete_server(DB, LocalServer); @@ -300,7 +287,7 @@ ra_overview(Server) -> init({DB, Shard, Opts}) -> _ = process_flag(trap_exit, true), - ok = start_shard(DB, Shard, Opts), + ok = start_server(DB, Shard, Opts), {ok, {DB, Shard}}. handle_call(_Call, _From, State) -> @@ -310,18 +297,18 @@ handle_cast(_Msg, State) -> {noreply, State}. terminate(_Reason, {DB, Shard}) -> + %% NOTE: Timeouts are ignored, it's a best effort attempt. + catch prep_stop_server(DB, Shard), LocalServer = get_local_server(DB, Shard), ok = ra:stop_server(DB, LocalServer). 
%% -start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> +start_server(DB, Shard, #{replication_options := ReplicationOpts}) -> ClusterName = cluster_name(DB, Shard), LocalServer = local_server(DB, Shard), Servers = shard_servers(DB, Shard), case ra:restart_server(DB, LocalServer) of - ok -> - Bootstrap = false; {error, name_not_registered} -> Bootstrap = true, Machine = {module, emqx_ds_replication_layer, #{db => DB, shard => Shard}}, @@ -339,7 +326,11 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) -> initial_members => Servers, machine => Machine, log_init_args => LogOpts - }) + }); + ok -> + Bootstrap = false; + {error, {already_started, _}} -> + Bootstrap = false end, %% NOTE %% Triggering election is necessary when a new consensus group is being brought up. @@ -371,6 +362,29 @@ server_uid(_DB, Shard) -> %% +prep_stop_server(DB, Shard) -> + prep_stop_server(DB, Shard, 5_000). + +prep_stop_server(DB, Shard, Timeout) -> + LocalServer = get_local_server(DB, Shard), + Candidates = lists:delete(LocalServer, shard_servers(DB, Shard)), + case lookup_leader(DB, Shard) of + LocalServer when Candidates =/= [] -> + %% NOTE + %% Trigger leadership transfer *and* force to wait until the new leader + %% is elected and updated in the leaderboard. This should help to avoid + %% edge cases where entries appended right before removal are duplicated + %% due to client retries. + %% TODO: Candidate may be offline. + [Candidate | _] = Candidates, + _ = ra:transfer_leadership(LocalServer, Candidate), + wait_until(fun() -> lookup_leader(DB, Shard) == Candidate end, Timeout); + _Another -> + ok + end. + +%% + memoize(Fun, Args) -> %% NOTE: Assuming that the function is pure and never returns `undefined`. case persistent_term:get([Fun | Args], undefined) of @@ -382,8 +396,8 @@ memoize(Fun, Args) -> Result end. -wait_until(Fun) -> - wait_until(Fun, 5_000, 250). +wait_until(Fun, Timeout) -> + wait_until(Fun, Timeout, 100). 
wait_until(Fun, Timeout, Sleep) -> Deadline = erlang:monotonic_time(millisecond) + Timeout, From d82f7c3f715c0643dbc6baf200f19f4d78ac2d96 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Tue, 16 Apr 2024 07:55:44 +0200 Subject: [PATCH 149/234] fix: qlc could not create temp files in docker container (#12875) qlc uses file_sorter that puts temporary files in the working directory by default, which is not writable by emqx user since 58d0f040569a2a9a20dcaf6351c6493127c2067a. One of the consequences is that users cannot access retained messages from the dashboard, but there are likely other issues as well. This patch fixes this by making /opt/emqx directory owned by emqx:emqx. --- deploy/docker/Dockerfile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/deploy/docker/Dockerfile b/deploy/docker/Dockerfile index d43b4a19f..ea7bb27cc 100644 --- a/deploy/docker/Dockerfile +++ b/deploy/docker/Dockerfile @@ -47,18 +47,19 @@ ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 COPY deploy/docker/docker-entrypoint.sh /usr/bin/ -COPY --from=builder /emqx-rel /opt/ RUN set -eu; \ apt-get update; \ apt-get install -y --no-install-recommends ca-certificates procps $(echo "${EXTRA_DEPS}" | tr ',' ' '); \ rm -rf /var/lib/apt/lists/*; \ - find /opt/emqx -name 'swagger*.js.map' -exec rm {} +; \ - ln -s /opt/emqx/bin/* /usr/local/bin/; \ groupadd -r -g 1000 emqx; \ - useradd -r -m -u 1000 -g emqx emqx; \ - mkdir -p /opt/emqx/log /opt/emqx/data /opt/emqx/plugins; \ - chown -R emqx:emqx /opt/emqx/log /opt/emqx/data /opt/emqx/plugins + useradd -r -m -u 1000 -g emqx emqx; + +COPY --from=builder --chown=emqx:emqx /emqx-rel /opt/ + +RUN set -eu; \ + find /opt/emqx -name 'swagger*.js.map' -exec rm {} +; \ + ln -s /opt/emqx/bin/* /usr/local/bin/; WORKDIR /opt/emqx From 3b71fba9e39d89365ebecd9064e1eb7f58965c50 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Tue, 16 Apr 2024 07:56:00 +0200 Subject: [PATCH 150/234] chore(build): when using make *-docker, load docker image by 
default --- build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build b/build index d63260d1d..73b83a5b6 100755 --- a/build +++ b/build @@ -493,7 +493,7 @@ make_docker() { if [ "${DOCKER_PUSH:-false}" = true ]; then DOCKER_BUILDX_ARGS+=(--push) fi - if [ "${DOCKER_LOAD:-false}" = true ]; then + if [ "${DOCKER_LOAD:-true}" = true ]; then DOCKER_BUILDX_ARGS+=(--load) fi if [ -d "${REBAR_GIT_CACHE_DIR:-}" ]; then From ab0f1888bf3601ff713335ac31e6e833297c9fb6 Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 16 Apr 2024 08:31:07 +0200 Subject: [PATCH 151/234] docs: refine client_attrs doc --- rel/i18n/emqx_schema.hocon | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index 0868bf93c..cb504694c 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -1587,9 +1587,9 @@ client_attrs_init_expression { label: "Client Attribute Extraction Regular Expression" desc: """~ A one line expression to evaluate a set of predefined string functions (like in the rule engine SQL statements). - The expression accepts direct variable reference, or one function call with nested calls for its arguments, - but it does not provide variable binding or user-defined functions and pre-bound variables. - For example, to extract the prefix of client ID delimited by a dot: `nth(1, tokens(clientid, '.'))`. + The expression can be a function call with nested calls as its arguments, or direct variable reference. + So far, it does not provide user-defined variable binding (like `var a=1`) or user-defined functions. + As an example, to extract the prefix of client ID delimited by a dot: `nth(1, tokens(clientid, '.'))`. The variables pre-bound variables are: - `cn`: Client's TLS certificate common name. 
From f3bb28c6af688239265728bcc0c96278eea3e33a Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Tue, 16 Apr 2024 09:47:32 +0200 Subject: [PATCH 152/234] chore: 5.6.1-rc.2 --- apps/emqx/include/emqx_release.hrl | 4 +-- changes/e5.6.1.en.md | 46 ++++++++++++++++++++++++ changes/v5.6.1.en.md | 44 +++++++++++++++++++++++ deploy/charts/emqx-enterprise/Chart.yaml | 4 +-- deploy/charts/emqx/Chart.yaml | 4 +-- 5 files changed, 96 insertions(+), 6 deletions(-) create mode 100644 changes/e5.6.1.en.md create mode 100644 changes/v5.6.1.en.md diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index dec05f866..9d4437a3f 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.6.1-rc.1"). +-define(EMQX_RELEASE_CE, "5.6.1-rc.2"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.6.1-rc.1"). +-define(EMQX_RELEASE_EE, "5.6.1-rc.2"). diff --git a/changes/e5.6.1.en.md b/changes/e5.6.1.en.md new file mode 100644 index 000000000..3deb7466b --- /dev/null +++ b/changes/e5.6.1.en.md @@ -0,0 +1,46 @@ +# e5.6.1 + +## Bug Fixes + +- [#12759](https://github.com/emqx/emqx/pull/12759) Do not save invalid uploaded backup files. + +- [#12766](https://github.com/emqx/emqx/pull/12766) Rename `message_queue_too_long` error reason to `mailbox_overflow` + + `mailbox_overflow` is consistent with the corresponding config parameter: `force_shutdown.max_mailbox_size`. + +- [#12773](https://github.com/emqx/emqx/pull/12773) Upgrade HTTP client libraries. + + The HTTP client library (`gun-1.3`) incorrectly appends a `:portnumber` suffix to the `Host` header for + standard ports (`http` on port 80, `https` on port 443). 
This could cause compatibility issues with servers or
+  gateways performing strict `Host` header checks (e.g., AWS Lambda, Alibaba Cloud HTTP gateways), leading to
+  errors such as `InvalidCustomDomain.NotFound` or "The specified CustomDomain does not exist."
+
+- [#12802](https://github.com/emqx/emqx/pull/12802) Improve cluster discovery behaviour when a node is manually removed from a cluster using 'emqx ctl cluster leave' command.
+  Previously, if the configured cluster 'discovery_strategy' was not 'manual', the left node might re-discover and re-join the same cluster shortly after it left (unless it was stopped).
+  After this change, 'cluster leave' command disables automatic cluster_discovery, so that the left node won't re-join the same cluster again. Cluster discovery can be re-enabled by running 'emqx ctl discovery enable` or by restarting the left node.
+
+- [#12814](https://github.com/emqx/emqx/pull/12814) Handle several errors in `/clients/{clientid}/mqueue_messages` and `/clients/{clientid}/inflight_messages` APIs:
+
+  - Internal timeout, which means that EMQX failed to get the list of Inflight/Mqueue messages within the default timeout of 5 s. This error may occur when the system is under a heavy load. The API will return 500 `{"code":"INTERNAL_ERROR","message":"timeout"}` response and log additional details.
+  - Client shutdown. The error may occur if the client connection is shutdown during the API call. The API will return 404 `{"code": "CLIENT_SHUTDOWN", "message": "Client connection has been shutdown"}` response in this case.
+
+- [#12824](https://github.com/emqx/emqx/pull/12824) Make sure stats `'subscribers.count'` `'subscribers.max'` contain shared-subscribers.
+  Previously they contained only non-shared subscribers.
+
+- [#12826](https://github.com/emqx/emqx/pull/12826) Fixed an issue that prevented importing source data integrations and retained messages.
+ + Before the fix: + + - source data integrations are ignored from the backup file + - importing the `mnesia` table for retained messages are not supported + +- [#12843](https://github.com/emqx/emqx/pull/12843) Fixed `cluster_rpc_commit` transaction ID cleanup procedure after `cluster leave` on replicant nodes. + Previously, the transaction id of the core node would be deleted prematurely, blocking configuration updates on the core node. + +- [#12882](https://github.com/emqx/emqx/pull/12882) The RocketMQ action has been fixed so that the topic configiuration works correctly. If more than one action used a single connector before this fix, all actions messages got delivered to the topic that was used first. + +- [#12885](https://github.com/emqx/emqx/pull/12885) Fixed an issue when users were not able to see the "Retained Messages" under the "Monitoring" menu in the admin dashboard. + +"Retained messages" backend API uses `qlc`, and `qlc` uses `file_sorter` that puts temporary files in the working directory by default, which is not writable by emqx user since 58d0f04. + +This patch fixes this by making `/opt/emqx` directory owned by `emqx:emqx`. diff --git a/changes/v5.6.1.en.md b/changes/v5.6.1.en.md new file mode 100644 index 000000000..e33af057e --- /dev/null +++ b/changes/v5.6.1.en.md @@ -0,0 +1,44 @@ +# v5.6.1 + +## Bug Fixes + +- [#12759](https://github.com/emqx/emqx/pull/12759) Do not save invalid uploaded backup files. + +- [#12766](https://github.com/emqx/emqx/pull/12766) Rename `message_queue_too_long` error reason to `mailbox_overflow` + + `mailbox_overflow` is consistent with the corresponding config parameter: `force_shutdown.max_mailbox_size`. + +- [#12773](https://github.com/emqx/emqx/pull/12773) Upgrade HTTP client libraries. + + The HTTP client library (`gun-1.3`) incorrectly appends a `:portnumber` suffix to the `Host` header for + standard ports (`http` on port 80, `https` on port 443). 
This could cause compatibility issues with servers or
+  gateways performing strict `Host` header checks (e.g., AWS Lambda, Alibaba Cloud HTTP gateways), leading to
+  errors such as `InvalidCustomDomain.NotFound` or "The specified CustomDomain does not exist."
+
+- [#12802](https://github.com/emqx/emqx/pull/12802) Improve cluster discovery behaviour when a node is manually removed from a cluster using 'emqx ctl cluster leave' command.
+  Previously, if the configured cluster 'discovery_strategy' was not 'manual', the left node might re-discover and re-join the same cluster shortly after it left (unless it was stopped).
+  After this change, 'cluster leave' command disables automatic cluster_discovery, so that the left node won't re-join the same cluster again. Cluster discovery can be re-enabled by running 'emqx ctl discovery enable` or by restarting the left node.
+
+- [#12814](https://github.com/emqx/emqx/pull/12814) Handle several errors in `/clients/{clientid}/mqueue_messages` and `/clients/{clientid}/inflight_messages` APIs:
+
+  - Internal timeout, which means that EMQX failed to get the list of Inflight/Mqueue messages within the default timeout of 5 s. This error may occur when the system is under a heavy load. The API will return 500 `{"code":"INTERNAL_ERROR","message":"timeout"}` response and log additional details.
+  - Client shutdown. The error may occur if the client connection is shutdown during the API call. The API will return 404 `{"code": "CLIENT_SHUTDOWN", "message": "Client connection has been shutdown"}` response in this case.
+
+- [#12824](https://github.com/emqx/emqx/pull/12824) Make sure stats `'subscribers.count'` `'subscribers.max'` contain shared-subscribers.
+  Previously they contained only non-shared subscribers.
+
+- [#12826](https://github.com/emqx/emqx/pull/12826) Fixed an issue that prevented importing source data integrations and retained messages.
+ + Before the fix: + + - source data integrations are ignored from the backup file + - importing the `mnesia` table for retained messages are not supported + +- [#12843](https://github.com/emqx/emqx/pull/12843) Fixed `cluster_rpc_commit` transaction ID cleanup procedure after `cluster leave` on replicant nodes. + Previously, the transaction id of the core node would be deleted prematurely, blocking configuration updates on the core node. + +- [#12885](https://github.com/emqx/emqx/pull/12885) Fixed an issue when users were not able to see the "Retained Messages" under the "Monitoring" menu in the admin dashboard. + +"Retained messages" backend API uses `qlc`, and `qlc` uses `file_sorter` that puts temporary files in the working directory by default, which is not writable by emqx user since 58d0f04. + +This patch fixes this by making `/opt/emqx` directory owned by `emqx:emqx`. diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index 9077d7adb..1ee736e4b 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.1-rc.1 +version: 5.6.1-rc.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.1-rc.1 +appVersion: 5.6.1-rc.2 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index 50377d827..b2a755e72 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
-version: 5.6.1-rc.1 +version: 5.6.1-rc.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.1-rc.1 +appVersion: 5.6.1-rc.2 From 8fe471c9673cff5ed6493e4c5a4c114e6bcb6915 Mon Sep 17 00:00:00 2001 From: William Yang Date: Tue, 16 Apr 2024 10:02:57 +0200 Subject: [PATCH 153/234] fix(mqtt): enhanced auth with scram Bump esasl to 0.2.1 --- changes/ce/fix-12887.en.md | 2 ++ mix.exs | 2 +- rebar.config | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changes/ce/fix-12887.en.md diff --git a/changes/ce/fix-12887.en.md b/changes/ce/fix-12887.en.md new file mode 100644 index 000000000..c25d3a320 --- /dev/null +++ b/changes/ce/fix-12887.en.md @@ -0,0 +1,2 @@ +Fix MQTT enhanced auth with sasl scram. + diff --git a/mix.exs b/mix.exs index 09116bff0..46aa00280 100644 --- a/mix.exs +++ b/mix.exs @@ -74,7 +74,7 @@ defmodule EMQXUmbrella.MixProject do {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.8", override: true}, {:hocon, github: "emqx/hocon", tag: "0.42.1", override: true}, {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.3", override: true}, - {:esasl, github: "emqx/esasl", tag: "0.2.0"}, + {:esasl, github: "emqx/esasl", tag: "0.2.1"}, {:jose, github: "potatosalad/erlang-jose", tag: "1.11.2"}, # in conflict by ehttpc and emqtt {:gun, github: "emqx/gun", tag: "1.3.11", override: true}, diff --git a/rebar.config b/rebar.config index 042c57b66..ee28b32c7 100644 --- a/rebar.config +++ b/rebar.config @@ -99,7 +99,7 @@ {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.8"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.42.1"}}}, {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, - {esasl, {git, "https://github.com/emqx/esasl", {tag, "0.2.0"}}}, + {esasl, {git, "https://github.com/emqx/esasl", {tag, "0.2.1"}}}, {jose, {git, 
"https://github.com/potatosalad/erlang-jose", {tag, "1.11.2"}}}, {telemetry, "1.1.0"}, {hackney, {git, "https://github.com/emqx/hackney.git", {tag, "1.18.1-1"}}}, From c645cfa5d63196441f8edab48fcd23d79498697f Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 12 Apr 2024 18:04:27 +0200 Subject: [PATCH 154/234] fix(sessds): Graceful handling of shared subscription error --- apps/emqx/src/emqx_persistent_session_ds.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 83ed5d465..0f1e77370 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -337,6 +337,13 @@ print_session(ClientId) -> -spec subscribe(topic_filter(), emqx_types:subopts(), session()) -> {ok, session()} | {error, emqx_types:reason_code()}. +subscribe( + #share{}, + _SubOpts, + _Session +) -> + %% TODO: Shared subscriptions are not supported yet: + {error, ?RC_SHARED_SUBSCRIPTIONS_NOT_SUPPORTED}; subscribe( TopicFilter, SubOpts, @@ -421,6 +428,9 @@ do_unsubscribe(SessionId, TopicFilter, Subscription = #{id := SubId}, S0) -> -spec get_subscription(topic_filter(), session()) -> emqx_types:subopts() | undefined. 
+get_subscription(#share{}, _) -> + %% TODO: shared subscriptions are not supported yet: + undefined; get_subscription(TopicFilter, #{s := S}) -> case emqx_persistent_session_ds_subs:lookup(TopicFilter, S) of _Subscription = #{props := SubOpts} -> From 6b8111c0665bc3df393b85cd3cc8af10e540f007 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Tue, 16 Apr 2024 19:07:17 +0800 Subject: [PATCH 155/234] fix(license): license file is not taking effect after importing backup files --- apps/emqx_license/src/emqx_license.app.src | 2 +- apps/emqx_license/src/emqx_license.erl | 33 +++++++++++++------ apps/emqx_license/test/emqx_license_SUITE.erl | 30 +++++++++++++++++ 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/apps/emqx_license/src/emqx_license.app.src b/apps/emqx_license/src/emqx_license.app.src index 18545cbed..e24a152c7 100644 --- a/apps/emqx_license/src/emqx_license.app.src +++ b/apps/emqx_license/src/emqx_license.app.src @@ -1,6 +1,6 @@ {application, emqx_license, [ {description, "EMQX License"}, - {vsn, "5.0.16"}, + {vsn, "5.0.17"}, {modules, []}, {registered, [emqx_license_sup]}, {applications, [kernel, stdlib, emqx_ctl]}, diff --git a/apps/emqx_license/src/emqx_license.erl b/apps/emqx_license/src/emqx_license.erl index c95ad0e7f..73c1cdf4e 100644 --- a/apps/emqx_license/src/emqx_license.erl +++ b/apps/emqx_license/src/emqx_license.erl @@ -10,6 +10,7 @@ -include_lib("typerefl/include/types.hrl"). -behaviour(emqx_config_handler). +-behaviour(emqx_config_backup). -export([ pre_config_update/3, @@ -26,6 +27,8 @@ update_setting/1 ]). +-export([import_config/1]). + -define(CONF_KEY_PATH, [license]). %% Give the license app the highest priority. @@ -58,21 +61,20 @@ unload() -> -spec update_key(binary() | string()) -> {ok, emqx_config:update_result()} | {error, emqx_config:update_error()}. 
update_key(Value) when is_binary(Value); is_list(Value) -> - Result = emqx_conf:update( - ?CONF_KEY_PATH, - {key, Value}, - #{rawconf_with_defaults => true, override_to => cluster} - ), + Result = exec_config_update({key, Value}), handle_config_update_result(Result). update_setting(Setting) when is_map(Setting) -> - Result = emqx_conf:update( - ?CONF_KEY_PATH, - {setting, Setting}, - #{rawconf_with_defaults => true, override_to => cluster} - ), + Result = exec_config_update({setting, Setting}), handle_config_update_result(Result). +exec_config_update(Param) -> + emqx_conf:update( + ?CONF_KEY_PATH, + Param, + #{rawconf_with_defaults => true, override_to => cluster} + ). + %%------------------------------------------------------------------------------ %% emqx_hooks %%------------------------------------------------------------------------------ @@ -106,6 +108,17 @@ check(_ConnInfo, AckProps) -> {stop, {error, ?RC_QUOTA_EXCEEDED}} end. +import_config(#{<<"license">> := Config}) -> + OldConf = emqx:get_config(?CONF_KEY_PATH), + case exec_config_update(Config) of + {ok, #{config := NewConf}} -> + Changed = maps:get(changed, emqx_utils_maps:diff_maps(NewConf, OldConf)), + Changed1 = lists:map(fun(Key) -> [license, Key] end, maps:keys(Changed)), + {ok, #{root_key => license, changed => Changed1}}; + Error -> + {error, #{root_key => license, reason => Error}} + end. + %%------------------------------------------------------------------------------ %% emqx_config_handler callbacks %%------------------------------------------------------------------------------ diff --git a/apps/emqx_license/test/emqx_license_SUITE.erl b/apps/emqx_license/test/emqx_license_SUITE.erl index 1aa370359..7c041aad1 100644 --- a/apps/emqx_license/test/emqx_license_SUITE.erl +++ b/apps/emqx_license/test/emqx_license_SUITE.erl @@ -149,6 +149,36 @@ t_check_not_loaded(_Config) -> emqx_license:check(#{}, #{}) ). 
+t_import_config(_Config) -> + %% Import to default license + ?assertMatch( + {ok, #{root_key := license, changed := _}}, + emqx_license:import_config(#{<<"license">> => #{<<"key">> => <<"default">>}}) + ), + ?assertEqual(default, emqx:get_config([license, key])), + ?assertMatch({ok, #{max_connections := 10}}, emqx_license_checker:limits()), + + %% Import to a new license + EncodedLicense = emqx_license_test_lib:make_license(#{max_connections => "100"}), + ?assertMatch( + {ok, #{root_key := license, changed := _}}, + emqx_license:import_config( + #{ + <<"license">> => + #{ + <<"key">> => EncodedLicense, + <<"connection_low_watermark">> => <<"20%">>, + <<"connection_high_watermark">> => <<"50%">> + } + } + ) + ), + ?assertMatch({ok, #{max_connections := 100}}, emqx_license_checker:limits()), + ?assertMatch( + #{connection_low_watermark := 0.2, connection_high_watermark := 0.5}, + emqx:get_config([license]) + ). + %%------------------------------------------------------------------------------ %% Helpers %%------------------------------------------------------------------------------ From 2f0b72e0aab820b1f075375385ae3b2fd77bc6ac Mon Sep 17 00:00:00 2001 From: JianBo He Date: Tue, 16 Apr 2024 19:11:28 +0800 Subject: [PATCH 156/234] chore: update changes --- changes/ee/fix-12888.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/fix-12888.en.md diff --git a/changes/ee/fix-12888.en.md b/changes/ee/fix-12888.en.md new file mode 100644 index 000000000..98b42065c --- /dev/null +++ b/changes/ee/fix-12888.en.md @@ -0,0 +1 @@ +Fix License related configuration loss after importing backup data. 
From 2bd72aab44efb51c46a8df5364c92f7c9a8c55f1 Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 16 Apr 2024 16:58:45 +0200 Subject: [PATCH 157/234] chore: bump dashboard schema version to 0.2.0 --- .../src/emqx_dashboard_schema_api.erl | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl index 4a708cd78..2bc1c5b39 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl @@ -44,6 +44,8 @@ ]) ). +-define(SCHEMA_VERSION, <<"0.2.0">>). + %%-------------------------------------------------------------------- %% minirest API and schema %%-------------------------------------------------------------------- @@ -97,20 +99,31 @@ gen_schema(connectors) -> connectors_schema_json(). hotconf_schema_json() -> - SchemaInfo = #{title => <<"EMQX Hot Conf API Schema">>, version => <<"0.1.0">>}, + SchemaInfo = #{ + title => <<"EMQX Hot Conf Schema">>, + version => ?SCHEMA_VERSION + }, gen_api_schema_json_iodata(emqx_mgmt_api_configs, SchemaInfo). bridge_schema_json() -> - SchemaInfo = #{title => <<"EMQX Data Bridge API Schema">>, version => <<"0.1.0">>}, + SchemaInfo = #{ + title => <<"EMQX Data Bridge Schema">>, + version => ?SCHEMA_VERSION + }, gen_api_schema_json_iodata(emqx_bridge_api, SchemaInfo). actions_schema_json() -> - SchemaInfo = #{title => <<"EMQX Data Actions API Schema">>, version => <<"0.1.0">>}, - %% Note: this will be moved to `emqx_actions' application in the future. + SchemaInfo = #{ + title => <<"EMQX Data Actions and Sources Schema">>, + version => ?SCHEMA_VERSION + }, gen_api_schema_json_iodata(emqx_bridge_v2_api, SchemaInfo). 
connectors_schema_json() -> - SchemaInfo = #{title => <<"EMQX Connectors Schema">>, version => <<"0.1.0">>}, + SchemaInfo = #{ + title => <<"EMQX Connectors Schema">>, + version => ?SCHEMA_VERSION + }, gen_api_schema_json_iodata(emqx_connector_api, SchemaInfo). gen_api_schema_json_iodata(SchemaMod, SchemaInfo) -> From df458b98d77f8c76c17842bdceec2ff765ea0318 Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 16 Apr 2024 17:28:34 +0200 Subject: [PATCH 158/234] refactor(dashboard_schema): no need to translate labels the trans_label implementation was ugly, it compares an anonymous function to check if the label should be translated. since we have stopped generating i18n message ids for dashboard schema, this entire function is now stale, so this function is deleted. --- .../src/emqx_dashboard_swagger.erl | 36 +++---------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index dc188426e..7a5ea1939 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -652,19 +652,6 @@ trans_required(Spec, true, _) -> Spec#{required => true}; trans_required(Spec, _, path) -> Spec#{required => true}; trans_required(Spec, _, _) -> Spec. -trans_desc(Init, Hocon, Func, Name, Options) -> - Spec0 = trans_description(Init, Hocon, Options), - case Func =:= fun hocon_schema_to_spec/2 of - true -> - Spec0; - false -> - Spec1 = trans_label(Spec0, Hocon, Name, Options), - case Spec1 of - #{description := _} -> Spec1; - _ -> Spec1 - end - end. - trans_description(Spec, Hocon, Options) -> Desc = case desc_struct(Hocon) of @@ -702,19 +689,6 @@ get_i18n_text(Lang, Namespace, Id, Tag, Default) -> get_lang(#{i18n_lang := Lang}) -> Lang; get_lang(_) -> emqx:get_config([dashboard, i18n_lang]). 
-trans_label(Spec, Hocon, Default, Options) -> - Label = - case desc_struct(Hocon) of - ?DESC(_, _) = Struct -> get_i18n(<<"label">>, Struct, Default, Options); - _ -> Default - end, - case Label =:= undefined of - true -> - Spec; - false -> - Spec#{label => Label} - end. - desc_struct(Hocon) -> R = case hocon_schema:field_schema(Hocon, desc) of @@ -772,7 +746,7 @@ response(Status, #{content := _} = Content, {Acc, RefsAcc, Module, Options}) -> response(Status, ?REF(StructName), {Acc, RefsAcc, Module, Options}) -> response(Status, ?R_REF(Module, StructName), {Acc, RefsAcc, Module, Options}); response(Status, ?R_REF(_Mod, _Name) = RRef, {Acc, RefsAcc, Module, Options}) -> - SchemaToSpec = schema_converter(Options), + SchemaToSpec = get_schema_converter(Options), {Spec, Refs} = SchemaToSpec(RRef, Module), Content = content(Spec), { @@ -910,7 +884,7 @@ parse_object(PropList = [_ | _], Module, Options) when is_list(PropList) -> parse_object(Other, Module, Options) -> erlang:throw( {error, #{ - msg => <<"Object only supports not empty proplists">>, + msg => <<"Object only supports non-empty fields list">>, args => Other, module => Module, options => Options @@ -950,10 +924,10 @@ parse_object_loop([{Name, Hocon} | Rest], Module, Options, Props, Required, Refs true -> HoconType = hocon_schema:field_schema(Hocon, type), Init0 = init_prop([default | ?DEFAULT_FIELDS], #{}, Hocon), - SchemaToSpec = schema_converter(Options), + SchemaToSpec = get_schema_converter(Options), Init = maps:remove( summary, - trans_desc(Init0, Hocon, SchemaToSpec, NameBin, Options) + trans_description(Init0, Hocon, Options) ), {Prop, Refs1} = SchemaToSpec(HoconType, Module), NewRequiredAcc = @@ -1002,7 +976,7 @@ to_ref(Mod, StructName, Acc, RefsAcc) -> Ref = #{<<"$ref">> => ?TO_COMPONENTS_PARAM(Mod, StructName)}, {[Ref | Acc], [{Mod, StructName, parameter} | RefsAcc]}. -schema_converter(Options) -> +get_schema_converter(Options) -> maps:get(schema_converter, Options, fun hocon_schema_to_spec/2). 
hocon_error_msg(Reason) -> From 3e0c649e8e5cd0810469771d0b21ef2e95631066 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 12 Apr 2024 16:33:28 +0200 Subject: [PATCH 159/234] feat(sessds): Store awaiting rel --- apps/emqx/src/emqx_persistent_session_ds.erl | 111 ++++++++++++++---- .../src/emqx_persistent_session_ds_state.erl | 63 +++++++++- .../src/emqx_persistent_session_ds_subs.erl | 10 +- 3 files changed, 152 insertions(+), 32 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 0f1e77370..4517fa1b7 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -184,7 +184,9 @@ seqno_q2_dup, seqno_q2_rec, seqno_q2_next, - n_streams + n_streams, + awaiting_rel_cnt, + awaiting_rel_max ]). %% @@ -206,7 +208,8 @@ open(#{clientid := ClientID} = ClientInfo, ConnInfo, MaybeWillMsg, Conf) -> ok = emqx_cm:takeover_kick(ClientID), case session_open(ClientID, ClientInfo, ConnInfo, MaybeWillMsg) of Session0 = #{} -> - Session = Session0#{props => Conf}, + Session1 = Session0#{props => Conf}, + Session = do_expire(ClientInfo, Session1), {true, ensure_timers(Session), []}; false -> false @@ -262,21 +265,21 @@ info(inflight_max, #{inflight := Inflight}) -> emqx_persistent_session_ds_inflight:receive_maximum(Inflight); info(retry_interval, #{props := Conf}) -> maps:get(retry_interval, Conf); -% info(mqueue, #sessmem{mqueue = MQueue}) -> -% MQueue; info(mqueue_len, #{inflight := Inflight}) -> emqx_persistent_session_ds_inflight:n_buffered(all, Inflight); -% info(mqueue_max, #sessmem{mqueue = MQueue}) -> -% emqx_mqueue:max_len(MQueue); info(mqueue_dropped, _Session) -> 0; %% info(next_pkt_id, #{s := S}) -> %% {PacketId, _} = emqx_persistent_message_ds_replayer:next_packet_id(S), %% PacketId; -% info(awaiting_rel, #sessmem{awaiting_rel = AwaitingRel}) -> -% AwaitingRel; -%% info(awaiting_rel_cnt, #{s := S}) -> -%% 
seqno_diff(?QOS_2, ?rec, ?committed(?QOS_2), S); +info(awaiting_rel, #{s := S}) -> + emqx_persistent_session_ds_state:fold_awaiting_rel(fun maps:put/3, #{}, S); +info(awaiting_rel_max, #{props := Conf}) -> + maps:get(max_awaiting_rel, Conf); +info(awaiting_rel_cnt, #{s := S}) -> + emqx_persistent_session_ds_state:n_awaiting_rel(S); +info(await_rel_timeout, #{props := Conf}) -> + maps:get(await_rel_timeout, Conf); info(seqno_q1_comm, #{s := S}) -> emqx_persistent_session_ds_state:get_seqno(?committed(?QOS_1), S); info(seqno_q1_dup, #{s := S}) -> @@ -292,17 +295,7 @@ info(seqno_q2_rec, #{s := S}) -> info(seqno_q2_next, #{s := S}) -> emqx_persistent_session_ds_state:get_seqno(?next(?QOS_2), S); info(n_streams, #{s := S}) -> - emqx_persistent_session_ds_state:fold_streams( - fun(_, _, Acc) -> Acc + 1 end, - 0, - S - ); -info(awaiting_rel_max, #{props := Conf}) -> - maps:get(max_awaiting_rel, Conf); -info(await_rel_timeout, #{props := _Conf}) -> - %% TODO: currently this setting is ignored: - %% maps:get(await_rel_timeout, Conf). - 0; + emqx_persistent_session_ds_state:n_streams(S); info({MsgsQ, _PagerParams}, _Session) when MsgsQ =:= mqueue_msgs; MsgsQ =:= inflight_msgs -> {error, not_implemented}. @@ -446,11 +439,72 @@ get_subscription(TopicFilter, #{s := S}) -> -spec publish(emqx_types:packet_id(), emqx_types:message(), session()) -> {ok, emqx_types:publish_result(), session()} | {error, emqx_types:reason_code()}. 
+publish( + PacketId, + Msg = #message{qos = ?QOS_2, timestamp = Ts}, + Session = #{s := S0} +) -> + case is_awaiting_full(Session) of + false -> + case emqx_persistent_session_ds_state:get_awaiting_rel(PacketId, S0) of + undefined -> + Results = emqx_broker:publish(Msg), + S = emqx_persistent_session_ds_state:put_awaiting_rel(PacketId, Ts, S0), + {ok, Results, Session#{s => S}}; + _Ts -> + {error, ?RC_PACKET_IDENTIFIER_IN_USE} + end; + true -> + {error, ?RC_RECEIVE_MAXIMUM_EXCEEDED} + end; publish(_PacketId, Msg, Session) -> - %% TODO: QoS2 Result = emqx_broker:publish(Msg), {ok, Result, Session}. +is_awaiting_full(#{s := S, props := Props}) -> + emqx_persistent_session_ds_state:n_awaiting_rel(S) >= + maps:get(max_awaiting_rel, Props, infinity). + +-spec expire(emqx_types:clientinfo(), session()) -> + {ok, [], timeout(), session()} | {ok, [], session()}. +expire(ClientInfo, Session0 = #{props := Props}) -> + Session = #{s := S} = do_expire(ClientInfo, Session0), + case emqx_persistent_session_ds_state:n_awaiting_rel(S) of + 0 -> + {ok, [], Session}; + _ -> + AwaitRelTimeout = maps:get(await_rel_timeout, Props), + {ok, [], AwaitRelTimeout, Session} + end. + +do_expire(ClientInfo, Session = #{s := S0, props := Props}) -> + %% 1. Find expired packet IDs: + Now = erlang:system_time(millisecond), + AwaitRelTimeout = maps:get(await_rel_timeout, Props), + ExpiredPacketIds = + emqx_persistent_session_ds_state:fold_awaiting_rel( + fun(PacketId, Ts, Acc) -> + Age = Now - Ts, + case Age > AwaitRelTimeout of + true -> + [PacketId | Acc]; + false -> + Acc + end + end, + [], + S0 + ), + %% 2. Perform side effects: + _ = emqx_session_events:handle_event(ClientInfo, {expired_rel, length(ExpiredPacketIds)}), + %% 3. Update state: + S = lists:foldl( + fun emqx_persistent_session_ds_state:del_awaiting_rel/2, + S0, + ExpiredPacketIds + ), + Session#{s => S}. 
+ %%-------------------------------------------------------------------- %% Client -> Broker: PUBACK %%-------------------------------------------------------------------- @@ -487,9 +541,14 @@ pubrec(PacketId, Session0) -> -spec pubrel(emqx_types:packet_id(), session()) -> {ok, session()} | {error, emqx_types:reason_code()}. -pubrel(_PacketId, Session = #{}) -> - % TODO: stub - {ok, Session}. +pubrel(PacketId, Session = #{s := S0}) -> + case emqx_persistent_session_ds_state:get_awaiting_rel(PacketId, S0) of + undefined -> + {error, ?RC_PACKET_IDENTIFIER_NOT_FOUND}; + _TS -> + S = emqx_persistent_session_ds_state:del_awaiting_rel(PacketId, S0), + {ok, Session#{s => S}} + end. %%-------------------------------------------------------------------- %% Client -> Broker: PUBCOMP @@ -562,6 +621,8 @@ handle_timeout(_ClientInfo, #req_sync{from = From, ref = Ref}, Session = #{s := S = emqx_persistent_session_ds_state:commit(S0), From ! Ref, {ok, [], Session#{s => S}}; +handle_timeout(ClientInfo, expire_awaiting_rel, Session) -> + expire(ClientInfo, Session); handle_timeout(_ClientInfo, Timeout, Session) -> ?SLOG(warning, #{msg => "unknown_ds_timeout", timeout => Timeout}), {ok, [], Session}. diff --git a/apps/emqx/src/emqx_persistent_session_ds_state.erl b/apps/emqx/src/emqx_persistent_session_ds_state.erl index 28297964d..fc2da1317 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_state.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_state.erl @@ -34,10 +34,17 @@ -export([get_will_message/1, set_will_message/2, clear_will_message/1, clear_will_message_now/1]). -export([get_peername/1, set_peername/2]). -export([new_id/1]). --export([get_stream/2, put_stream/3, del_stream/2, fold_streams/3]). +-export([get_stream/2, put_stream/3, del_stream/2, fold_streams/3, n_streams/1]). -export([get_seqno/2, put_seqno/3]). -export([get_rank/2, put_rank/3, del_rank/2, fold_ranks/3]). -export([get_subscriptions/1, put_subscription/4, del_subscription/3]). 
+-export([ + get_awaiting_rel/2, + put_awaiting_rel/3, + del_awaiting_rel/2, + fold_awaiting_rel/3, + n_awaiting_rel/1 +]). -export([make_session_iterator/0, session_iterator_next/2]). @@ -117,7 +124,8 @@ subscriptions := subscriptions(), seqnos := pmap(seqno_type(), emqx_persistent_session_ds:seqno()), streams := pmap(emqx_ds:stream(), emqx_persistent_session_ds:stream_state()), - ranks := pmap(term(), integer()) + ranks := pmap(term(), integer()), + awaiting_rel := pmap(emqx_types:packet_id(), _Timestamp :: integer()) }. -define(session_tab, emqx_ds_session_tab). @@ -125,7 +133,8 @@ -define(stream_tab, emqx_ds_session_streams). -define(seqno_tab, emqx_ds_session_seqnos). -define(rank_tab, emqx_ds_session_ranks). --define(pmap_tables, [?stream_tab, ?seqno_tab, ?rank_tab, ?subscription_tab]). +-define(awaiting_rel_tab, emqx_ds_session_awaiting_rel). +-define(pmap_tables, [?stream_tab, ?seqno_tab, ?rank_tab, ?subscription_tab, ?awaiting_rel_tab]). %% Enable this flag if you suspect some code breaks the sequence: -ifndef(CHECK_SEQNO). @@ -167,6 +176,7 @@ open(SessionId) -> streams => pmap_open(?stream_tab, SessionId), seqnos => pmap_open(?seqno_tab, SessionId), ranks => pmap_open(?rank_tab, SessionId), + awaiting_rel => pmap_open(?awaiting_rel_tab, SessionId), ?unset_dirty }, {ok, Rec}; @@ -190,7 +200,8 @@ format(#{ subscriptions := SubsGBT, streams := Streams, seqnos := Seqnos, - ranks := Ranks + ranks := Ranks, + awaiting_rel := AwaitingRel }) -> Subs = emqx_topic_gbt:fold( fun(Key, Sub, Acc) -> @@ -204,7 +215,8 @@ format(#{ subscriptions => Subs, streams => pmap_format(Streams), seqnos => pmap_format(Seqnos), - ranks => pmap_format(Ranks) + ranks => pmap_format(Ranks), + awaiting_rel => pmap_format(AwaitingRel) }. -spec list_sessions() -> [emqx_persistent_session_ds:id()]. 
@@ -229,7 +241,8 @@ commit( metadata := Metadata, streams := Streams, seqnos := SeqNos, - ranks := Ranks + ranks := Ranks, + awaiting_rel := AwaitingRel } ) -> check_sequence(Rec), @@ -239,6 +252,7 @@ commit( streams => pmap_commit(SessionId, Streams), seqnos => pmap_commit(SessionId, SeqNos), ranks => pmap_commit(SessionId, Ranks), + awaiting_rel => pmap_commit(SessionId, AwaitingRel), ?unset_dirty } end). @@ -254,6 +268,7 @@ create_new(SessionId) -> streams => pmap_open(?stream_tab, SessionId), seqnos => pmap_open(?seqno_tab, SessionId), ranks => pmap_open(?rank_tab, SessionId), + awaiting_rel => pmap_open(?awaiting_rel_tab, SessionId), ?set_dirty } end). @@ -382,6 +397,10 @@ del_stream(Key, Rec) -> fold_streams(Fun, Acc, Rec) -> gen_fold(streams, Fun, Acc, Rec). +-spec n_streams(t()) -> non_neg_integer(). +n_streams(Rec) -> + gen_size(streams, Rec). + %% -spec get_seqno(seqno_type(), t()) -> emqx_persistent_session_ds:seqno() | undefined. @@ -412,6 +431,30 @@ del_rank(Key, Rec) -> fold_ranks(Fun, Acc, Rec) -> gen_fold(ranks, Fun, Acc, Rec). +%% + +-spec get_awaiting_rel(emqx_types:packet_id(), t()) -> integer() | undefined. +get_awaiting_rel(Key, Rec) -> + gen_get(awaiting_rel, Key, Rec). + +-spec put_awaiting_rel(emqx_types:packet_id(), _Timestamp :: integer(), t()) -> t(). +put_awaiting_rel(Key, Val, Rec) -> + gen_put(awaiting_rel, Key, Val, Rec). + +-spec del_awaiting_rel(emqx_types:packet_id(), t()) -> t(). +del_awaiting_rel(Key, Rec) -> + gen_del(awaiting_rel, Key, Rec). + +-spec fold_awaiting_rel(fun(), Acc, t()) -> Acc. +fold_awaiting_rel(Fun, Acc, Rec) -> + gen_fold(awaiting_rel, Fun, Acc, Rec). + +-spec n_awaiting_rel(t()) -> non_neg_integer(). +n_awaiting_rel(Rec) -> + gen_size(awaiting_rel, Rec). + +%% + -spec make_session_iterator() -> session_iterator(). make_session_iterator() -> mnesia:dirty_first(?session_tab). @@ -475,6 +518,10 @@ gen_del(Field, Key, Rec) -> Rec#{?set_dirty} ). 
+gen_size(Field, Rec) -> + check_sequence(Rec), + pmap_size(maps:get(Field, Rec)). + %% read_subscriptions(SessionId) -> @@ -547,6 +594,10 @@ pmap_commit( pmap_format(#pmap{cache = Cache}) -> Cache. +-spec pmap_size(pmap(_K, _V)) -> non_neg_integer(). +pmap_size(#pmap{cache = Cache}) -> + maps:size(Cache). + %% Functions dealing with set tables: kv_persist(Tab, SessionId, Val0) -> diff --git a/apps/emqx/src/emqx_persistent_session_ds_subs.erl b/apps/emqx/src/emqx_persistent_session_ds_subs.erl index 92f17b108..9071ad9d9 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_subs.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_subs.erl @@ -24,7 +24,15 @@ -module(emqx_persistent_session_ds_subs). %% API: --export([on_subscribe/3, on_unsubscribe/3, gc/1, lookup/2, to_map/1, fold/3, fold_all/3]). +-export([ + on_subscribe/3, + on_unsubscribe/3, + gc/1, + lookup/2, + to_map/1, + fold/3, + fold_all/3 +]). -export_type([]). From b30ddc206e9244050764f4bff32151b07b83ea00 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sat, 13 Apr 2024 01:17:32 +0200 Subject: [PATCH 160/234] fix(sessds): Immutable subscriptions This commit fixes two issues: - Behavior of overlapping subscriptions has been aligned with the in-memory session. 
- Fixed handling of replays when subscription changes (either by client or EMQX configuration) --- apps/emqx/src/emqx_persistent_session_ds.erl | 166 +++++++------- apps/emqx/src/emqx_persistent_session_ds.hrl | 4 +- .../src/emqx_persistent_session_ds_state.erl | 205 ++++++++++-------- ...persistent_session_ds_stream_scheduler.erl | 46 +++- .../src/emqx_persistent_session_ds_subs.erl | 183 ++++++++++------ apps/emqx/src/emqx_session.erl | 5 + ...emqx_persistent_session_ds_state_tests.erl | 64 +----- 7 files changed, 370 insertions(+), 303 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 4517fa1b7..0829b3fd3 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -116,15 +116,42 @@ %% Currently, this is the clientid. We avoid `emqx_types:clientid()' because that can be %% an atom, in theory (?). -type id() :: binary(). --type topic_filter() :: emqx_types:topic(). +-type topic_filter() :: emqx_types:topic() | #share{}. + +%% Subscription and subscription states: +%% +%% Persistent sessions cannot simply update or delete subscriptions, +%% since subscription parameters must be exactly the same during +%% replay. +%% +%% To solve this problem, we store subscriptions in a twofold manner: +%% +%% - `subscription' is an object that holds up-to-date information +%% about the client's subscription and a reference to the latest +%% subscription state id +%% +%% - `subscription_state' is an immutable object that holds +%% information about the subcription parameters at a certain point of +%% time +%% +%% New subscription states are created whenever the client subscribes +%% to a topics, or updates an existing subscription. +%% +%% Stream replay states contain references to the subscription states. +%% +%% Outdated subscription states are discarded when they are not +%% referenced by either subscription or stream replay state objects. 
-type subscription_id() :: integer(). +%% This type is a result of merging +%% `emqx_persistent_session_ds_subs:subscription()' with its current +%% state. -type subscription() :: #{ id := subscription_id(), start_time := emqx_ds:time(), - props := map(), - deleted := boolean() + current_state := emqx_persistent_session_ds_subs:subscription_state_id(), + subopts := map() }. -define(TIMER_PULL, timer_pull). @@ -252,7 +279,7 @@ info(is_persistent, #{}) -> info(subscriptions, #{s := S}) -> emqx_persistent_session_ds_subs:to_map(S); info(subscriptions_cnt, #{s := S}) -> - emqx_topic_gbt:size(emqx_persistent_session_ds_state:get_subscriptions(S)); + emqx_persistent_session_ds_state:n_subscriptions(S); info(subscriptions_max, #{props := Conf}) -> maps:get(max_subscriptions, Conf); info(upgrade_qos, #{props := Conf}) -> @@ -340,53 +367,20 @@ subscribe( subscribe( TopicFilter, SubOpts, - Session = #{id := ID, s := S0} + Session = #{id := ID, s := S0, props := #{upgrade_qos := UpgradeQoS}} ) -> - case emqx_persistent_session_ds_subs:lookup(TopicFilter, S0) of - undefined -> - %% TODO: max subscriptions - - %% N.B.: we chose to update the router before adding the - %% subscription to the session/iterator table. The - %% reasoning for this is as follows: - %% - %% Messages matching this topic filter should start to be - %% persisted as soon as possible to avoid missing - %% messages. If this is the first such persistent session - %% subscription, it's important to do so early on. - %% - %% This could, in turn, lead to some inconsistency: if - %% such a route gets created but the session/iterator data - %% fails to be updated accordingly, we have a dangling - %% route. To remove such dangling routes, we may have a - %% periodic GC process that removes routes that do not - %% have a matching persistent subscription. Also, route - %% operations use dirty mnesia operations, which - %% inherently have room for inconsistencies. 
- %% - %% In practice, we use the iterator reference table as a - %% source of truth, since it is guarded by a transaction - %% context: we consider a subscription operation to be - %% successful if it ended up changing this table. Both - %% router and iterator information can be reconstructed - %% from this table, if needed. - ok = emqx_persistent_session_ds_router:do_add_route(TopicFilter, ID), - {SubId, S1} = emqx_persistent_session_ds_state:new_id(S0), - Subscription = #{ - start_time => now_ms(), - props => SubOpts, - id => SubId, - deleted => false - }, - IsNew = true; - Subscription0 = #{} -> - Subscription = Subscription0#{props => SubOpts}, - IsNew = false, - S1 = S0 + {UpdateRouter, S1} = emqx_persistent_session_ds_subs:on_subscribe( + TopicFilter, UpgradeQoS, SubOpts, S0 + ), + case UpdateRouter of + true -> + ok = emqx_persistent_session_ds_router:do_add_route(TopicFilter, ID); + false -> + ok end, - S = emqx_persistent_session_ds_subs:on_subscribe(TopicFilter, Subscription, S1), + S = emqx_persistent_session_ds_state:commit(S1), ?tp(persistent_session_ds_subscription_added, #{ - topic_filter => TopicFilter, sub => Subscription, is_new => IsNew + topic_filter => TopicFilter, is_new => UpdateRouter }), {ok, Session#{s => S}}. @@ -399,15 +393,15 @@ unsubscribe( case emqx_persistent_session_ds_subs:lookup(TopicFilter, S0) of undefined -> {error, ?RC_NO_SUBSCRIPTION_EXISTED}; - Subscription = #{props := SubOpts} -> + Subscription = #{subopts := SubOpts} -> S = do_unsubscribe(ID, TopicFilter, Subscription, S0), {ok, Session#{s => S}, SubOpts} end. -spec do_unsubscribe(id(), topic_filter(), subscription(), emqx_persistent_session_ds_state:t()) -> emqx_persistent_session_ds_state:t(). 
-do_unsubscribe(SessionId, TopicFilter, Subscription = #{id := SubId}, S0) -> - S1 = emqx_persistent_session_ds_subs:on_unsubscribe(TopicFilter, Subscription, S0), +do_unsubscribe(SessionId, TopicFilter, #{id := SubId}, S0) -> + S1 = emqx_persistent_session_ds_subs:on_unsubscribe(TopicFilter, S0), ?tp(persistent_session_ds_subscription_delete, #{ session_id => SessionId, topic_filter => TopicFilter }), @@ -426,7 +420,7 @@ get_subscription(#share{}, _) -> undefined; get_subscription(TopicFilter, #{s := S}) -> case emqx_persistent_session_ds_subs:lookup(TopicFilter, S) of - _Subscription = #{props := SubOpts} -> + #{subopts := SubOpts} -> SubOpts; undefined -> undefined @@ -716,7 +710,7 @@ list_client_subscriptions(ClientId) -> %% TODO: this is not the most optimal implementation, since it %% should be possible to avoid reading extra data (streams, etc.) case print_session(ClientId) of - Sess = #{s := #{subscriptions := Subs}} -> + Sess = #{s := #{subscriptions := Subs, subscription_states := SStates}} -> Node = case Sess of #{'_alive' := {true, Pid}} -> @@ -726,8 +720,9 @@ list_client_subscriptions(ClientId) -> end, SubList = maps:fold( - fun(Topic, #{props := SubProps}, Acc) -> - Elem = {Topic, SubProps}, + fun(Topic, #{current_state := CS}, Acc) -> + #{subopts := SubOpts} = maps:get(CS, SStates), + Elem = {Topic, SubOpts}, [Elem | Acc] end, [], @@ -945,22 +940,31 @@ new_batch({StreamKey, Srs0}, BatchSize, Session0 = #{s := S0}, ClientInfo) -> Session0 end. 
-enqueue_batch(IsReplay, BatchSize, Srs0, Session = #{inflight := Inflight0}, ClientInfo) -> +enqueue_batch(IsReplay, BatchSize, Srs0, Session = #{inflight := Inflight0, s := S}, ClientInfo) -> #srs{ it_begin = ItBegin0, it_end = ItEnd0, first_seqno_qos1 = FirstSeqnoQos1, - first_seqno_qos2 = FirstSeqnoQos2 + first_seqno_qos2 = FirstSeqnoQos2, + sub_state_id = SubStateId } = Srs0, ItBegin = case IsReplay of true -> ItBegin0; false -> ItEnd0 end, + SubState = #{} = emqx_persistent_session_ds_state:get_subscription_state(SubStateId, S), case emqx_ds:next(?PERSISTENT_MESSAGE_DB, ItBegin, BatchSize) of {ok, ItEnd, Messages} -> {Inflight, LastSeqnoQos1, LastSeqnoQos2} = process_batch( - IsReplay, Session, ClientInfo, FirstSeqnoQos1, FirstSeqnoQos2, Messages, Inflight0 + IsReplay, + Session, + SubState, + ClientInfo, + FirstSeqnoQos1, + FirstSeqnoQos2, + Messages, + Inflight0 ), Srs = Srs0#srs{ it_begin = ItBegin, @@ -984,27 +988,29 @@ enqueue_batch(IsReplay, BatchSize, Srs0, Session = #{inflight := Inflight0}, Cli %% key_of_iter(#{3 := #{3 := #{5 := K}}}) -> %% K. 
-process_batch(_IsReplay, _Session, _ClientInfo, LastSeqNoQos1, LastSeqNoQos2, [], Inflight) -> +process_batch( + _IsReplay, _Session, _SubState, _ClientInfo, LastSeqNoQos1, LastSeqNoQos2, [], Inflight +) -> {Inflight, LastSeqNoQos1, LastSeqNoQos2}; process_batch( - IsReplay, Session, ClientInfo, FirstSeqNoQos1, FirstSeqNoQos2, [KV | Messages], Inflight0 + IsReplay, + Session, + SubState, + ClientInfo, + FirstSeqNoQos1, + FirstSeqNoQos2, + [KV | Messages], + Inflight0 ) -> - #{s := S, props := #{upgrade_qos := UpgradeQoS}} = Session, - {_DsMsgKey, Msg0 = #message{topic = Topic}} = KV, + #{s := S} = Session, + #{upgrade_qos := UpgradeQoS, subopts := SubOpts} = SubState, + {_DsMsgKey, Msg0} = KV, Comm1 = emqx_persistent_session_ds_state:get_seqno(?committed(?QOS_1), S), Comm2 = emqx_persistent_session_ds_state:get_seqno(?committed(?QOS_2), S), Dup1 = emqx_persistent_session_ds_state:get_seqno(?dup(?QOS_1), S), Dup2 = emqx_persistent_session_ds_state:get_seqno(?dup(?QOS_2), S), Rec = emqx_persistent_session_ds_state:get_seqno(?rec, S), - Subs = emqx_persistent_session_ds_state:get_subscriptions(S), - Msgs = [ - Msg - || SubMatch <- emqx_topic_gbt:matches(Topic, Subs, []), - Msg <- begin - #{props := SubOpts} = emqx_topic_gbt:get_record(SubMatch, Subs), - emqx_session:enrich_message(ClientInfo, Msg0, SubOpts, UpgradeQoS) - end - ], + Msgs = emqx_session:enrich_message(ClientInfo, Msg0, SubOpts, UpgradeQoS), {Inflight, LastSeqNoQos1, LastSeqNoQos2} = lists:foldl( fun(Msg = #message{qos = Qos}, {Acc, SeqNoQos10, SeqNoQos20}) -> case Qos of @@ -1060,7 +1066,7 @@ process_batch( Msgs ), process_batch( - IsReplay, Session, ClientInfo, LastSeqNoQos1, LastSeqNoQos2, Messages, Inflight + IsReplay, Session, SubState, ClientInfo, LastSeqNoQos1, LastSeqNoQos2, Messages, Inflight ). %%-------------------------------------------------------------------- @@ -1077,15 +1083,13 @@ enqueue_transient(ClientInfo, Msg0, Session = #{s := S, props := #{upgrade_qos : %% queued messages. 
Since streams in this DB are exclusive to the %% session, messages from the queue can be dropped as soon as they %% are acked. - Subs = emqx_persistent_session_ds_state:get_subscriptions(S), - Msgs = [ - Msg - || SubMatch <- emqx_topic_gbt:matches(Msg0#message.topic, Subs, []), - Msg <- begin - #{props := SubOpts} = emqx_topic_gbt:get_record(SubMatch, Subs), - emqx_session:enrich_message(ClientInfo, Msg0, SubOpts, UpgradeQoS) - end - ], + case emqx_persistent_session_ds_state:get_subscription(Msg0#message.topic, S) of + #{current_state := CS} -> + #{subopts := SubOpts} = emqx_persistent_session_ds_state:get_subscription_state(CS, S); + undefined -> + SubOpts = undefined + end, + Msgs = emqx_session:enrich_message(ClientInfo, Msg0, SubOpts, UpgradeQoS), lists:foldl(fun do_enqueue_transient/2, Session, Msgs). do_enqueue_transient(Msg = #message{qos = Qos}, Session = #{inflight := Inflight0, s := S0}) -> diff --git a/apps/emqx/src/emqx_persistent_session_ds.hrl b/apps/emqx/src/emqx_persistent_session_ds.hrl index 56862dfa5..e2b52e36d 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.hrl +++ b/apps/emqx/src/emqx_persistent_session_ds.hrl @@ -65,7 +65,9 @@ last_seqno_qos2 = 0 :: emqx_persistent_session_ds:seqno(), %% This stream belongs to an unsubscribed topic-filter, and is %% marked for deletion: - unsubscribed = false :: boolean() + unsubscribed = false :: boolean(), + %% Reference to the subscription state: + sub_state_id :: emqx_persistent_session_ds_subs:subscription_state_id() }). %% Session metadata keys: diff --git a/apps/emqx/src/emqx_persistent_session_ds_state.erl b/apps/emqx/src/emqx_persistent_session_ds_state.erl index fc2da1317..90d86bb1d 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_state.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_state.erl @@ -37,7 +37,19 @@ -export([get_stream/2, put_stream/3, del_stream/2, fold_streams/3, n_streams/1]). -export([get_seqno/2, put_seqno/3]). 
-export([get_rank/2, put_rank/3, del_rank/2, fold_ranks/3]). --export([get_subscriptions/1, put_subscription/4, del_subscription/3]). +-export([ + get_subscription_state/2, + fold_subscription_states/3, + put_subscription_state/3, + del_subscription_state/2 +]). +-export([ + get_subscription/2, + fold_subscriptions/3, + n_subscriptions/1, + put_subscription/3, + del_subscription/2 +]). -export([ get_awaiting_rel/2, put_awaiting_rel/3, @@ -51,7 +63,6 @@ -export_type([ t/0, metadata/0, - subscriptions/0, seqno_type/0, stream_key/0, rank_key/0, @@ -69,8 +80,6 @@ -type message() :: emqx_types:message(). --type subscriptions() :: emqx_topic_gbt:t(_SubId, emqx_persistent_session_ds:subscription()). - -opaque session_iterator() :: emqx_persistent_session_ds:id() | '$end_of_table'. %% Generic key-value wrapper that is used for exporting arbitrary @@ -121,7 +130,13 @@ id := emqx_persistent_session_ds:id(), dirty := boolean(), metadata := metadata(), - subscriptions := subscriptions(), + subscriptions := pmap( + emqx_persistent_session_ds:topic_filter(), emqx_persistent_session_ds_subs:subscription() + ), + subscription_states := pmap( + emqx_persistent_session_ds_subs:subscription_state_id(), + emqx_persistent_session_ds_subs:subscription_state() + ), seqnos := pmap(seqno_type(), emqx_persistent_session_ds:seqno()), streams := pmap(emqx_ds:stream(), emqx_persistent_session_ds:stream_state()), ranks := pmap(term(), integer()), @@ -130,11 +145,20 @@ -define(session_tab, emqx_ds_session_tab). -define(subscription_tab, emqx_ds_session_subscriptions). +-define(subscription_states_tab, emqx_ds_session_subscription_states). -define(stream_tab, emqx_ds_session_streams). -define(seqno_tab, emqx_ds_session_seqnos). -define(rank_tab, emqx_ds_session_ranks). -define(awaiting_rel_tab, emqx_ds_session_awaiting_rel). --define(pmap_tables, [?stream_tab, ?seqno_tab, ?rank_tab, ?subscription_tab, ?awaiting_rel_tab]). 
+ +-define(pmaps, [ + {subscriptions, ?subscription_tab}, + {subscription_states, ?subscription_states_tab}, + {streams, ?stream_tab}, + {seqnos, ?seqno_tab}, + {ranks, ?rank_tab}, + {awaiting_rel, ?awaiting_rel_tab} +]). %% Enable this flag if you suspect some code breaks the sequence: -ifndef(CHECK_SEQNO). @@ -161,24 +185,25 @@ create_tables() -> {attributes, record_info(fields, kv)} ] ), - [create_kv_pmap_table(Table) || Table <- ?pmap_tables], - mria:wait_for_tables([?session_tab | ?pmap_tables]). + {_, PmapTables} = lists:unzip(?pmaps), + [create_kv_pmap_table(Table) || Table <- PmapTables], + mria:wait_for_tables([?session_tab | PmapTables]). -spec open(emqx_persistent_session_ds:id()) -> {ok, t()} | undefined. open(SessionId) -> ro_transaction(fun() -> case kv_restore(?session_tab, SessionId) of [Metadata] -> - Rec = #{ - id => SessionId, - metadata => Metadata, - subscriptions => read_subscriptions(SessionId), - streams => pmap_open(?stream_tab, SessionId), - seqnos => pmap_open(?seqno_tab, SessionId), - ranks => pmap_open(?rank_tab, SessionId), - awaiting_rel => pmap_open(?awaiting_rel_tab, SessionId), - ?unset_dirty - }, + Rec = update_pmaps( + fun(_Pmap, Table) -> + pmap_open(Table, SessionId) + end, + #{ + id => SessionId, + metadata => Metadata, + ?unset_dirty + } + ), {ok, Rec}; [] -> undefined @@ -195,29 +220,13 @@ print_session(SessionId) -> end. -spec format(t()) -> map(). -format(#{ - metadata := Metadata, - subscriptions := SubsGBT, - streams := Streams, - seqnos := Seqnos, - ranks := Ranks, - awaiting_rel := AwaitingRel -}) -> - Subs = emqx_topic_gbt:fold( - fun(Key, Sub, Acc) -> - maps:put(emqx_topic_gbt:get_topic(Key), Sub, Acc) +format(Rec) -> + update_pmaps( + fun(Pmap, _Table) -> + pmap_format(Pmap) end, - #{}, - SubsGBT - ), - #{ - metadata => Metadata, - subscriptions => Subs, - streams => pmap_format(Streams), - seqnos => pmap_format(Seqnos), - ranks => pmap_format(Ranks), - awaiting_rel => pmap_format(AwaitingRel) - }. 
+ maps:without([id, dirty], Rec) + ). -spec list_sessions() -> [emqx_persistent_session_ds:id()]. list_sessions() -> @@ -227,7 +236,7 @@ list_sessions() -> delete(Id) -> transaction( fun() -> - [kv_pmap_delete(Table, Id) || Table <- ?pmap_tables], + [kv_pmap_delete(Table, Id) || {_, Table} <- ?pmaps], mnesia:delete(?session_tab, Id, write) end ). @@ -238,39 +247,34 @@ commit(Rec = #{dirty := false}) -> commit( Rec = #{ id := SessionId, - metadata := Metadata, - streams := Streams, - seqnos := SeqNos, - ranks := Ranks, - awaiting_rel := AwaitingRel + metadata := Metadata } ) -> check_sequence(Rec), transaction(fun() -> kv_persist(?session_tab, SessionId, Metadata), - Rec#{ - streams => pmap_commit(SessionId, Streams), - seqnos => pmap_commit(SessionId, SeqNos), - ranks => pmap_commit(SessionId, Ranks), - awaiting_rel => pmap_commit(SessionId, AwaitingRel), - ?unset_dirty - } + update_pmaps( + fun(Pmap, _Table) -> + pmap_commit(SessionId, Pmap) + end, + Rec#{?unset_dirty} + ) end). -spec create_new(emqx_persistent_session_ds:id()) -> t(). create_new(SessionId) -> transaction(fun() -> delete(SessionId), - #{ - id => SessionId, - metadata => #{}, - subscriptions => emqx_topic_gbt:new(), - streams => pmap_open(?stream_tab, SessionId), - seqnos => pmap_open(?seqno_tab, SessionId), - ranks => pmap_open(?rank_tab, SessionId), - awaiting_rel => pmap_open(?awaiting_rel_tab, SessionId), - ?set_dirty - } + update_pmaps( + fun(_Pmap, Table) -> + pmap_open(Table, SessionId) + end, + #{ + id => SessionId, + metadata => #{}, + ?set_dirty + } + ) end). %% @@ -351,30 +355,53 @@ new_id(Rec) -> %% --spec get_subscriptions(t()) -> subscriptions(). -get_subscriptions(#{subscriptions := Subs}) -> - Subs. +-spec get_subscription(emqx_persistent_session_ds:topic_filter(), t()) -> + emqx_persistent_session_ds_subs:subscription() | undefined. +get_subscription(TopicFilter, Rec) -> + gen_get(subscriptions, TopicFilter, Rec). + +-spec fold_subscriptions(fun(), Acc, t()) -> Acc. 
+fold_subscriptions(Fun, Acc, Rec) -> + gen_fold(subscriptions, Fun, Acc, Rec). + +-spec n_subscriptions(t()) -> non_neg_integer(). +n_subscriptions(Rec) -> + gen_size(subscriptions, Rec). -spec put_subscription( emqx_persistent_session_ds:topic_filter(), - _SubId, - emqx_persistent_session_ds:subscription(), + emqx_persistent_session_ds_subs:subscription(), t() ) -> t(). -put_subscription(TopicFilter, SubId, Subscription, Rec = #{id := Id, subscriptions := Subs0}) -> - %% Note: currently changes to the subscriptions are persisted immediately. - Key = {TopicFilter, SubId}, - transaction(fun() -> kv_pmap_persist(?subscription_tab, Id, Key, Subscription) end), - Subs = emqx_topic_gbt:insert(TopicFilter, SubId, Subscription, Subs0), - Rec#{subscriptions => Subs}. +put_subscription(TopicFilter, Subscription, Rec) -> + gen_put(subscriptions, TopicFilter, Subscription, Rec). --spec del_subscription(emqx_persistent_session_ds:topic_filter(), _SubId, t()) -> t(). -del_subscription(TopicFilter, SubId, Rec = #{id := Id, subscriptions := Subs0}) -> - %% Note: currently the subscriptions are persisted immediately. - Key = {TopicFilter, SubId}, - transaction(fun() -> kv_pmap_delete(?subscription_tab, Id, Key) end), - Subs = emqx_topic_gbt:delete(TopicFilter, SubId, Subs0), - Rec#{subscriptions => Subs}. +-spec del_subscription(emqx_persistent_session_ds:topic_filter(), t()) -> t(). +del_subscription(TopicFilter, Rec) -> + gen_del(subscriptions, TopicFilter, Rec). + +%% + +-spec get_subscription_state(emqx_persistent_session_ds_subs:subscription_state_id(), t()) -> + emqx_persistent_session_ds_subs:subscription_state() | undefined. +get_subscription_state(SStateId, Rec) -> + gen_get(subscription_states, SStateId, Rec). + +-spec fold_subscription_states(fun(), Acc, t()) -> Acc. +fold_subscription_states(Fun, Acc, Rec) -> + gen_fold(subscription_states, Fun, Acc, Rec). 
+ +-spec put_subscription_state( + emqx_persistent_session_ds_subs:subscription_state_id(), + emqx_persistent_session_ds_subs:subscription_state(), + t() +) -> t(). +put_subscription_state(SStateId, SState, Rec) -> + gen_put(subscription_states, SStateId, SState, Rec). + +-spec del_subscription_state(emqx_persistent_session_ds_subs:subscription_state_id(), t()) -> t(). +del_subscription_state(SStateId, Rec) -> + gen_del(subscription_states, SStateId, Rec). %% @@ -522,16 +549,16 @@ gen_size(Field, Rec) -> check_sequence(Rec), pmap_size(maps:get(Field, Rec)). -%% - -read_subscriptions(SessionId) -> - Records = kv_pmap_restore(?subscription_tab, SessionId), +-spec update_pmaps(fun((pmap(_K, _V) | undefined, atom()) -> term()), map()) -> map(). +update_pmaps(Fun, Map) -> lists:foldl( - fun({{TopicFilter, SubId}, Subscription}, Acc) -> - emqx_topic_gbt:insert(TopicFilter, SubId, Subscription, Acc) + fun({MapKey, Table}, Acc) -> + OldVal = maps:get(MapKey, Map, undefined), + Val = Fun(OldVal, Table), + maps:put(MapKey, Val, Acc) end, - emqx_topic_gbt:new(), - Records + Map, + ?pmaps ). 
%% diff --git a/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl b/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl index 154f59b44..1be0bdf4a 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_stream_scheduler.erl @@ -126,9 +126,10 @@ find_new_streams(S) -> renew_streams(S0) -> S1 = remove_unsubscribed_streams(S0), S2 = remove_fully_replayed_streams(S1), + S3 = update_stream_subscription_state_ids(S2), emqx_persistent_session_ds_subs:fold( fun - (Key, #{start_time := StartTime, id := SubId, deleted := false}, Acc) -> + (Key, #{start_time := StartTime, id := SubId, current_state := SStateId}, Acc) -> TopicFilter = emqx_topic:words(Key), Streams = select_streams( SubId, @@ -137,7 +138,7 @@ renew_streams(S0) -> ), lists:foldl( fun(I, Acc1) -> - ensure_iterator(TopicFilter, StartTime, SubId, I, Acc1) + ensure_iterator(TopicFilter, StartTime, SubId, SStateId, I, Acc1) end, Acc, Streams @@ -145,8 +146,8 @@ renew_streams(S0) -> (_Key, _DeletedSubscription, Acc) -> Acc end, - S2, - S2 + S3, + S3 ). -spec on_unsubscribe( @@ -201,7 +202,7 @@ is_fully_acked(Srs, S) -> %% Internal functions %%================================================================================ -ensure_iterator(TopicFilter, StartTime, SubId, {{RankX, RankY}, Stream}, S) -> +ensure_iterator(TopicFilter, StartTime, SubId, SStateId, {{RankX, RankY}, Stream}, S) -> Key = {SubId, Stream}, case emqx_persistent_session_ds_state:get_stream(Key, S) of undefined -> @@ -214,7 +215,8 @@ ensure_iterator(TopicFilter, StartTime, SubId, {{RankX, RankY}, Stream}, S) -> rank_x = RankX, rank_y = RankY, it_begin = Iterator, - it_end = Iterator + it_end = Iterator, + sub_state_id = SStateId }, emqx_persistent_session_ds_state:put_stream(Key, NewStreamState, S); {error, recoverable, Reason} -> @@ -350,6 +352,38 @@ remove_fully_replayed_streams(S0) -> S1 ). 
+%% @doc Update subscription state IDs for all streams that don't have unacked messages +-spec update_stream_subscription_state_ids(emqx_persistent_session_ds_state:t()) -> + emqx_persistent_session_ds_state:t(). +update_stream_subscription_state_ids(S0) -> + CommQos1 = emqx_persistent_session_ds_state:get_seqno(?committed(?QOS_1), S0), + CommQos2 = emqx_persistent_session_ds_state:get_seqno(?committed(?QOS_2), S0), + %% Find the latest state IDs for each subscription: + LastSStateIds = emqx_persistent_session_ds_state:fold_subscriptions( + fun(_, #{id := SubId, current_state := SStateId}, Acc) -> + Acc#{SubId => SStateId} + end, + #{}, + S0 + ), + %% Update subscription state IDs for fully acked streams: + emqx_persistent_session_ds_state:fold_streams( + fun + (_, #srs{unsubscribed = true}, S) -> + S; + (Key = {SubId, _Stream}, SRS0, S) -> + case is_fully_acked(CommQos1, CommQos2, SRS0) of + true -> + SRS = SRS0#srs{sub_state_id = maps:get(SubId, LastSStateIds)}, + emqx_persistent_session_ds_state:put_stream(Key, SRS, S); + false -> + S + end + end, + S0, + S0 + ). + %% @doc Compare the streams by the order in which they were replayed. compare_streams( {_KeyA, #srs{first_seqno_qos1 = A1, first_seqno_qos2 = A2}}, diff --git a/apps/emqx/src/emqx_persistent_session_ds_subs.erl b/apps/emqx/src/emqx_persistent_session_ds_subs.erl index 9071ad9d9..e9e2a97ee 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_subs.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_subs.erl @@ -25,21 +25,47 @@ %% API: -export([ - on_subscribe/3, - on_unsubscribe/3, + on_subscribe/4, + on_unsubscribe/2, gc/1, lookup/2, to_map/1, - fold/3, - fold_all/3 + fold/3 ]). --export_type([]). +-export_type([subscription_state_id/0, subscription/0, subscription_state/0]). + +-include("emqx_persistent_session_ds.hrl"). 
%%================================================================================ %% Type declarations %%================================================================================ +-type subscription() :: #{ + %% Session-unique identifier of the subscription. Other objects + %% can use it as a compact reference: + id := emqx_persistent_session_ds:subscription_id(), + %% Reference to the current subscription state: + current_state := subscription_state_id(), + %% Time when the subscription was added: + start_time := emqx_ds:time() +}. + +-type subscription_state_id() :: integer(). + +-type subscription_state() :: #{ + parent_subscription := emqx_persistent_session_ds:subscription_id(), + upgrade_qos := boolean(), + %% SubOpts: + subopts := #{ + nl => _, + qos => _, + rap => _, + subid => _, + _ => _ + } +}. + %%================================================================================ %% API functions %%================================================================================ @@ -47,41 +73,88 @@ %% @doc Process a new subscription -spec on_subscribe( emqx_persistent_session_ds:topic_filter(), - emqx_persistent_session_ds:subscription(), + boolean(), + emqx_types:subopts(), emqx_persistent_session_ds_state:t() ) -> - emqx_persistent_session_ds_state:t(). -on_subscribe(TopicFilter, Subscription, S) -> - emqx_persistent_session_ds_state:put_subscription(TopicFilter, [], Subscription, S). + {_UpdateRouter :: boolean(), emqx_persistent_session_ds_state:t()}. 
+on_subscribe(TopicFilter, UpgradeQoS, SubOpts, S0) ->
+    case emqx_persistent_session_ds_state:get_subscription(TopicFilter, S0) of
+        undefined ->
+            %% This is a new subscription:
+            {SubId, S1} = emqx_persistent_session_ds_state:new_id(S0),
+            {SStateId, S2} = emqx_persistent_session_ds_state:new_id(S1),
+            SState = #{parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts},
+            S3 = emqx_persistent_session_ds_state:put_subscription_state(SStateId, SState, S2),
+            Subscription = #{
+                id => SubId,
+                current_state => SStateId,
+                start_time => now_ms()
+            },
+            S = emqx_persistent_session_ds_state:put_subscription(TopicFilter, Subscription, S3),
+            {true, S};
+        Sub0 = #{current_state := SStateId0, id := SubId} ->
+            SState = #{parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts},
+            case emqx_persistent_session_ds_state:get_subscription_state(SStateId0, S0) of
+                SState ->
+                    %% Client resubscribed with the same parameters:
+                    {false, S0};
+                _ ->
+                    %% Subscription parameters changed:
+                    {SStateId, S1} = emqx_persistent_session_ds_state:new_id(S0),
+                    S2 = emqx_persistent_session_ds_state:put_subscription_state(
+                        SStateId, SState, S1
+                    ),
+                    Sub = Sub0#{current_state => SStateId},
+                    S = emqx_persistent_session_ds_state:put_subscription(TopicFilter, Sub, S2),
+                    {false, S}
+            end
+    end.
 
 %% @doc Process UNSUBSCRIBE
 -spec on_unsubscribe(
     emqx_persistent_session_ds:topic_filter(),
-    emqx_persistent_session_ds:subscription(),
     emqx_persistent_session_ds_state:t()
 ) ->
     emqx_persistent_session_ds_state:t().
-on_unsubscribe(TopicFilter, Subscription0, S0) ->
-    %% Note: we cannot delete the subscription immediately, since its
-    %% metadata can be used during replay (see `process_batch'). We
-    %% instead mark it as deleted, and let `subscription_gc' function
-    %% dispatch it later:
-    Subscription = Subscription0#{deleted => true},
-    emqx_persistent_session_ds_state:put_subscription(TopicFilter, [], Subscription, S0).
+on_unsubscribe(TopicFilter, S0) -> + emqx_persistent_session_ds_state:del_subscription(TopicFilter, S0). -%% @doc Remove subscriptions that have been marked for deletion, and -%% that don't have any unacked messages: +%% @doc Remove subscription states that don't have a parent, and that +%% don't have any unacked messages: -spec gc(emqx_persistent_session_ds_state:t()) -> emqx_persistent_session_ds_state:t(). gc(S0) -> - fold_all( - fun(TopicFilter, #{id := SubId, deleted := Deleted}, Acc) -> - case Deleted andalso has_no_unacked_streams(SubId, S0) of - true -> - emqx_persistent_session_ds_state:del_subscription(TopicFilter, [], Acc); + %% Create a set of subscription states IDs referenced either by a + %% subscription or a stream replay state: + AliveSet0 = emqx_persistent_session_ds_state:fold_subscriptions( + fun(_TopicFilter, #{current_state := SStateId}, Acc) -> + Acc#{SStateId => true} + end, + #{}, + S0 + ), + AliveSet = emqx_persistent_session_ds_state:fold_streams( + fun(_StreamId, SRS = #srs{sub_state_id = SStateId}, Acc) -> + case emqx_persistent_session_ds_stream_scheduler:is_fully_acked(SRS, S0) of false -> + Acc#{SStateId => true}; + true -> Acc end end, + AliveSet0, + S0 + ), + %% Delete dangling subscription states: + emqx_persistent_session_ds_state:fold_subscription_states( + fun(SStateId, _, S) -> + case maps:is_key(SStateId, AliveSet) of + true -> + S; + false -> + emqx_persistent_session_ds_state:del_subscription_state(SStateId, S) + end + end, S0, S0 ). @@ -90,12 +163,16 @@ gc(S0) -> -spec lookup(emqx_persistent_session_ds:topic_filter(), emqx_persistent_session_ds_state:t()) -> emqx_persistent_session_ds:subscription() | undefined. 
lookup(TopicFilter, S) -> - Subs = emqx_persistent_session_ds_state:get_subscriptions(S), - case emqx_topic_gbt:lookup(TopicFilter, [], Subs, undefined) of - #{deleted := true} -> - undefined; - Sub -> - Sub + case emqx_persistent_session_ds_state:get_subscription(TopicFilter, S) of + Sub = #{current_state := SStateId} -> + case emqx_persistent_session_ds_state:get_subscription_state(SStateId, S) of + #{subopts := SubOpts} -> + Sub#{subopts => SubOpts}; + undefined -> + undefined + end; + undefined -> + undefined end. %% @doc Convert active subscriptions to a map, for information @@ -103,7 +180,7 @@ lookup(TopicFilter, S) -> -spec to_map(emqx_persistent_session_ds_state:t()) -> map(). to_map(S) -> fold( - fun(TopicFilter, #{props := Props}, Acc) -> Acc#{TopicFilter => Props} end, + fun(TopicFilter, _, Acc) -> Acc#{TopicFilter => lookup(TopicFilter, S)} end, #{}, S ). @@ -115,48 +192,12 @@ to_map(S) -> emqx_persistent_session_ds_state:t() ) -> Acc. -fold(Fun, AccIn, S) -> - fold_all( - fun(TopicFilter, Sub = #{deleted := Deleted}, Acc) -> - case Deleted of - true -> Acc; - false -> Fun(TopicFilter, Sub, Acc) - end - end, - AccIn, - S - ). - -%% @doc Fold over all subscriptions, including inactive ones: --spec fold_all( - fun((emqx_types:topic(), emqx_persistent_session_ds:subscription(), Acc) -> Acc), - Acc, - emqx_persistent_session_ds_state:t() -) -> - Acc. -fold_all(Fun, AccIn, S) -> - Subs = emqx_persistent_session_ds_state:get_subscriptions(S), - emqx_topic_gbt:fold( - fun(Key, Sub, Acc) -> Fun(emqx_topic_gbt:get_topic(Key), Sub, Acc) end, - AccIn, - Subs - ). +fold(Fun, Acc, S) -> + emqx_persistent_session_ds_state:fold_subscriptions(Fun, Acc, S). 
%%================================================================================ %% Internal functions %%================================================================================ --spec has_no_unacked_streams( - emqx_persistent_session_ds:subscription_id(), emqx_persistent_session_ds_state:t() -) -> boolean(). -has_no_unacked_streams(SubId, S) -> - emqx_persistent_session_ds_state:fold_streams( - fun - ({SID, _Stream}, Srs, Acc) when SID =:= SubId -> - emqx_persistent_session_ds_stream_scheduler:is_fully_acked(Srs, S) andalso Acc; - (_StreamKey, _Srs, Acc) -> - Acc - end, - true, - S - ). +now_ms() -> + erlang:system_time(millisecond). diff --git a/apps/emqx/src/emqx_session.erl b/apps/emqx/src/emqx_session.erl index 37a86bda6..3892740a6 100644 --- a/apps/emqx/src/emqx_session.erl +++ b/apps/emqx/src/emqx_session.erl @@ -429,6 +429,11 @@ enrich_deliver(ClientInfo, {deliver, Topic, Msg}, UpgradeQoS, Session) -> end, enrich_message(ClientInfo, Msg, SubOpts, UpgradeQoS). +%% Caution: updating this function _may_ break consistency of replay +%% for persistent sessions. Persistent sessions expect it to return +%% the same result during replay. If it changes the behavior between +%% releases, sessions restored from the cold storage may end up +%% replaying messages with different QoS, etc. enrich_message( ClientInfo = #{clientid := ClientId}, Msg = #message{from = ClientId}, diff --git a/apps/emqx/test/emqx_persistent_session_ds_state_tests.erl b/apps/emqx/test/emqx_persistent_session_ds_state_tests.erl index 61e0575a8..375b4f4b1 100644 --- a/apps/emqx/test/emqx_persistent_session_ds_state_tests.erl +++ b/apps/emqx/test/emqx_persistent_session_ds_state_tests.erl @@ -74,9 +74,6 @@ session_id() -> topic() -> oneof([<<"foo">>, <<"bar">>, <<"foo/#">>, <<"//+/#">>]). -subid() -> - oneof([[]]). - subscription() -> oneof([#{}]). 
@@ -129,18 +126,25 @@ put_req() -> {Track, Seqno}, {seqno_track(), seqno()}, {#s.seqno, put_seqno, Track, Seqno} + ), + ?LET( + {Topic, Subscription}, + {topic(), subscription()}, + {#s.subs, put_subscription, Topic, Subscription} ) ]). get_req() -> oneof([ {#s.streams, get_stream, stream_id()}, - {#s.seqno, get_seqno, seqno_track()} + {#s.seqno, get_seqno, seqno_track()}, + {#s.subs, get_subscription, topic()} ]). del_req() -> oneof([ - {#s.streams, del_stream, stream_id()} + {#s.streams, del_stream, stream_id()}, + {#s.subs, del_subscription, topic()} ]). command(S) -> @@ -153,13 +157,6 @@ command(S) -> {2, {call, ?MODULE, reopen, [session_id(S)]}}, {2, {call, ?MODULE, commit, [session_id(S)]}}, - %% Subscriptions: - {3, - {call, ?MODULE, put_subscription, [ - session_id(S), topic(), subid(), subscription() - ]}}, - {3, {call, ?MODULE, del_subscription, [session_id(S), topic(), subid()]}}, - %% Metadata: {3, {call, ?MODULE, put_metadata, [session_id(S), put_metadata()]}}, {3, {call, ?MODULE, get_metadata, [session_id(S), get_metadata()]}}, @@ -170,7 +167,6 @@ command(S) -> {3, {call, ?MODULE, gen_del, [session_id(S), del_req()]}}, %% Getters: - {4, {call, ?MODULE, get_subscriptions, [session_id(S)]}}, {1, {call, ?MODULE, iterate_sessions, [batch_size()]}} ]); false -> @@ -207,19 +203,6 @@ postcondition(S, {call, ?MODULE, gen_get, [SessionId, {Idx, Fun, Key}]}, Result) #{session_id => SessionId, key => Key, 'fun' => Fun} ), true; -postcondition(S, {call, ?MODULE, get_subscriptions, [SessionId]}, Result) -> - #{SessionId := #s{subs = Subs}} = S, - ?assertEqual(maps:size(Subs), emqx_topic_gbt:size(Result)), - maps:foreach( - fun({TopicFilter, Id}, Expected) -> - ?assertEqual( - Expected, - emqx_topic_gbt:lookup(TopicFilter, Id, Result, default) - ) - end, - Subs - ), - true; postcondition(_, _, _) -> true. 
@@ -227,22 +210,6 @@ next_state(S, _V, {call, ?MODULE, create_new, [SessionId]}) -> S#{SessionId => #s{}}; next_state(S, _V, {call, ?MODULE, delete, [SessionId]}) -> maps:remove(SessionId, S); -next_state(S, _V, {call, ?MODULE, put_subscription, [SessionId, TopicFilter, SubId, Subscription]}) -> - Key = {TopicFilter, SubId}, - update( - SessionId, - #s.subs, - fun(Subs) -> Subs#{Key => Subscription} end, - S - ); -next_state(S, _V, {call, ?MODULE, del_subscription, [SessionId, TopicFilter, SubId]}) -> - Key = {TopicFilter, SubId}, - update( - SessionId, - #s.subs, - fun(Subs) -> maps:remove(Key, Subs) end, - S - ); next_state(S, _V, {call, ?MODULE, put_metadata, [SessionId, {Key, _Fun, Val}]}) -> update( SessionId, @@ -296,19 +263,6 @@ reopen(SessionId) -> {ok, S} = emqx_persistent_session_ds_state:open(SessionId), put_state(SessionId, S). -put_subscription(SessionId, TopicFilter, SubId, Subscription) -> - S = emqx_persistent_session_ds_state:put_subscription( - TopicFilter, SubId, Subscription, get_state(SessionId) - ), - put_state(SessionId, S). - -del_subscription(SessionId, TopicFilter, SubId) -> - S = emqx_persistent_session_ds_state:del_subscription(TopicFilter, SubId, get_state(SessionId)), - put_state(SessionId, S). - -get_subscriptions(SessionId) -> - emqx_persistent_session_ds_state:get_subscriptions(get_state(SessionId)). - put_metadata(SessionId, {_MetaKey, Fun, Value}) -> S = apply(emqx_persistent_session_ds_state, Fun, [Value, get_state(SessionId)]), put_state(SessionId, S). 
From 6c83bbe10bdd472e9e820b0c90d1aa709104e7eb Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 7 Apr 2024 16:47:24 +0200 Subject: [PATCH 161/234] feat(mgmt): Filter subscriptions by durability --- .../src/emqx_mgmt_api_subscriptions.erl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl b/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl index cb8421211..9976bf881 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl @@ -86,7 +86,8 @@ fields(subscription) -> {qos, hoconsc:mk(emqx_schema:qos(), #{desc => <<"QoS">>, example => 0})}, {nl, hoconsc:mk(integer(), #{desc => <<"No Local">>, example => 0})}, {rap, hoconsc:mk(integer(), #{desc => <<"Retain as Published">>, example => 0})}, - {rh, hoconsc:mk(integer(), #{desc => <<"Retain Handling">>, example => 0})} + {rh, hoconsc:mk(integer(), #{desc => <<"Retain Handling">>, example => 0})}, + {durable, hoconsc:mk(boolean(), #{desc => <<"Durable subscription">>, example => false})} ]. parameters() -> @@ -141,6 +142,14 @@ parameters() -> required => false, desc => <<"Shared subscription group name">> }) + }, + { + durable, + hoconsc:mk(boolean(), #{ + in => query, + required => false, + desc => <<"Filter subscriptions by durability">> + }) } ]. 
From 6c897c26aedf2843b203181f66e2ec3c94195b01 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sat, 13 Apr 2024 10:23:52 +0200 Subject: [PATCH 162/234] fix(sessds): Commit session on unsubscribe --- apps/emqx/src/emqx_persistent_session_ds.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 0829b3fd3..5cae8487d 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -394,7 +394,8 @@ unsubscribe( undefined -> {error, ?RC_NO_SUBSCRIPTION_EXISTED}; Subscription = #{subopts := SubOpts} -> - S = do_unsubscribe(ID, TopicFilter, Subscription, S0), + S1 = do_unsubscribe(ID, TopicFilter, Subscription, S0), + S = emqx_persistent_session_ds_state:commit(S1), {ok, Session#{s => S}, SubOpts} end. From 113a990482bbddb61d46fd75c4da57914aada27f Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sat, 13 Apr 2024 11:13:17 +0200 Subject: [PATCH 163/234] feat(sessds): Support max subscriptions --- apps/emqx/src/emqx_persistent_session_ds.erl | 9 ++-- .../src/emqx_persistent_session_ds_subs.erl | 41 ++++++++++++------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 5cae8487d..c25e3c813 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -367,10 +367,10 @@ subscribe( subscribe( TopicFilter, SubOpts, - Session = #{id := ID, s := S0, props := #{upgrade_qos := UpgradeQoS}} + Session = #{id := ID} ) -> {UpdateRouter, S1} = emqx_persistent_session_ds_subs:on_subscribe( - TopicFilter, UpgradeQoS, SubOpts, S0 + TopicFilter, SubOpts, Session ), case UpdateRouter of true -> @@ -379,9 +379,8 @@ subscribe( ok end, S = emqx_persistent_session_ds_state:commit(S1), - 
?tp(persistent_session_ds_subscription_added, #{ - topic_filter => TopicFilter, is_new => UpdateRouter - }), + UpdateRouter andalso + ?tp(persistent_session_ds_subscription_added, #{topic_filter => TopicFilter, session => ID}), {ok, Session#{s => S}}. -spec unsubscribe(topic_filter(), session()) -> diff --git a/apps/emqx/src/emqx_persistent_session_ds_subs.erl b/apps/emqx/src/emqx_persistent_session_ds_subs.erl index e9e2a97ee..1993370ed 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_subs.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_subs.erl @@ -25,7 +25,7 @@ %% API: -export([ - on_subscribe/4, + on_subscribe/3, on_unsubscribe/2, gc/1, lookup/2, @@ -73,26 +73,37 @@ %% @doc Process a new subscription -spec on_subscribe( emqx_persistent_session_ds:topic_filter(), - boolean(), emqx_types:subopts(), - emqx_persistent_session_ds_state:t() + emqx_persistent_session_ds:session() ) -> {_UpdateRouter :: boolean(), emqx_persistent_session_ds_state:t()}. -on_subscribe(TopicFilter, UpgradeQoS, SubOpts, S0) -> +on_subscribe(TopicFilter, SubOpts, #{s := S0, props := Props}) -> + #{upgrade_qos := UpgradeQoS, max_subscriptions := MaxSubscriptions} = Props, case emqx_persistent_session_ds_state:get_subscription(TopicFilter, S0) of undefined -> %% This is a new subscription: - {SubId, S1} = emqx_persistent_session_ds_state:new_id(S0), - {SStateId, S2} = emqx_persistent_session_ds_state:new_id(S1), - SState = #{parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts}, - S3 = emqx_persistent_session_ds_state:put_subscription_state(SStateId, SState, S2), - Subscription = #{ - id => SubId, - current_state => SStateId, - start_time => now_ms() - }, - S = emqx_persistent_session_ds_state:put_subscription(TopicFilter, Subscription, S3), - {true, S}; + case emqx_persistent_session_ds_state:n_subscriptions(S0) < MaxSubscriptions of + true -> + {SubId, S1} = emqx_persistent_session_ds_state:new_id(S0), + {SStateId, S2} = 
emqx_persistent_session_ds_state:new_id(S1), + SState = #{ + parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts + }, + S3 = emqx_persistent_session_ds_state:put_subscription_state( + SStateId, SState, S2 + ), + Subscription = #{ + id => SubId, + current_state => SStateId, + start_time => now_ms() + }, + S = emqx_persistent_session_ds_state:put_subscription( + TopicFilter, Subscription, S3 + ), + {true, S}; + false -> + {false, S0} + end; Sub0 = #{current_state := SStateId0, id := SubId} -> SState = #{parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts}, case emqx_persistent_session_ds_state:get_subscription_state(SStateId0, S0) of From 87ffaf89e5e98c32a83ef6d20b0e8278f0bb6c57 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:09:21 +0200 Subject: [PATCH 164/234] refactor(sessds_state): Use macros for map keys --- .../src/emqx_persistent_session_ds_state.erl | 90 ++++++++++--------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds_state.erl b/apps/emqx/src/emqx_persistent_session_ds_state.erl index 90d86bb1d..bad8352c8 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_state.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_state.erl @@ -126,21 +126,31 @@ | ?rec | ?committed(?QOS_2). +-define(id, id). +-define(dirty, dirty). +-define(metadata, metadata). +-define(subscriptions, subscriptions). +-define(subscription_states, subscription_states). +-define(seqnos, seqnos). +-define(streams, streams). +-define(ranks, ranks). +-define(awaiting_rel, awaiting_rel). 
+ -opaque t() :: #{ - id := emqx_persistent_session_ds:id(), - dirty := boolean(), - metadata := metadata(), - subscriptions := pmap( + ?id := emqx_persistent_session_ds:id(), + ?dirty := boolean(), + ?metadata := metadata(), + ?subscriptions := pmap( emqx_persistent_session_ds:topic_filter(), emqx_persistent_session_ds_subs:subscription() ), - subscription_states := pmap( + ?subscription_states := pmap( emqx_persistent_session_ds_subs:subscription_state_id(), emqx_persistent_session_ds_subs:subscription_state() ), - seqnos := pmap(seqno_type(), emqx_persistent_session_ds:seqno()), - streams := pmap(emqx_ds:stream(), emqx_persistent_session_ds:stream_state()), - ranks := pmap(term(), integer()), - awaiting_rel := pmap(emqx_types:packet_id(), _Timestamp :: integer()) + ?seqnos := pmap(seqno_type(), emqx_persistent_session_ds:seqno()), + ?streams := pmap(emqx_ds:stream(), emqx_persistent_session_ds:stream_state()), + ?ranks := pmap(term(), integer()), + ?awaiting_rel := pmap(emqx_types:packet_id(), _Timestamp :: integer()) }. -define(session_tab, emqx_ds_session_tab). @@ -152,12 +162,12 @@ -define(awaiting_rel_tab, emqx_ds_session_awaiting_rel). -define(pmaps, [ - {subscriptions, ?subscription_tab}, - {subscription_states, ?subscription_states_tab}, - {streams, ?stream_tab}, - {seqnos, ?seqno_tab}, - {ranks, ?rank_tab}, - {awaiting_rel, ?awaiting_rel_tab} + {?subscriptions, ?subscription_tab}, + {?subscription_states, ?subscription_states_tab}, + {?streams, ?stream_tab}, + {?seqnos, ?seqno_tab}, + {?ranks, ?rank_tab}, + {?awaiting_rel, ?awaiting_rel_tab} ]). %% Enable this flag if you suspect some code breaks the sequence: @@ -358,15 +368,15 @@ new_id(Rec) -> -spec get_subscription(emqx_persistent_session_ds:topic_filter(), t()) -> emqx_persistent_session_ds_subs:subscription() | undefined. get_subscription(TopicFilter, Rec) -> - gen_get(subscriptions, TopicFilter, Rec). + gen_get(?subscriptions, TopicFilter, Rec). -spec fold_subscriptions(fun(), Acc, t()) -> Acc. 
fold_subscriptions(Fun, Acc, Rec) -> - gen_fold(subscriptions, Fun, Acc, Rec). + gen_fold(?subscriptions, Fun, Acc, Rec). -spec n_subscriptions(t()) -> non_neg_integer(). n_subscriptions(Rec) -> - gen_size(subscriptions, Rec). + gen_size(?subscriptions, Rec). -spec put_subscription( emqx_persistent_session_ds:topic_filter(), @@ -374,22 +384,22 @@ n_subscriptions(Rec) -> t() ) -> t(). put_subscription(TopicFilter, Subscription, Rec) -> - gen_put(subscriptions, TopicFilter, Subscription, Rec). + gen_put(?subscriptions, TopicFilter, Subscription, Rec). -spec del_subscription(emqx_persistent_session_ds:topic_filter(), t()) -> t(). del_subscription(TopicFilter, Rec) -> - gen_del(subscriptions, TopicFilter, Rec). + gen_del(?subscriptions, TopicFilter, Rec). %% -spec get_subscription_state(emqx_persistent_session_ds_subs:subscription_state_id(), t()) -> emqx_persistent_session_ds_subs:subscription_state() | undefined. get_subscription_state(SStateId, Rec) -> - gen_get(subscription_states, SStateId, Rec). + gen_get(?subscription_states, SStateId, Rec). -spec fold_subscription_states(fun(), Acc, t()) -> Acc. fold_subscription_states(Fun, Acc, Rec) -> - gen_fold(subscription_states, Fun, Acc, Rec). + gen_fold(?subscription_states, Fun, Acc, Rec). -spec put_subscription_state( emqx_persistent_session_ds_subs:subscription_state_id(), @@ -397,11 +407,11 @@ fold_subscription_states(Fun, Acc, Rec) -> t() ) -> t(). put_subscription_state(SStateId, SState, Rec) -> - gen_put(subscription_states, SStateId, SState, Rec). + gen_put(?subscription_states, SStateId, SState, Rec). -spec del_subscription_state(emqx_persistent_session_ds_subs:subscription_state_id(), t()) -> t(). del_subscription_state(SStateId, Rec) -> - gen_del(subscription_states, SStateId, Rec). + gen_del(?subscription_states, SStateId, Rec). %% @@ -410,33 +420,33 @@ del_subscription_state(SStateId, Rec) -> -spec get_stream(stream_key(), t()) -> emqx_persistent_session_ds:stream_state() | undefined. 
get_stream(Key, Rec) -> - gen_get(streams, Key, Rec). + gen_get(?streams, Key, Rec). -spec put_stream(stream_key(), emqx_persistent_session_ds:stream_state(), t()) -> t(). put_stream(Key, Val, Rec) -> - gen_put(streams, Key, Val, Rec). + gen_put(?streams, Key, Val, Rec). -spec del_stream(stream_key(), t()) -> t(). del_stream(Key, Rec) -> - gen_del(streams, Key, Rec). + gen_del(?streams, Key, Rec). -spec fold_streams(fun(), Acc, t()) -> Acc. fold_streams(Fun, Acc, Rec) -> - gen_fold(streams, Fun, Acc, Rec). + gen_fold(?streams, Fun, Acc, Rec). -spec n_streams(t()) -> non_neg_integer(). n_streams(Rec) -> - gen_size(streams, Rec). + gen_size(?streams, Rec). %% -spec get_seqno(seqno_type(), t()) -> emqx_persistent_session_ds:seqno() | undefined. get_seqno(Key, Rec) -> - gen_get(seqnos, Key, Rec). + gen_get(?seqnos, Key, Rec). -spec put_seqno(seqno_type(), emqx_persistent_session_ds:seqno(), t()) -> t(). put_seqno(Key, Val, Rec) -> - gen_put(seqnos, Key, Val, Rec). + gen_put(?seqnos, Key, Val, Rec). %% @@ -444,41 +454,41 @@ put_seqno(Key, Val, Rec) -> -spec get_rank(rank_key(), t()) -> integer() | undefined. get_rank(Key, Rec) -> - gen_get(ranks, Key, Rec). + gen_get(?ranks, Key, Rec). -spec put_rank(rank_key(), integer(), t()) -> t(). put_rank(Key, Val, Rec) -> - gen_put(ranks, Key, Val, Rec). + gen_put(?ranks, Key, Val, Rec). -spec del_rank(rank_key(), t()) -> t(). del_rank(Key, Rec) -> - gen_del(ranks, Key, Rec). + gen_del(?ranks, Key, Rec). -spec fold_ranks(fun(), Acc, t()) -> Acc. fold_ranks(Fun, Acc, Rec) -> - gen_fold(ranks, Fun, Acc, Rec). + gen_fold(?ranks, Fun, Acc, Rec). %% -spec get_awaiting_rel(emqx_types:packet_id(), t()) -> integer() | undefined. get_awaiting_rel(Key, Rec) -> - gen_get(awaiting_rel, Key, Rec). + gen_get(?awaiting_rel, Key, Rec). -spec put_awaiting_rel(emqx_types:packet_id(), _Timestamp :: integer(), t()) -> t(). put_awaiting_rel(Key, Val, Rec) -> - gen_put(awaiting_rel, Key, Val, Rec). + gen_put(?awaiting_rel, Key, Val, Rec). 
-spec del_awaiting_rel(emqx_types:packet_id(), t()) -> t(). del_awaiting_rel(Key, Rec) -> - gen_del(awaiting_rel, Key, Rec). + gen_del(?awaiting_rel, Key, Rec). -spec fold_awaiting_rel(fun(), Acc, t()) -> Acc. fold_awaiting_rel(Fun, Acc, Rec) -> - gen_fold(awaiting_rel, Fun, Acc, Rec). + gen_fold(?awaiting_rel, Fun, Acc, Rec). -spec n_awaiting_rel(t()) -> non_neg_integer(). n_awaiting_rel(Rec) -> - gen_size(awaiting_rel, Rec). + gen_size(?awaiting_rel, Rec). %% From 93bb8403654ea6cb945f32d9b799bca01adecc02 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sat, 13 Apr 2024 21:38:45 +0200 Subject: [PATCH 165/234] docs(ds): Update README --- apps/emqx_durable_storage/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/README.md b/apps/emqx_durable_storage/README.md index f613085bb..1e87f3907 100644 --- a/apps/emqx_durable_storage/README.md +++ b/apps/emqx_durable_storage/README.md @@ -13,7 +13,7 @@ This makes the storage disk requirements very predictable: only the number of _p DS _backend_ is a callback module that implements `emqx_ds` behavior. -EMQX repository contains the "builtin" backend, implemented in `emqx_ds_replication_layer` module, that uses RocksDB as the main storage. +EMQX repository contains the "builtin" backend, implemented in `emqx_ds_replication_layer` module, that uses Raft algorithm for data replication, and RocksDB as the main storage. Note that builtin backend introduces the concept of **site** to alleviate the problem of changing node names. Site IDs are persistent, and they are randomly generated at the first startup of the node. 
@@ -95,10 +95,10 @@ Consumption of messages is done in several stages: # Limitation -- Builtin backend currently doesn't replicate data across different sites - There is no local cache of messages, which may result in transferring the same data multiple times # Documentation links + TBD # Usage From 197a4c30bee2b1b76b25e6f4c601963584d8fdd1 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:51:53 +0200 Subject: [PATCH 166/234] fix(sessds): Strip unneccessary data from the durable session state --- apps/emqx/src/emqx_persistent_session_ds.erl | 9 +++++++-- apps/emqx_management/src/emqx_mgmt_api_clients.erl | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index c25e3c813..d77372864 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -786,7 +786,7 @@ session_open(SessionId, ClientInfo, NewConnInfo, MaybeWillMsg) -> maps:get(peername, NewConnInfo), S2 ), S4 = emqx_persistent_session_ds_state:set_will_message(MaybeWillMsg, S3), - S5 = emqx_persistent_session_ds_state:set_clientinfo(ClientInfo, S4), + S5 = set_clientinfo(ClientInfo, S4), S = emqx_persistent_session_ds_state:commit(S5), Inflight = emqx_persistent_session_ds_inflight:new( receive_maximum(NewConnInfo) @@ -833,7 +833,7 @@ session_ensure_new(Id, ClientInfo, ConnInfo, MaybeWillMsg, Conf) -> ] ), S5 = emqx_persistent_session_ds_state:set_will_message(MaybeWillMsg, S4), - S6 = emqx_persistent_session_ds_state:set_clientinfo(ClientInfo, S5), + S6 = set_clientinfo(ClientInfo, S5), S = emqx_persistent_session_ds_state:commit(S6), #{ id => Id, @@ -864,6 +864,11 @@ session_drop(ID, Reason) -> now_ms() -> erlang:system_time(millisecond). 
+set_clientinfo(ClientInfo0, S) -> + %% Remove unnecessary fields from the clientinfo: + ClientInfo = maps:without([cn, dn, auth_result], ClientInfo0), + emqx_persistent_session_ds_state:set_clientinfo(ClientInfo, S). + %%-------------------------------------------------------------------- %% RPC targets (v1) %%-------------------------------------------------------------------- diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 7128f1c3a..4d08854af 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1763,7 +1763,9 @@ format_persistent_session_info(ClientId, PSInfo0) -> connected_at => CreatedAt, ip_address => IpAddress, is_persistent => true, - port => Port + port => Port, + heap_size => 0, + mqueue_len => 0 }, PSInfo = lists:foldl( fun result_format_time_fun/2, From 18196ec19ce39139e00d7f73f6d5c77f1c94d74c Mon Sep 17 00:00:00 2001 From: JianBo He Date: Wed, 17 Apr 2024 17:48:19 +0800 Subject: [PATCH 167/234] fix(ocpp): avoid an error log in handling dnsteeam messages --- apps/emqx_gateway_ocpp/include/emqx_ocpp.hrl | 5 +- .../src/emqx_gateway_ocpp.app.src | 2 +- .../src/emqx_ocpp_channel.erl | 31 +++---- .../src/emqx_ocpp_connection.erl | 1 + .../test/emqx_ocpp_SUITE.erl | 84 +++++++++++++++++++ 5 files changed, 101 insertions(+), 22 deletions(-) diff --git a/apps/emqx_gateway_ocpp/include/emqx_ocpp.hrl b/apps/emqx_gateway_ocpp/include/emqx_ocpp.hrl index dc779dc76..805c5f6f4 100644 --- a/apps/emqx_gateway_ocpp/include/emqx_ocpp.hrl +++ b/apps/emqx_gateway_ocpp/include/emqx_ocpp.hrl @@ -86,10 +86,9 @@ -define(IS_ERROR(F), F = #{type := ?OCPP_MSG_TYPE_ID_CALLERROR}). -define(IS_ERROR(F, Id), F = #{type := ?OCPP_MSG_TYPE_ID_CALLERROR, id := Id}). 
--define(IS_BootNotification_RESP(Payload), #{ +-define(IS_BootNotification_RESP(Status, Interval), #{ type := ?OCPP_MSG_TYPE_ID_CALLRESULT, - action := ?OCPP_ACT_BootNotification, - payload := Payload + payload := #{<<"status">> := Status, <<"interval">> := Interval} }). -define(ERR_FRAME(Id, Code, Desc), #{ diff --git a/apps/emqx_gateway_ocpp/src/emqx_gateway_ocpp.app.src b/apps/emqx_gateway_ocpp/src/emqx_gateway_ocpp.app.src index c7981a033..8682c164c 100644 --- a/apps/emqx_gateway_ocpp/src/emqx_gateway_ocpp.app.src +++ b/apps/emqx_gateway_ocpp/src/emqx_gateway_ocpp.app.src @@ -1,6 +1,6 @@ {application, emqx_gateway_ocpp, [ {description, "OCPP-J 1.6 Gateway for EMQX"}, - {vsn, "0.1.3"}, + {vsn, "0.1.4"}, {registered, []}, {applications, [kernel, stdlib, jesse, emqx, emqx_gateway]}, {env, []}, diff --git a/apps/emqx_gateway_ocpp/src/emqx_ocpp_channel.erl b/apps/emqx_gateway_ocpp/src/emqx_ocpp_channel.erl index cb8ec7e91..d20b35d04 100644 --- a/apps/emqx_gateway_ocpp/src/emqx_ocpp_channel.erl +++ b/apps/emqx_gateway_ocpp/src/emqx_ocpp_channel.erl @@ -527,20 +527,19 @@ apply_frame(Frames, Channel) when is_list(Frames) -> {Outgoings, NChannel} = lists:foldl(fun do_apply_frame/2, {[], Channel}, Frames), {lists:reverse(Outgoings), NChannel}; apply_frame(Frames, Channel) -> - ?SLOG(error, #{msg => "unexpected_frame_list", frames => Frames, channel => Channel}), + ?SLOG(error, #{msg => "unexpected_frame_list", frames => Frames}), Channel. 
-do_apply_frame(?IS_BootNotification_RESP(Payload), {Outgoings, Channel}) -> - case maps:get(<<"status">>, Payload) of +do_apply_frame(?IS_BootNotification_RESP(Status, Interval), {Outgoings, Channel}) -> + case Status of <<"Accepted">> -> - Intv = maps:get(<<"interval">>, Payload), - ?SLOG(info, #{msg => "adjust_heartbeat_timer", new_interval_s => Intv}), - {[{event, updated} | Outgoings], reset_keepalive(Intv, Channel)}; + ?SLOG(info, #{msg => "adjust_heartbeat_timer", new_interval_s => Interval}), + {[{event, updated} | Outgoings], reset_keepalive(Interval, Channel)}; _ -> {Outgoings, Channel} end; -do_apply_frame(Frame, Acc = {_Outgoings, Channel}) -> - ?SLOG(error, #{msg => "unexpected_frame", frame => Frame, channel => Channel}), +do_apply_frame(Frame, Acc = {_Outgoings, _Channel}) -> + ?SLOG(info, #{msg => "skip_to_apply_frame", frame => Frame}), Acc. %%-------------------------------------------------------------------- @@ -762,19 +761,15 @@ payload2frame(#{ action => Action, payload => Payload }; -payload2frame( - MqttPayload = - #{ - <<"MessageTypeId">> := ?OCPP_MSG_TYPE_ID_CALLRESULT, - <<"UniqueId">> := Id, - <<"Payload">> := Payload - } -) -> - Action = maps:get(<<"Action">>, MqttPayload, undefined), +payload2frame(#{ + <<"MessageTypeId">> := ?OCPP_MSG_TYPE_ID_CALLRESULT, + <<"UniqueId">> := Id, + <<"Payload">> := Payload +}) -> #{ type => ?OCPP_MSG_TYPE_ID_CALLRESULT, id => Id, - action => Action, + action => undefined, payload => Payload }; payload2frame(#{ diff --git a/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl b/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl index 0932314fe..331b9d323 100644 --- a/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl +++ b/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl @@ -237,6 +237,7 @@ do_init(Req, Opts, WsOpts) -> ) of {error, Reason, _State} -> + 1 = Reason, {ok, cowboy_req:reply(400, #{}, to_bin(Reason), Req), WsOpts}; {ok, [Resp, Opts, WsOpts], NState} -> {cowboy_websocket, Resp, [Req, Opts, 
NState], WsOpts} diff --git a/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl b/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl index 6d00726cf..b72eb9e1d 100644 --- a/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl +++ b/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl @@ -16,6 +16,7 @@ -module(emqx_ocpp_SUITE). +-include("emqx_ocpp.hrl"). -include_lib("eunit/include/eunit.hrl"). -include_lib("common_test/include/ct.hrl"). @@ -145,3 +146,86 @@ t_enable_disable_gw_ocpp(_Config) -> AssertEnabled(false), ?assertEqual({204, #{}}, request(put, "/gateways/ocpp/enable/true", <<>>)), AssertEnabled(true). + +t_adjust_keepalive_timer(_Config) -> + {ok, ClientPid} = connect("127.0.0.1", 33033, <<"client1">>), + UniqueId = <<"3335862321">>, + BootNotification = #{ + id => UniqueId, + type => ?OCPP_MSG_TYPE_ID_CALL, + action => <<"BootNotification">>, + payload => #{ + <<"chargePointVendor">> => <<"vendor1">>, + <<"chargePointModel">> => <<"model1">> + } + }, + ok = send_msg(ClientPid, BootNotification), + %% check the default keepalive timer + timer:sleep(1000), + ?assertMatch( + #{conninfo := #{keepalive := 60}}, emqx_gateway_cm:get_chan_info(ocpp, <<"client1">>) + ), + %% publish the BootNotification.ack + AckPayload = emqx_utils_json:encode(#{ + <<"MessageTypeId">> => ?OCPP_MSG_TYPE_ID_CALLRESULT, + <<"UniqueId">> => UniqueId, + <<"Payload">> => #{ + <<"currentTime">> => "2023-06-21T14:20:39+00:00", + <<"interval">> => 300, + <<"status">> => <<"Accepted">> + } + }), + _ = emqx:publish(emqx_message:make(<<"ocpp/cs/client1">>, AckPayload)), + {ok, _Resp} = receive_msg(ClientPid), + %% assert: check the keepalive timer is adjusted + ?assertMatch( + #{conninfo := #{keepalive := 300}}, emqx_gateway_cm:get_chan_info(ocpp, <<"client1">>) + ), + ok. 
+ +%%-------------------------------------------------------------------- +%% ocpp simple client + +connect(Host, Port, ClientId) -> + Timeout = 5000, + ConnOpts = #{connect_timeout => 5000}, + case gun:open(Host, Port, ConnOpts) of + {ok, ConnPid} -> + {ok, _} = gun:await_up(ConnPid, Timeout), + case upgrade(ConnPid, ClientId, Timeout) of + {ok, _Headers} -> {ok, ConnPid}; + Error -> Error + end; + Error -> + Error + end. + +upgrade(ConnPid, ClientId, Timeout) -> + Path = binary_to_list(<<"/ocpp/", ClientId/binary>>), + WsHeaders = [{<<"cache-control">>, <<"no-cache">>}], + StreamRef = gun:ws_upgrade(ConnPid, Path, WsHeaders, #{protocols => [{<<"ocpp1.6">>, gun_ws_h}]}), + receive + {gun_upgrade, ConnPid, StreamRef, [<<"websocket">>], Headers} -> + {ok, Headers}; + {gun_response, ConnPid, _, _, Status, Headers} -> + {error, {ws_upgrade_failed, Status, Headers}}; + {gun_error, ConnPid, StreamRef, Reason} -> + {error, {ws_upgrade_failed, Reason}} + after Timeout -> + {error, timeout} + end. + +send_msg(ConnPid, Frame) when is_map(Frame) -> + Opts = emqx_ocpp_frame:serialize_opts(), + Msg = emqx_ocpp_frame:serialize_pkt(Frame, Opts), + gun:ws_send(ConnPid, {text, Msg}). + +receive_msg(ConnPid) -> + receive + {gun_ws, ConnPid, _Ref, {_Type, Msg}} -> + ParseState = emqx_ocpp_frame:initial_parse_state(#{}), + {ok, Frame, _Rest, _NewParseStaet} = emqx_ocpp_frame:parse(Msg, ParseState), + {ok, Frame} + after 5000 -> + {error, timeout} + end. 
From e439a2e0f2513c005e3126c93a36a55b7db10d28 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 16 Apr 2024 12:05:48 +0200 Subject: [PATCH 168/234] fix(sessds): Save protocol name and version in the session metadata --- .../emqx_persistent_session_ds_SUITE.erl | 2 +- apps/emqx/src/emqx_persistent_session_ds.erl | 17 +++++++++++++---- apps/emqx/src/emqx_persistent_session_ds.hrl | 4 +++- .../src/emqx_persistent_session_ds_state.erl | 17 +++++++++++++++-- .../src/emqx_mgmt_api_clients.erl | 5 ++++- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl index 39764af30..ab062bff7 100644 --- a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl +++ b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl @@ -184,7 +184,7 @@ list_all_pubranges(Node) -> session_open(Node, ClientId) -> ClientInfo = #{}, - ConnInfo = #{peername => {undefined, undefined}}, + ConnInfo = #{peername => {undefined, undefined}, proto_name => <<"MQTT">>, proto_ver => 5}, WillMsg = undefined, erpc:call( Node, diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index d77372864..908e71bb5 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -767,7 +767,12 @@ sync(ClientId) -> %% the broker. -spec session_open(id(), emqx_types:clientinfo(), emqx_types:conninfo(), emqx_maybe:t(message())) -> session() | false. 
-session_open(SessionId, ClientInfo, NewConnInfo, MaybeWillMsg) -> +session_open( + SessionId, + ClientInfo, + NewConnInfo = #{proto_name := ProtoName, proto_ver := ProtoVer}, + MaybeWillMsg +) -> NowMS = now_ms(), case emqx_persistent_session_ds_state:open(SessionId) of {ok, S0} -> @@ -787,7 +792,8 @@ session_open(SessionId, ClientInfo, NewConnInfo, MaybeWillMsg) -> ), S4 = emqx_persistent_session_ds_state:set_will_message(MaybeWillMsg, S3), S5 = set_clientinfo(ClientInfo, S4), - S = emqx_persistent_session_ds_state:commit(S5), + S6 = emqx_persistent_session_ds_state:set_protocol({ProtoName, ProtoVer}, S5), + S = emqx_persistent_session_ds_state:commit(S6), Inflight = emqx_persistent_session_ds_inflight:new( receive_maximum(NewConnInfo) ), @@ -810,7 +816,9 @@ session_open(SessionId, ClientInfo, NewConnInfo, MaybeWillMsg) -> emqx_session:conf() ) -> session(). -session_ensure_new(Id, ClientInfo, ConnInfo, MaybeWillMsg, Conf) -> +session_ensure_new( + Id, ClientInfo, ConnInfo = #{proto_name := ProtoName, proto_ver := ProtoVer}, MaybeWillMsg, Conf +) -> ?tp(debug, persistent_session_ds_ensure_new, #{id => Id}), Now = now_ms(), S0 = emqx_persistent_session_ds_state:create_new(Id), @@ -834,7 +842,8 @@ session_ensure_new(Id, ClientInfo, ConnInfo, MaybeWillMsg, Conf) -> ), S5 = emqx_persistent_session_ds_state:set_will_message(MaybeWillMsg, S4), S6 = set_clientinfo(ClientInfo, S5), - S = emqx_persistent_session_ds_state:commit(S6), + S7 = emqx_persistent_session_ds_state:set_protocol({ProtoName, ProtoVer}, S6), + S = emqx_persistent_session_ds_state:commit(S7), #{ id => Id, props => Conf, diff --git a/apps/emqx/src/emqx_persistent_session_ds.hrl b/apps/emqx/src/emqx_persistent_session_ds.hrl index e2b52e36d..79920629a 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.hrl +++ b/apps/emqx/src/emqx_persistent_session_ds.hrl @@ -74,10 +74,12 @@ -define(created_at, created_at). -define(last_alive_at, last_alive_at). -define(expiry_interval, expiry_interval). 
-%% Unique integer used to create unique identities +%% Unique integer used to create unique identities: -define(last_id, last_id). +%% Connection info (relevent for the dashboard): -define(peername, peername). -define(will_message, will_message). -define(clientinfo, clientinfo). +-define(protocol, protocol). -endif. diff --git a/apps/emqx/src/emqx_persistent_session_ds_state.erl b/apps/emqx/src/emqx_persistent_session_ds_state.erl index bad8352c8..bc603647a 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_state.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_state.erl @@ -33,6 +33,7 @@ -export([get_clientinfo/1, set_clientinfo/2]). -export([get_will_message/1, set_will_message/2, clear_will_message/1, clear_will_message_now/1]). -export([get_peername/1, set_peername/2]). +-export([get_protocol/1, set_protocol/2]). -export([new_id/1]). -export([get_stream/2, put_stream/3, del_stream/2, fold_streams/3, n_streams/1]). -export([get_seqno/2, put_seqno/3]). @@ -66,7 +67,8 @@ seqno_type/0, stream_key/0, rank_key/0, - session_iterator/0 + session_iterator/0, + protocol/0 ]). -include("emqx_mqtt.hrl"). @@ -108,13 +110,16 @@ dirty :: #{K => dirty | del} }. +-type protocol() :: {binary(), emqx_types:proto_ver()}. + -type metadata() :: #{ ?created_at => emqx_persistent_session_ds:timestamp(), ?last_alive_at => emqx_persistent_session_ds:timestamp(), ?expiry_interval => non_neg_integer(), ?last_id => integer(), - ?peername => emqx_types:peername() + ?peername => emqx_types:peername(), + ?protocol => protocol() }. -type seqno_type() :: @@ -321,6 +326,14 @@ get_peername(Rec) -> set_peername(Val, Rec) -> set_meta(?peername, Val, Rec). +-spec get_protocol(t()) -> protocol() | undefined. +get_protocol(Rec) -> + get_meta(?protocol, Rec). + +-spec set_protocol(protocol(), t()) -> t(). +set_protocol(Val, Rec) -> + set_meta(?protocol, Val, Rec). + -spec get_clientinfo(t()) -> emqx_maybe:t(emqx_types:clientinfo()). get_clientinfo(Rec) -> get_meta(?clientinfo, Rec). 
diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 4d08854af..301d4e47e 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1747,6 +1747,7 @@ format_channel_info(undefined, {ClientId, PSInfo0 = #{}}, _Opts) -> format_persistent_session_info(ClientId, PSInfo0) -> Metadata = maps:get(metadata, PSInfo0, #{}), + {ProtoName, ProtoVer} = maps:get(protocol, Metadata), PSInfo1 = maps:with([created_at, expiry_interval], Metadata), CreatedAt = maps:get(created_at, PSInfo1), case Metadata of @@ -1765,7 +1766,9 @@ format_persistent_session_info(ClientId, PSInfo0) -> is_persistent => true, port => Port, heap_size => 0, - mqueue_len => 0 + mqueue_len => 0, + proto_name => ProtoName, + proto_ver => ProtoVer }, PSInfo = lists:foldl( fun result_format_time_fun/2, From 38a2e8add945b85eb7db0d9cff7e06c49306e3f0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 16 Apr 2024 14:55:48 +0200 Subject: [PATCH 169/234] fix(sessds): Return the number of subscriptions for offline sessions --- apps/emqx_management/src/emqx_mgmt_api_clients.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 301d4e47e..38320780d 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -1768,7 +1768,8 @@ format_persistent_session_info(ClientId, PSInfo0) -> heap_size => 0, mqueue_len => 0, proto_name => ProtoName, - proto_ver => ProtoVer + proto_ver => ProtoVer, + subscriptions_cnt => maps:size(maps:get(subscriptions, PSInfo0, #{})) }, PSInfo = lists:foldl( fun result_format_time_fun/2, From 124c5047d07c01576507cd49ddfe1304a440b04e Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 16 Apr 2024 23:38:01 
+0200 Subject: [PATCH 170/234] feat(sessds): Add API for getting session data from the cold storage --- apps/emqx/src/emqx_persistent_session_ds.erl | 8 +++++- .../src/emqx_persistent_session_ds_state.erl | 25 +++++++++++++++++++ .../src/emqx_persistent_session_ds_subs.erl | 22 ++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 908e71bb5..b8c853431 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -75,7 +75,8 @@ %% Managment APIs: -export([ - list_client_subscriptions/1 + list_client_subscriptions/1, + get_client_subscription/2 ]). %% session table operations @@ -736,6 +737,11 @@ list_client_subscriptions(ClientId) -> {error, not_found} end. +-spec get_client_subscription(emqx_types:clientid(), emqx_types:topic()) -> + subscription() | undefined. +get_client_subscription(ClientId, Topic) -> + emqx_persistent_session_ds_subs:cold_get_subscription(ClientId, Topic). + %%-------------------------------------------------------------------- %% Session tables operations %%-------------------------------------------------------------------- diff --git a/apps/emqx/src/emqx_persistent_session_ds_state.erl b/apps/emqx/src/emqx_persistent_session_ds_state.erl index bc603647a..9efffc7ff 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_state.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_state.erl @@ -22,6 +22,9 @@ %% It is responsible for saving, caching, and restoring session state. %% It is completely devoid of business logic. Not even the default %% values should be set in this module. +%% +%% Session process MUST NOT use `cold_*' functions! They are reserved +%% for use in the management APIs. -module(emqx_persistent_session_ds_state). -export([create_tables/0]). @@ -40,12 +43,14 @@ -export([get_rank/2, put_rank/3, del_rank/2, fold_ranks/3]). 
-export([ get_subscription_state/2, + cold_get_subscription_state/2, fold_subscription_states/3, put_subscription_state/3, del_subscription_state/2 ]). -export([ get_subscription/2, + cold_get_subscription/2, fold_subscriptions/3, n_subscriptions/1, put_subscription/3, @@ -383,6 +388,11 @@ new_id(Rec) -> get_subscription(TopicFilter, Rec) -> gen_get(?subscriptions, TopicFilter, Rec). +-spec cold_get_subscription(emqx_persistent_session_ds:id(), emqx_types:topic()) -> + [emqx_persistent_session_ds_subs:subscription()]. +cold_get_subscription(SessionId, Topic) -> + kv_pmap_read(?subscription_tab, SessionId, Topic). + -spec fold_subscriptions(fun(), Acc, t()) -> Acc. fold_subscriptions(Fun, Acc, Rec) -> gen_fold(?subscriptions, Fun, Acc, Rec). @@ -410,6 +420,13 @@ del_subscription(TopicFilter, Rec) -> get_subscription_state(SStateId, Rec) -> gen_get(?subscription_states, SStateId, Rec). +-spec cold_get_subscription_state( + emqx_persistent_session_ds:id(), emqx_persistent_session_ds_subs:subscription_state_id() +) -> + [emqx_persistent_session_ds_subs:subscription_state()]. +cold_get_subscription_state(SessionId, SStateId) -> + kv_pmap_read(?subscription_states_tab, SessionId, SStateId). + -spec fold_subscription_states(fun(), Acc, t()) -> Acc. fold_subscription_states(Fun, Acc, Rec) -> gen_fold(?subscription_states, Fun, Acc, Rec). @@ -675,6 +692,14 @@ kv_pmap_persist(Tab, SessionId, Key, Val0) -> Val = encoder(encode, Tab, Val0), mnesia:write(Tab, #kv{k = {SessionId, Key}, v = Val}, write). +kv_pmap_read(Table, SessionId, Key) -> + lists:map( + fun(#kv{v = Val}) -> + encoder(decode, Table, Val) + end, + mnesia:dirty_read(Table, {SessionId, Key}) + ). 
+ kv_pmap_restore(Table, SessionId) -> MS = [{#kv{k = {SessionId, '$1'}, v = '$2'}, [], [{{'$1', '$2'}}]}], Objs = mnesia:select(Table, MS, read), diff --git a/apps/emqx/src/emqx_persistent_session_ds_subs.erl b/apps/emqx/src/emqx_persistent_session_ds_subs.erl index 1993370ed..99ad9f9fc 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_subs.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_subs.erl @@ -33,6 +33,11 @@ fold/3 ]). +%% Management API: +-export([ + cold_get_subscription/2 +]). + -export_type([subscription_state_id/0, subscription/0, subscription_state/0]). -include("emqx_persistent_session_ds.hrl"). @@ -206,6 +211,23 @@ to_map(S) -> fold(Fun, Acc, S) -> emqx_persistent_session_ds_state:fold_subscriptions(Fun, Acc, S). +-spec cold_get_subscription(emqx_persistent_session_ds:id(), emqx_types:topic()) -> + emqx_persistent_session_ds:subscription() | undefined. +cold_get_subscription(SessionId, Topic) -> + case emqx_persistent_session_ds_state:cold_get_subscription(SessionId, Topic) of + [Sub = #{current_state := SStateId}] -> + case + emqx_persistent_session_ds_state:cold_get_subscription_state(SessionId, SStateId) + of + [#{subopts := Subopts}] -> + Sub#{subopts => Subopts}; + _ -> + undefined + end; + _ -> + undefined + end. 
+ %%================================================================================ %% Internal functions %%================================================================================ From 2468243dfda730cd4987ea933f4a2e1c649bd77f Mon Sep 17 00:00:00 2001 From: JianBo He Date: Wed, 17 Apr 2024 17:51:37 +0800 Subject: [PATCH 171/234] chore: update changes --- apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl | 1 - changes/ce/fix-12892.md | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changes/ce/fix-12892.md diff --git a/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl b/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl index 331b9d323..0932314fe 100644 --- a/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl +++ b/apps/emqx_gateway_ocpp/src/emqx_ocpp_connection.erl @@ -237,7 +237,6 @@ do_init(Req, Opts, WsOpts) -> ) of {error, Reason, _State} -> - 1 = Reason, {ok, cowboy_req:reply(400, #{}, to_bin(Reason), Req), WsOpts}; {ok, [Resp, Opts, WsOpts], NState} -> {cowboy_websocket, Resp, [Req, Opts, NState], WsOpts} diff --git a/changes/ce/fix-12892.md b/changes/ce/fix-12892.md new file mode 100644 index 000000000..c6f52629a --- /dev/null +++ b/changes/ce/fix-12892.md @@ -0,0 +1 @@ +Avoid printing error logs when processing downstream messages From 51c8173174e26efc0a0c9e4f8942d8f9005d8d08 Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 16 Apr 2024 19:00:32 +0200 Subject: [PATCH 172/234] feat(bridge): add is_template flag to bridge config fields --- apps/emqx/src/emqx_schema.erl | 11 +++++++-- .../src/emqx_bridge_cassandra.erl | 2 +- .../src/emqx_bridge_clickhouse.erl | 8 +++++-- .../src/emqx_bridge_dynamo.erl | 23 +++++++++--------- apps/emqx_bridge_es/src/emqx_bridge_es.erl | 8 +++---- .../src/emqx_bridge_gcp_pubsub.erl | 19 ++++++++++----- .../src/emqx_bridge_hstreamdb.erl | 17 ++++++++----- .../src/emqx_bridge_http_schema.erl | 2 +- .../src/emqx_bridge_influxdb.erl | 2 +- .../src/emqx_bridge_iotdb.erl | 10 ++++---- 
.../src/emqx_bridge_kafka.erl | 24 ++++++++++++++----- .../src/emqx_bridge_kinesis.erl | 2 +- .../src/emqx_bridge_mongodb.erl | 6 +++-- .../src/emqx_bridge_mqtt_connector_schema.erl | 14 +++++------ .../src/emqx_bridge_mysql.erl | 2 +- .../src/emqx_bridge_opents.erl | 8 +++---- .../src/emqx_bridge_oracle.erl | 4 ++-- .../src/emqx_bridge_pgsql.erl | 2 +- .../src/emqx_bridge_pulsar_pubsub_schema.erl | 4 ++-- .../emqx_bridge_rabbitmq_pubsub_schema.erl | 2 +- .../src/emqx_bridge_redis.erl | 2 +- .../src/emqx_bridge_rocketmq.erl | 12 ++++++---- .../src/emqx_bridge_rocketmq_connector.erl | 2 +- apps/emqx_bridge_s3/src/emqx_bridge_s3.erl | 2 +- .../src/emqx_bridge_sqlserver.erl | 2 +- .../src/emqx_bridge_syskeeper.erl | 4 ++-- .../src/emqx_bridge_tdengine.erl | 4 ++-- apps/emqx_conf/src/emqx_conf_schema_types.erl | 6 +++++ apps/emqx_s3/src/emqx_s3.app.src | 2 +- apps/emqx_s3/src/emqx_s3_client.erl | 3 ++- apps/emqx_s3/src/emqx_s3_schema.erl | 4 ++-- rel/i18n/emqx_conf_schema_types.hocon | 3 +++ 32 files changed, 133 insertions(+), 83 deletions(-) diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 1dab4f42f..57ad00e99 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -61,6 +61,7 @@ }. -type url() :: binary(). -type json_binary() :: binary(). +-type template() :: binary(). -typerefl_from_string({duration/0, emqx_schema, to_duration}). -typerefl_from_string({duration_s/0, emqx_schema, to_duration_s}). @@ -78,6 +79,7 @@ -typerefl_from_string({comma_separated_atoms/0, emqx_schema, to_comma_separated_atoms}). -typerefl_from_string({url/0, emqx_schema, to_url}). -typerefl_from_string({json_binary/0, emqx_schema, to_json_binary}). +-typerefl_from_string({template/0, emqx_schema, to_template}). -type parsed_server() :: #{ hostname := string(), @@ -120,7 +122,8 @@ to_erl_cipher_suite/1, to_comma_separated_atoms/1, to_url/1, - to_json_binary/1 + to_json_binary/1, + to_template/1 ]). 
-export([ @@ -160,7 +163,8 @@ comma_separated_atoms/0, url/0, json_binary/0, - port_number/0 + port_number/0, + template/0 ]). -export([namespace/0, roots/0, roots/1, fields/1, desc/1, tags/0]). @@ -2594,6 +2598,9 @@ to_json_binary(Str) -> Error end. +to_template(Str) -> + {ok, iolist_to_binary(Str)}. + %% @doc support the following format: %% - 127.0.0.1:1883 %% - ::1:1883 diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl index d34cb1950..80fbc80d2 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl @@ -181,7 +181,7 @@ fields("post", Type) -> cql_field() -> {cql, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("cql_template"), default => ?DEFAULT_CQL, format => <<"sql">>} )}. diff --git a/apps/emqx_bridge_clickhouse/src/emqx_bridge_clickhouse.erl b/apps/emqx_bridge_clickhouse/src/emqx_bridge_clickhouse.erl index 833c2570d..1e07f2340 100644 --- a/apps/emqx_bridge_clickhouse/src/emqx_bridge_clickhouse.erl +++ b/apps/emqx_bridge_clickhouse/src/emqx_bridge_clickhouse.erl @@ -184,8 +184,12 @@ fields("post", Type) -> sql_field() -> {sql, mk( - binary(), - #{desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, format => <<"sql">>} + emqx_schema:template(), + #{ + desc => ?DESC("sql_template"), + default => ?DEFAULT_SQL, + format => <<"sql">> + } )}. 
batch_value_separator_field() -> diff --git a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl index 13828c0f7..d568fee25 100644 --- a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl +++ b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl @@ -160,13 +160,7 @@ fields(dynamo_action) -> ); fields(action_parameters) -> Parameters = - [ - {template, - mk( - binary(), - #{desc => ?DESC("template"), default => ?DEFAULT_TEMPLATE} - )} - ] ++ emqx_bridge_dynamo_connector:fields(config), + [{template, template_field_schema()}] ++ emqx_bridge_dynamo_connector:fields(config), lists:foldl( fun(Key, Acc) -> proplists:delete(Key, Acc) @@ -199,11 +193,7 @@ fields(connector_resource_opts) -> fields("config") -> [ {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, - {template, - mk( - binary(), - #{desc => ?DESC("template"), default => ?DEFAULT_TEMPLATE} - )}, + {template, template_field_schema()}, {local_topic, mk( binary(), @@ -230,6 +220,15 @@ fields("put") -> fields("get") -> emqx_bridge_schema:status_fields() ++ fields("post"). +template_field_schema() -> + mk( + emqx_schema:template(), + #{ + desc => ?DESC("template"), + default => ?DEFAULT_TEMPLATE + } + ). 
+ desc("config") -> ?DESC("desc_config"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> diff --git a/apps/emqx_bridge_es/src/emqx_bridge_es.erl b/apps/emqx_bridge_es/src/emqx_bridge_es.erl index 97f3986e4..def0b76f7 100644 --- a/apps/emqx_bridge_es/src/emqx_bridge_es.erl +++ b/apps/emqx_bridge_es/src/emqx_bridge_es.erl @@ -135,7 +135,7 @@ overwrite() -> index() -> {index, ?HOCON( - binary(), + emqx_schema:template(), #{ required => true, example => <<"${payload.index}">>, @@ -146,7 +146,7 @@ index() -> id(Required) -> {id, ?HOCON( - binary(), + emqx_schema:template(), #{ required => Required, example => <<"${payload.id}">>, @@ -157,7 +157,7 @@ id(Required) -> doc() -> {doc, ?HOCON( - binary(), + emqx_schema:template(), #{ required => false, example => <<"${payload.doc}">>, @@ -187,7 +187,7 @@ doc_as_upsert() -> routing() -> {routing, ?HOCON( - binary(), + emqx_schema:template(), #{ required => false, example => <<"${payload.routing}">>, diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.erl b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.erl index 007bbc1a0..a5991af81 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.erl +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.erl @@ -122,7 +122,7 @@ fields(producer) -> )}, {ordering_key_template, sc( - binary(), + emqx_schema:template(), #{ default => <<>>, desc => ?DESC("ordering_key_template") @@ -130,7 +130,7 @@ fields(producer) -> )}, {payload_template, sc( - binary(), + emqx_schema:template(), #{ default => <<>>, desc => ?DESC("payload_template") @@ -201,8 +201,11 @@ fields(consumer_topic_mapping) -> {qos, mk(emqx_schema:qos(), #{default => 0, desc => ?DESC(consumer_mqtt_qos)})}, {payload_template, mk( - string(), - #{default => <<"${.}">>, desc => ?DESC(consumer_mqtt_payload)} + emqx_schema:template(), + #{ + default => <<"${.}">>, + desc => ?DESC(consumer_mqtt_payload) + } )} ]; fields("consumer_resource_opts") -> @@ -221,14 
+224,18 @@ fields("consumer_resource_opts") -> fields(key_value_pair) -> [ {key, - mk(binary(), #{ + mk(emqx_schema:template(), #{ required => true, validator => [ emqx_resource_validator:not_empty("Key templates must not be empty") ], desc => ?DESC(kv_pair_key) })}, - {value, mk(binary(), #{required => true, desc => ?DESC(kv_pair_value)})} + {value, + mk(emqx_schema:template(), #{ + required => true, + desc => ?DESC(kv_pair_value) + })} ]; fields("get_producer") -> emqx_bridge_schema:status_fields() ++ fields("post_producer"); diff --git a/apps/emqx_bridge_hstreamdb/src/emqx_bridge_hstreamdb.erl b/apps/emqx_bridge_hstreamdb/src/emqx_bridge_hstreamdb.erl index 7fa19c9a4..7024a2e07 100644 --- a/apps/emqx_bridge_hstreamdb/src/emqx_bridge_hstreamdb.erl +++ b/apps/emqx_bridge_hstreamdb/src/emqx_bridge_hstreamdb.erl @@ -167,13 +167,13 @@ fields(action_parameters) -> })}, {partition_key, - mk(binary(), #{ - required => false, desc => ?DESC(emqx_bridge_hstreamdb_connector, "partition_key") + mk(emqx_schema:template(), #{ + required => false, + desc => ?DESC(emqx_bridge_hstreamdb_connector, "partition_key") })}, {grpc_flush_timeout, fun grpc_flush_timeout/1}, - {record_template, - mk(binary(), #{default => <<"${payload}">>, desc => ?DESC("record_template")})}, + {record_template, record_template_schema()}, {aggregation_pool_size, mk(pos_integer(), #{ default => ?DEFAULT_AGG_POOL_SIZE, desc => ?DESC("aggregation_pool_size") @@ -222,6 +222,12 @@ fields("put") -> hstream_bridge_common_fields() ++ connector_fields(). +record_template_schema() -> + mk(emqx_schema:template(), #{ + default => <<"${payload}">>, + desc => ?DESC("record_template") + }). 
+ grpc_timeout(type) -> emqx_schema:timeout_duration_ms(); grpc_timeout(desc) -> ?DESC(emqx_bridge_hstreamdb_connector, "grpc_timeout"); grpc_timeout(default) -> ?DEFAULT_GRPC_TIMEOUT_RAW; @@ -239,8 +245,7 @@ hstream_bridge_common_fields() -> [ {direction, mk(egress, #{desc => ?DESC("config_direction"), default => egress})}, {local_topic, mk(binary(), #{desc => ?DESC("local_topic")})}, - {record_template, - mk(binary(), #{default => <<"${payload}">>, desc => ?DESC("record_template")})} + {record_template, record_template_schema()} ] ++ emqx_resource_schema:fields("resource_opts"). diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl index 43f3d1748..ef150adfc 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl @@ -287,7 +287,7 @@ method_field() -> body_field() -> {body, mk( - binary(), + emqx_schema:template(), #{ default => undefined, desc => ?DESC("config_body") diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl index a62effe51..59d36cd5f 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb.erl @@ -42,7 +42,7 @@ %% api write_syntax_type() -> - typerefl:alias("string", write_syntax()). + typerefl:alias("template", write_syntax()). 
%% Examples conn_bridge_examples(Method) -> diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl index 134868978..599be842a 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl @@ -84,7 +84,7 @@ fields(action_parameters) -> )}, {device_id, mk( - binary(), + emqx_schema:template(), #{ desc => ?DESC("config_device_id") } @@ -114,7 +114,7 @@ fields(action_parameters_data) -> )}, {measurement, mk( - binary(), + emqx_schema:template(), #{ required => true, desc => ?DESC("config_parameters_measurement") @@ -122,7 +122,9 @@ fields(action_parameters_data) -> )}, {data_type, mk( - hoconsc:union([enum([text, boolean, int32, int64, float, double]), binary()]), + hoconsc:union([ + enum([text, boolean, int32, int64, float, double]), emqx_schema:template() + ]), #{ required => true, desc => ?DESC("config_parameters_data_type") @@ -130,7 +132,7 @@ fields(action_parameters_data) -> )}, {value, mk( - binary(), + emqx_schema:template(), #{ required => true, desc => ?DESC("config_parameters_value") diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl index ff9d19c0d..b0b0c3a03 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl @@ -477,11 +477,20 @@ fields(producer_kafka_ext_headers) -> ]; fields(kafka_message) -> [ - {key, mk(string(), #{default => <<"${.clientid}">>, desc => ?DESC(kafka_message_key)})}, - {value, mk(string(), #{default => <<"${.}">>, desc => ?DESC(kafka_message_value)})}, + {key, + mk(emqx_schema:template(), #{ + default => <<"${.clientid}">>, + desc => ?DESC(kafka_message_key) + })}, + {value, + mk(emqx_schema:template(), #{ + default => <<"${.}">>, + desc => ?DESC(kafka_message_value) + })}, {timestamp, - mk(string(), #{ - default => <<"${.timestamp}">>, desc => ?DESC(kafka_message_timestamp) + 
mk(emqx_schema:template(), #{ + default => <<"${.timestamp}">>, + desc => ?DESC(kafka_message_timestamp) })} ]; fields(producer_buffer) -> @@ -536,8 +545,11 @@ fields(consumer_topic_mapping) -> {qos, mk(emqx_schema:qos(), #{default => 0, desc => ?DESC(consumer_mqtt_qos)})}, {payload_template, mk( - string(), - #{default => <<"${.}">>, desc => ?DESC(consumer_mqtt_payload)} + emqx_schema:template(), + #{ + default => <<"${.}">>, + desc => ?DESC(consumer_mqtt_payload) + } )} ]; fields(consumer_kafka_opts) -> diff --git a/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.erl b/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.erl index 3c22e41e2..40849a29d 100644 --- a/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.erl +++ b/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.erl @@ -150,7 +150,7 @@ fields(producer) -> [ {payload_template, sc( - binary(), + emqx_schema:template(), #{ default => <<"${.}">>, desc => ?DESC("payload_template") diff --git a/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.erl b/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.erl index c81df1334..593bf6ff8 100644 --- a/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.erl +++ b/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.erl @@ -44,8 +44,10 @@ roots() -> []. 
fields("config") -> [ {enable, mk(boolean(), #{desc => ?DESC("enable"), default => true})}, - {collection, mk(binary(), #{desc => ?DESC("collection"), default => <<"mqtt">>})}, - {payload_template, mk(binary(), #{required => false, desc => ?DESC("payload_template")})}, + {collection, + mk(emqx_schema:template(), #{desc => ?DESC("collection"), default => <<"mqtt">>})}, + {payload_template, + mk(emqx_schema:template(), #{required => false, desc => ?DESC("payload_template")})}, {resource_opts, mk( ref(?MODULE, "creation_opts"), diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector_schema.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector_schema.erl index 7103e53ee..bc2939c24 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector_schema.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector_schema.erl @@ -200,7 +200,7 @@ fields("ingress_local") -> [ {topic, mk( - binary(), + emqx_schema:template(), #{ validator => fun emqx_schema:non_empty_string/1, desc => ?DESC("ingress_local_topic"), @@ -217,7 +217,7 @@ fields("ingress_local") -> )}, {retain, mk( - hoconsc:union([boolean(), binary()]), + hoconsc:union([boolean(), emqx_schema:template()]), #{ default => <<"${retain}">>, desc => ?DESC("retain") @@ -225,7 +225,7 @@ fields("ingress_local") -> )}, {payload, mk( - binary(), + emqx_schema:template(), #{ default => undefined, desc => ?DESC("payload") @@ -268,7 +268,7 @@ fields("egress_remote") -> [ {topic, mk( - binary(), + emqx_schema:template(), #{ required => true, validator => fun emqx_schema:non_empty_string/1, @@ -286,7 +286,7 @@ fields("egress_remote") -> )}, {retain, mk( - hoconsc:union([boolean(), binary()]), + hoconsc:union([boolean(), emqx_schema:template()]), #{ required => false, default => false, @@ -295,7 +295,7 @@ fields("egress_remote") -> )}, {payload, mk( - binary(), + emqx_schema:template(), #{ default => undefined, desc => ?DESC("payload") @@ -344,7 +344,7 @@ desc(_) -> undefined. 
qos() -> - hoconsc:union([emqx_schema:qos(), binary()]). + hoconsc:union([emqx_schema:qos(), emqx_schema:template()]). parse_server(Str) -> #{hostname := Host, port := Port} = emqx_schema:parse_server(Str, ?MQTT_HOST_OPTS), diff --git a/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.erl b/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.erl index ee7487760..24b11b930 100644 --- a/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.erl +++ b/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.erl @@ -117,7 +117,7 @@ fields("config") -> {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, {sql, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, format => <<"sql">>} )}, {local_topic, diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl index d38ed8eb4..25c0ce88d 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl @@ -146,7 +146,7 @@ fields(action_parameters_data) -> [ {timestamp, mk( - binary(), + emqx_schema:template(), #{ desc => ?DESC("config_parameters_timestamp"), required => false @@ -154,7 +154,7 @@ fields(action_parameters_data) -> )}, {metric, mk( - binary(), + emqx_schema:template(), #{ required => true, desc => ?DESC("config_parameters_metric") @@ -162,7 +162,7 @@ fields(action_parameters_data) -> )}, {tags, mk( - hoconsc:union([map(), binary()]), + hoconsc:union([map(), emqx_schema:template()]), #{ required => true, desc => ?DESC("config_parameters_tags"), @@ -188,7 +188,7 @@ fields(action_parameters_data) -> )}, {value, mk( - hoconsc:union([integer(), float(), binary()]), + hoconsc:union([integer(), float(), emqx_schema:template()]), #{ required => true, desc => ?DESC("config_parameters_value") diff --git a/apps/emqx_bridge_oracle/src/emqx_bridge_oracle.erl b/apps/emqx_bridge_oracle/src/emqx_bridge_oracle.erl index fb485c16b..c3b4160ab 100644 --- 
a/apps/emqx_bridge_oracle/src/emqx_bridge_oracle.erl +++ b/apps/emqx_bridge_oracle/src/emqx_bridge_oracle.erl @@ -158,7 +158,7 @@ fields(action_parameters) -> [ {sql, hoconsc:mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, format => <<"sql">>} )} ]; @@ -177,7 +177,7 @@ fields("config") -> )}, {sql, hoconsc:mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, format => <<"sql">>} )}, {local_topic, diff --git a/apps/emqx_bridge_pgsql/src/emqx_bridge_pgsql.erl b/apps/emqx_bridge_pgsql/src/emqx_bridge_pgsql.erl index 7d02e8cca..5a0b9eb5b 100644 --- a/apps/emqx_bridge_pgsql/src/emqx_bridge_pgsql.erl +++ b/apps/emqx_bridge_pgsql/src/emqx_bridge_pgsql.erl @@ -61,7 +61,7 @@ fields(action_parameters) -> [ {sql, hoconsc:mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("sql_template"), default => default_sql(), format => <<"sql">>} )} ]; diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl index ccf985ba8..dff62843e 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl @@ -51,12 +51,12 @@ fields(action_parameters) -> fields(producer_pulsar_message) -> [ {key, - ?HOCON(string(), #{ + ?HOCON(emqx_schema:template(), #{ default => <<"${.clientid}">>, desc => ?DESC("producer_key_template") })}, {value, - ?HOCON(string(), #{ + ?HOCON(emqx_schema:template(), #{ default => <<"${.}">>, desc => ?DESC("producer_value_template") })} diff --git a/apps/emqx_bridge_rabbitmq/src/emqx_bridge_rabbitmq_pubsub_schema.erl b/apps/emqx_bridge_rabbitmq/src/emqx_bridge_rabbitmq_pubsub_schema.erl index 9a9741226..b0c254fc4 100644 --- a/apps/emqx_bridge_rabbitmq/src/emqx_bridge_rabbitmq_pubsub_schema.erl +++ b/apps/emqx_bridge_rabbitmq/src/emqx_bridge_rabbitmq_pubsub_schema.erl @@ -99,7 +99,7 @@ 
fields(action_parameters) -> )}, {payload_template, hoconsc:mk( - binary(), + emqx_schema:template(), #{ default => <<"">>, desc => ?DESC(?CONNECTOR_SCHEMA, "payload_template") diff --git a/apps/emqx_bridge_redis/src/emqx_bridge_redis.erl b/apps/emqx_bridge_redis/src/emqx_bridge_redis.erl index c80f9ead1..c9b2a35b9 100644 --- a/apps/emqx_bridge_redis/src/emqx_bridge_redis.erl +++ b/apps/emqx_bridge_redis/src/emqx_bridge_redis.erl @@ -211,7 +211,7 @@ desc(_) -> undefined. command_template(type) -> - list(binary()); + hoconsc:array(emqx_schema:template()); command_template(required) -> true; command_template(validator) -> diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl index 589719486..750993e9a 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl @@ -162,8 +162,11 @@ fields(action_parameters) -> [ {template, mk( - binary(), - #{desc => ?DESC("template"), default => ?DEFAULT_TEMPLATE} + emqx_schema:template(), + #{ + desc => ?DESC("template"), + default => ?DEFAULT_TEMPLATE + } )} ] ++ emqx_bridge_rocketmq_connector:fields(config), lists:foldl( @@ -205,7 +208,7 @@ fields("config") -> {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, {template, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("template"), default => ?DEFAULT_TEMPLATE} )}, {local_topic, @@ -214,8 +217,7 @@ fields("config") -> #{desc => ?DESC("local_topic"), required => false} )} ] ++ emqx_resource_schema:fields("resource_opts") ++ - (emqx_bridge_rocketmq_connector:fields(config) -- - emqx_connector_schema_lib:prepare_statement_fields()); + emqx_bridge_rocketmq_connector:fields(config); fields("post") -> [type_field(), name_field() | fields("config")]; fields("put") -> diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl index 
1af520a93..0bea5a8ff 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl @@ -47,7 +47,7 @@ fields(config) -> {servers, servers()}, {topic, mk( - binary(), + emqx_schema:template(), #{default => <<"TopicTest">>, desc => ?DESC(topic)} )}, {access_key, diff --git a/apps/emqx_bridge_s3/src/emqx_bridge_s3.erl b/apps/emqx_bridge_s3/src/emqx_bridge_s3.erl index 5d7e176e3..79cc560d2 100644 --- a/apps/emqx_bridge_s3/src/emqx_bridge_s3.erl +++ b/apps/emqx_bridge_s3/src/emqx_bridge_s3.erl @@ -77,7 +77,7 @@ fields(s3_upload_parameters) -> [ {content, hoconsc:mk( - string(), + emqx_schema:template(), #{ required => false, default => <<"${.}">>, diff --git a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.erl b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.erl index e9df1fdb6..af66b8a88 100644 --- a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.erl +++ b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.erl @@ -192,7 +192,7 @@ fields(action_parameters) -> [ {sql, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, format => <<"sql">>} )} ]; diff --git a/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper.erl b/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper.erl index 9ac0efe8a..547562f26 100644 --- a/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper.erl +++ b/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper.erl @@ -112,7 +112,7 @@ fields("parameters") -> [ {target_topic, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("target_topic"), default => <<"${topic}">>} )}, {target_qos, @@ -122,7 +122,7 @@ fields("parameters") -> )}, {template, mk( - binary(), + emqx_schema:template(), #{desc => ?DESC("template"), default => <<"${payload}">>} )} ]; diff --git a/apps/emqx_bridge_tdengine/src/emqx_bridge_tdengine.erl b/apps/emqx_bridge_tdengine/src/emqx_bridge_tdengine.erl index 6e71da87e..f086f00dc 
100644 --- a/apps/emqx_bridge_tdengine/src/emqx_bridge_tdengine.erl +++ b/apps/emqx_bridge_tdengine/src/emqx_bridge_tdengine.erl @@ -83,7 +83,7 @@ fields("config") -> {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, {sql, mk( - binary(), + emqx_schema:template(), #{ desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, @@ -125,7 +125,7 @@ fields(action_parameters) -> {database, fun emqx_connector_schema_lib:database/1}, {sql, mk( - binary(), + emqx_schema:template(), #{ desc => ?DESC("sql_template"), default => ?DEFAULT_SQL, diff --git a/apps/emqx_conf/src/emqx_conf_schema_types.erl b/apps/emqx_conf/src/emqx_conf_schema_types.erl index f530ee872..49ee8fbaf 100644 --- a/apps/emqx_conf/src/emqx_conf_schema_types.erl +++ b/apps/emqx_conf/src/emqx_conf_schema_types.erl @@ -65,6 +65,12 @@ readable("boolean()") -> dashboard => #{type => boolean}, docgen => #{type => "Boolean"} }; +readable("template()") -> + #{ + swagger => #{type => string}, + dashboard => #{type => string, is_template => true}, + docgen => #{type => "String", desc => ?DESC(template)} + }; readable("binary()") -> #{ swagger => #{type => string}, diff --git a/apps/emqx_s3/src/emqx_s3.app.src b/apps/emqx_s3/src/emqx_s3.app.src index 965cb099d..c307f2c9c 100644 --- a/apps/emqx_s3/src/emqx_s3.app.src +++ b/apps/emqx_s3/src/emqx_s3.app.src @@ -1,6 +1,6 @@ {application, emqx_s3, [ {description, "EMQX S3"}, - {vsn, "5.0.14"}, + {vsn, "5.1.0"}, {modules, []}, {registered, [emqx_s3_sup]}, {applications, [ diff --git a/apps/emqx_s3/src/emqx_s3_client.erl b/apps/emqx_s3/src/emqx_s3_client.erl index a415cf8d4..b7bd85833 100644 --- a/apps/emqx_s3/src/emqx_s3_client.erl +++ b/apps/emqx_s3/src/emqx_s3_client.erl @@ -103,7 +103,7 @@ put_object(Client, Key, Value) -> -spec put_object(client(), key(), upload_options(), iodata()) -> ok_or_error(term()). 
put_object( - #{bucket := Bucket, headers := BaseHeaders, aws_config := AwsConfig = #aws_config{}}, + #{bucket := Bucket0, headers := BaseHeaders, aws_config := AwsConfig = #aws_config{}}, Key, UploadOpts, Content @@ -111,6 +111,7 @@ put_object( ECKey = erlcloud_key(Key), ECOpts = erlcloud_upload_options(UploadOpts), Headers = join_headers(BaseHeaders, maps:get(headers, UploadOpts, undefined)), + Bucket = to_list_string(Bucket0), try erlcloud_s3:put_object(Bucket, ECKey, Content, ECOpts, Headers, AwsConfig) of Props when is_list(Props) -> ok diff --git a/apps/emqx_s3/src/emqx_s3_schema.erl b/apps/emqx_s3/src/emqx_s3_schema.erl index ff8c632bd..de5e4f53e 100644 --- a/apps/emqx_s3/src/emqx_s3_schema.erl +++ b/apps/emqx_s3/src/emqx_s3_schema.erl @@ -74,7 +74,7 @@ fields(s3_upload) -> [ {bucket, mk( - string(), + emqx_schema:template(), #{ desc => ?DESC("bucket"), required => true @@ -82,7 +82,7 @@ fields(s3_upload) -> )}, {key, mk( - string(), + emqx_schema:template(), #{ desc => ?DESC("key"), required => true diff --git a/rel/i18n/emqx_conf_schema_types.hocon b/rel/i18n/emqx_conf_schema_types.hocon index 6b9dac9ea..f9eefbe1d 100644 --- a/rel/i18n/emqx_conf_schema_types.hocon +++ b/rel/i18n/emqx_conf_schema_types.hocon @@ -9,4 +9,7 @@ emqx_conf_schema_types { secret.desc: """A string holding some sensitive information, such as a password. When secret starts with file://, the rest of the string is interpreted as a path to a file containing the secret itself: whole content of the file except any trailing whitespace characters is considered a secret value. 
Note: when clustered, all EMQX nodes should have the same file present before using file:// secrets.""" + template.desc: """~ + A string for `${.path.to.var}` style value interpolation, + where the leading dot is optional, and `${.}` represents all values as an object.""" } From f9f14f9758697cebf09f7a1f5647b83b9d158ec6 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 13:08:08 +0200 Subject: [PATCH 173/234] refactor(emqx_conf): raise exception at higher level for more context --- apps/emqx_conf/src/emqx_conf.erl | 22 +++++++++++++----- apps/emqx_conf/src/emqx_conf_schema_types.erl | 23 +++++++++++-------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/apps/emqx_conf/src/emqx_conf.erl b/apps/emqx_conf/src/emqx_conf.erl index 0bd319503..23dda6b02 100644 --- a/apps/emqx_conf/src/emqx_conf.erl +++ b/apps/emqx_conf/src/emqx_conf.erl @@ -304,12 +304,22 @@ gen_flat_doc(RootNames, #{full_name := FullName, fields := Fields} = S, DescReso false -> ok end, - #{ - text => short_name(FullName), - hash => format_hash(FullName), - doc => maps:get(desc, S, <<"">>), - fields => format_fields(Fields, DescResolver) - }. + try + #{ + text => short_name(FullName), + hash => format_hash(FullName), + doc => maps:get(desc, S, <<"">>), + fields => format_fields(Fields, DescResolver) + } + catch + throw:Reason -> + io:format( + standard_error, + "failed_to_build_doc for ~s:~n~p~n", + [FullName, Reason] + ), + error(failed_to_build_doc) + end. format_fields(Fields, DescResolver) -> [format_field(F, DescResolver) || F <- Fields]. 
diff --git a/apps/emqx_conf/src/emqx_conf_schema_types.erl b/apps/emqx_conf/src/emqx_conf_schema_types.erl index 49ee8fbaf..dbfbe74bc 100644 --- a/apps/emqx_conf/src/emqx_conf_schema_types.erl +++ b/apps/emqx_conf/src/emqx_conf_schema_types.erl @@ -33,8 +33,19 @@ readable(Module, TypeStr) when is_list(TypeStr) -> %% Module is ignored so far as all types are distinguished by their names readable(TypeStr) catch - throw:unknown_type -> - fail(#{reason => unknown_type, type => TypeStr, module => Module}) + throw:Reason -> + throw(#{ + reason => Reason, + type => TypeStr, + module => Module + }); + error:Reason:Stacktrace -> + throw(#{ + reason => Reason, + stacktrace => Stacktrace, + type => TypeStr, + module => Module + }) end. readable_swagger(Module, TypeStr) -> @@ -49,16 +60,10 @@ readable_docgen(Module, TypeStr) -> get_readable(Module, TypeStr, Flavor) -> Map = readable(Module, TypeStr), case maps:get(Flavor, Map, undefined) of - undefined -> fail(#{reason => unknown_type, module => Module, type => TypeStr}); + undefined -> throw(#{reason => unknown_type, module => Module, type => TypeStr}); Value -> Value end. -%% Fail the build or test. Production code should never get here. --spec fail(_) -> no_return(). -fail(Reason) -> - io:format(standard_error, "ERROR: ~p~n", [Reason]), - error(Reason). 
- readable("boolean()") -> #{ swagger => #{type => boolean}, From d49e98bc4b6defce0500a45ef9db1c084f1f885e Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 13:53:43 +0200 Subject: [PATCH 174/234] test: fix dashboard schema validation --- apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl | 8 ++++---- scripts/test/emqx-smoke-test.sh | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl index 2bc1c5b39..632c8b8d4 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_schema_api.erl @@ -100,28 +100,28 @@ gen_schema(connectors) -> hotconf_schema_json() -> SchemaInfo = #{ - title => <<"EMQX Hot Conf Schema">>, + title => <<"Hot Conf Schema">>, version => ?SCHEMA_VERSION }, gen_api_schema_json_iodata(emqx_mgmt_api_configs, SchemaInfo). bridge_schema_json() -> SchemaInfo = #{ - title => <<"EMQX Data Bridge Schema">>, + title => <<"Data Bridge Schema">>, version => ?SCHEMA_VERSION }, gen_api_schema_json_iodata(emqx_bridge_api, SchemaInfo). actions_schema_json() -> SchemaInfo = #{ - title => <<"EMQX Data Actions and Sources Schema">>, + title => <<"Actions and Sources Schema">>, version => ?SCHEMA_VERSION }, gen_api_schema_json_iodata(emqx_bridge_v2_api, SchemaInfo). connectors_schema_json() -> SchemaInfo = #{ - title => <<"EMQX Connectors Schema">>, + title => <<"Connectors Schema">>, version => ?SCHEMA_VERSION }, gen_api_schema_json_iodata(emqx_connector_api, SchemaInfo). 
diff --git a/scripts/test/emqx-smoke-test.sh b/scripts/test/emqx-smoke-test.sh index 4430a313a..8177d7b85 100755 --- a/scripts/test/emqx-smoke-test.sh +++ b/scripts/test/emqx-smoke-test.sh @@ -82,8 +82,10 @@ main() { ## The json status feature was added after hotconf and bridges schema API if [ "$JSON_STATUS" != 'NOT_JSON' ]; then check_swagger_json - check_schema_json hotconf "EMQX Hot Conf API Schema" - check_schema_json bridges "EMQX Data Bridge API Schema" + check_schema_json hotconf "Hot Conf Schema" + check_schema_json bridges "Data Bridge Schema" + check_schema_json actions "Actions and Sources Schema" + check_schema_json connectors "Connectors Schema" fi } From cf56050759d96806b1e1cb39a564cdf7f3acbb44 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Wed, 17 Apr 2024 16:12:12 +0200 Subject: [PATCH 175/234] feat: avoid mixing request with and without the stop_after_render flag Previously a batch of requests that was sent to a connector could contain both requests with the stop_after_rendering flag and requests without this flag. When this happened a warning message was generated and the stop_after_render flags for the batch would be ignored. This commit fixes so that a mixed batch is never created so there is no longer any need for a warning message or ignoring flags. 
--- apps/emqx_bridge/src/emqx_action_info.erl | 10 +- .../src/emqx_connector_info.erl | 10 +- .../src/emqx_resource_buffer_worker.erl | 110 +++++++---- .../emqx_rule_engine_api_rule_apply_SUITE.erl | 182 ++++++++++++++++-- .../emqx_rule_engine_test_action_info.erl | 101 ++++++++++ .../test/emqx_rule_engine_test_connector.erl | 99 ++++++++++ .../emqx_rule_engine_test_connector_info.erl | 43 +++++ mix.exs | 2 +- rebar.config | 2 +- 9 files changed, 505 insertions(+), 54 deletions(-) create mode 100644 apps/emqx_rule_engine/test/emqx_rule_engine_test_action_info.erl create mode 100644 apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl create mode 100644 apps/emqx_rule_engine/test/emqx_rule_engine_test_connector_info.erl diff --git a/apps/emqx_bridge/src/emqx_action_info.erl b/apps/emqx_bridge/src/emqx_action_info.erl index 464b2e429..a8aaf9fdd 100644 --- a/apps/emqx_bridge/src/emqx_action_info.erl +++ b/apps/emqx_bridge/src/emqx_action_info.erl @@ -41,6 +41,9 @@ ]). -export([clean_cache/0]). +%% For tests +-export([hard_coded_test_action_info_modules/0]). + -callback bridge_v1_type_name() -> atom() | { @@ -128,8 +131,13 @@ hard_coded_action_info_modules_common() -> emqx_bridge_mqtt_pubsub_action_info ]. +%% This exists so that it can be mocked for test cases +hard_coded_test_action_info_modules() -> []. + hard_coded_action_info_modules() -> - hard_coded_action_info_modules_common() ++ hard_coded_action_info_modules_ee(). + hard_coded_action_info_modules_common() ++ + hard_coded_action_info_modules_ee() ++ + ?MODULE:hard_coded_test_action_info_modules(). %% ==================================================================== %% API diff --git a/apps/emqx_connector/src/emqx_connector_info.erl b/apps/emqx_connector/src/emqx_connector_info.erl index 766f34168..e87c2ad7e 100644 --- a/apps/emqx_connector/src/emqx_connector_info.erl +++ b/apps/emqx_connector/src/emqx_connector_info.erl @@ -31,6 +31,9 @@ -export([clean_cache/0]). 
+%% For tests +-export([hard_coded_test_connector_info_modules/0]). + %% The type name for the conncector -callback type_name() -> atom(). @@ -117,8 +120,13 @@ hard_coded_connector_info_modules_common() -> emqx_bridge_mqtt_pubsub_connector_info ]. +%% This exists so that it can be mocked for test cases +hard_coded_test_connector_info_modules() -> []. + hard_coded_connector_info_modules() -> - hard_coded_connector_info_modules_common() ++ hard_coded_connector_info_modules_ee(). + hard_coded_connector_info_modules_common() ++ + hard_coded_connector_info_modules_ee() ++ + ?MODULE:hard_coded_test_connector_info_modules(). %% -------------------------------------------------------------------- %% Atom macros to avoid typos diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 6dfcde88c..24eb86d37 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -583,7 +583,11 @@ flush(Data0) -> {keep_state, Data1}; {_, false} -> ?tp(buffer_worker_flush_before_pop, #{}), - {Q1, QAckRef, Batch} = replayq:pop(Q0, #{count_limit => BatchSize}), + PopOpts = #{ + count_limit => BatchSize, + stop_before => {fun stop_before_mixed_stop_after_render/2, initial_state} + }, + {Q1, QAckRef, Batch} = replayq:pop(Q0, PopOpts), Data2 = Data1#{queue := Q1}, ?tp(buffer_worker_flush_before_sieve_expired, #{}), Now = now_(), @@ -619,6 +623,73 @@ flush(Data0) -> end end. 
+stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + #{stop_action_after_render := true} = _TraceCtx + ), + initial_state +) -> + stop_action_after_render; +stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + _TraceCtx + ), + initial_state +) -> + no_stop_action_after_render; +stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + #{stop_action_after_render := true} = _TraceCtx + ), + no_stop_action_after_render +) -> + true; +stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + #{stop_action_after_render := true} = _TraceCtx + ), + stop_action_after_render +) -> + stop_action_after_render; +stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + _TraceCtx + ), + stop_action_after_render +) -> + true; +stop_before_mixed_stop_after_render( + ?QUERY( + _, + _, + _, + _, + _TraceCtx + ), + State +) -> + State. + -spec do_flush(data(), #{ is_batch := boolean(), batch := [queue_query()], @@ -1119,25 +1190,13 @@ set_rule_id_trace_meta_data(Requests) when is_list(Requests) -> %% Get the rule ids from requests RuleIDs = lists:foldl(fun collect_rule_id/2, #{}, Requests), ClientIDs = lists:foldl(fun collect_client_id/2, #{}, Requests), - StopAfterRender = lists:foldl(fun collect_stop_after_render/2, no_info, Requests), StopAfterRenderVal = - case StopAfterRender of - only_true -> - logger:update_process_metadata(#{stop_action_after_render => false}), + case Requests of + %% We know that the batch is not mixed since we prevent this by + %% using a stop_after function in the replayq:pop call + [?QUERY(_, _, _, _, #{stop_action_after_render := true}) | _] -> true; - only_false -> - false; - mixed -> - ?TRACE( - warning, - "ACTION", - "mixed_stop_action_after_render_batch " - "(A batch will be sent to connector where some but " - "not all requests has stop_action_after_render set. 
" - "The batch will get assigned " - "stop_action_after_render = false)", - #{rule_ids => RuleIDs, client_ids => ClientIDs} - ), + [?QUERY(_, _, _, _, _TraceCTX) | _] -> false end, logger:update_process_metadata(#{ @@ -1158,21 +1217,6 @@ collect_client_id(?QUERY(_, _, _, _, #{clientid := ClientId}), Acc) -> collect_client_id(?QUERY(_, _, _, _, _), Acc) -> Acc. -collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), no_info) -> - only_true; -collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), only_true) -> - only_true; -collect_stop_after_render(?QUERY(_, _, _, _, #{stop_action_after_render := true}), only_false) -> - mixed; -collect_stop_after_render(?QUERY(_, _, _, _, _), no_info) -> - only_false; -collect_stop_after_render(?QUERY(_, _, _, _, _), only_true) -> - mixed; -collect_stop_after_render(?QUERY(_, _, _, _, _), only_false) -> - only_false; -collect_stop_after_render(?QUERY(_, _, _, _, _), mixed) -> - mixed. - unset_rule_id_trace_meta_data() -> logger:update_process_metadata(#{ rule_ids => #{}, client_ids => #{}, stop_action_after_render => false diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 576806464..e73c2c44d 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -94,9 +94,6 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> %% =================================== %% Create trace for RuleId %% =================================== - Now = erlang:system_time(second) - 10, - Start = Now, - End = Now + 60, TraceName = atom_to_binary(?FUNCTION_NAME), TraceValue = case TraceType of @@ -105,16 +102,7 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> clientid -> ClientId end, - Trace = #{ - name => TraceName, - type => TraceType, - TraceType => 
TraceValue, - start_at => Start, - end_at => End - }, - emqx_trace_SUITE:reload(), - ok = emqx_trace:clear(), - {ok, _} = emqx_trace:create(Trace), + create_trace(TraceName, TraceType, TraceValue), %% =================================== Context = #{ clientid => ClientId, @@ -125,13 +113,12 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> username => <<"u_emqx">> }, Params = #{ - % body => #{ <<"context">> => Context, <<"stop_action_after_template_rendering">> => StopAfterRender - % } }, emqx_trace:check(), ok = emqx_trace_handler_SUITE:filesync(TraceName, TraceType), + Now = erlang:system_time(second) - 10, {ok, _} = file:read_file(emqx_trace:log_file(TraceName, Now)), ?assertMatch({ok, _}, call_apply_rule_api(RuleId, Params)), ?retry( @@ -173,14 +160,175 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> emqx_trace:delete(TraceName), ok. +create_trace(TraceName, TraceType, TraceValue) -> + Now = erlang:system_time(second) - 10, + Start = Now, + End = Now + 60, + Trace = #{ + name => TraceName, + type => TraceType, + TraceType => TraceValue, + start_at => Start, + end_at => End + }, + emqx_trace_SUITE:reload(), + ok = emqx_trace:clear(), + {ok, _} = emqx_trace:create(Trace). 
+ +t_apply_rule_test_batch_separation_stop_after_render(_Config) -> + MeckOpts = [passthrough, no_link, no_history, non_strict], + catch meck:new(emqx_connector_info, MeckOpts), + meck:expect( + emqx_connector_info, + hard_coded_test_connector_info_modules, + 0, + [emqx_rule_engine_test_connector_info] + ), + emqx_connector_info:clean_cache(), + catch meck:new(emqx_action_info, MeckOpts), + meck:expect( + emqx_action_info, + hard_coded_test_action_info_modules, + 0, + [emqx_rule_engine_test_action_info] + ), + emqx_action_info:clean_cache(), + {ok, _} = emqx_connector:create(rule_engine_test, ?FUNCTION_NAME, #{}), + Name = atom_to_binary(?FUNCTION_NAME), + ActionConf = + #{ + <<"connector">> => Name, + <<"parameters">> => + #{ + <<"values">> => + #{ + <<"send_to_pid">> => emqx_utils:bin_to_hexstr( + term_to_binary(self()), upper + ) + } + }, + <<"resource_opts">> => #{ + <<"batch_size">> => 1000, + <<"batch_time">> => 500 + } + }, + {ok, _} = emqx_bridge_v2:create( + rule_engine_test, + ?FUNCTION_NAME, + ActionConf + ), + SQL = <<"SELECT payload.is_stop_after_render as stop_after_render FROM \"", Name/binary, "\"">>, + {ok, RuleID} = create_rule_with_action( + rule_engine_test, + ?FUNCTION_NAME, + SQL + ), + create_trace(Name, ruleid, RuleID), + emqx_trace:check(), + ok = emqx_trace_handler_SUITE:filesync(Name, ruleid), + Now = erlang:system_time(second) - 10, + %% Stop + ParmsStopAfterRender = apply_rule_parms(true, Name), + ParmsNoStopAfterRender = apply_rule_parms(false, Name), + %% Check that batching is working + Count = 400, + CountMsgFun = + fun + CountMsgFunRec(0 = _CurCount, GotBatchWithAtLeastTwo) -> + GotBatchWithAtLeastTwo; + CountMsgFunRec(CurCount, GotBatchWithAtLeastTwo) -> + receive + List -> + Len = length(List), + CountMsgFunRec(CurCount - Len, GotBatchWithAtLeastTwo orelse (Len > 1)) + end + end, + lists:foreach( + fun(_) -> + {ok, _} = call_apply_rule_api(RuleID, ParmsStopAfterRender) + end, + lists:seq(1, Count) + ), + %% We should get the 
messages and at least one batch with more than 1 + true = CountMsgFun(Count, false), + %% We should check that we don't get any mixed batch + CheckBatchesFun = + fun + CheckBatchesFunRec(0 = _CurCount) -> + ok; + CheckBatchesFunRec(CurCount) -> + receive + [{_, #{<<"stop_after_render">> := StopValue}} | _] = List -> + [ + ?assertMatch(#{<<"stop_after_render">> := StopValue}, Msg) + || {_, Msg} <- List + ], + Len = length(List), + CheckBatchesFunRec(CurCount - Len) + end + end, + lists:foreach( + fun(_) -> + case rand:normal() < 0 of + true -> + {ok, _} = call_apply_rule_api(RuleID, ParmsStopAfterRender); + false -> + {ok, _} = call_apply_rule_api(RuleID, ParmsNoStopAfterRender) + end + end, + lists:seq(1, Count) + ), + CheckBatchesFun(Count), + %% Just check that the log file is created as expected + ?retry( + _Interval0 = 200, + _NAttempts0 = 20, + begin + Bin = read_rule_trace_file(Name, ruleid, Now), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_success">>])) + end + ), + ok. + +apply_rule_parms(StopAfterRender, Name) -> + Payload = #{<<"is_stop_after_render">> => StopAfterRender}, + Context = #{ + clientid => Name, + event_type => message_publish, + payload => emqx_utils_json:encode(Payload), + qos => 1, + topic => Name, + username => <<"u_emqx">> + }, + #{ + <<"context">> => Context, + <<"stop_action_after_template_rendering">> => StopAfterRender + }. + +create_rule_with_action(ActionType, ActionName, SQL) -> + BridgeId = emqx_bridge_resource:bridge_id(ActionType, ActionName), + Params = #{ + enable => true, + sql => SQL, + actions => [BridgeId] + }, + Path = emqx_mgmt_api_test_util:api_path(["rules"]), + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + ct:pal("rule action params: ~p", [Params]), + case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params) of + {ok, Res0} -> + #{<<"id">> := RuleId} = emqx_utils_json:decode(Res0, [return_maps]), + {ok, RuleId}; + Error -> + Error + end. 
+ %% Helper Functions call_apply_rule_api(RuleId, Params) -> Method = post, Path = emqx_mgmt_api_test_util:api_path(["rules", RuleId, "test"]), - ct:pal("sql test (http):\n ~p", [Params]), Res = request(Method, Path, Params), - ct:pal("sql test (http) result:\n ~p", [Res]), Res. request(Method, Path, Params) -> diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_test_action_info.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_test_action_info.erl new file mode 100644 index 000000000..91bbcb442 --- /dev/null +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_test_action_info.erl @@ -0,0 +1,101 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_rule_engine_test_action_info). + +-behaviour(emqx_action_info). + +-export([ + bridge_v1_type_name/0, + action_type_name/0, + connector_type_name/0, + schema_module/0 +]). + +-export([ + namespace/0, + roots/0, + fields/1, + desc/1 +]). + +-define(CONNECTOR_TYPE, rule_engine_test). +-define(ACTION_TYPE, ?CONNECTOR_TYPE). + +bridge_v1_type_name() -> ?ACTION_TYPE. + +action_type_name() -> ?ACTION_TYPE. + +connector_type_name() -> ?ACTION_TYPE. + +schema_module() -> emqx_rule_engine_test_action_info. + +%% ------------------------------------------------------------------------------------------------- +%% Hocon Schema Definitions + +namespace() -> "bridge_test_action_info". + +roots() -> []. 
+ +fields(Field) when + Field == "get_connector"; + Field == "put_connector"; + Field == "post_connector" +-> + Fields = + fields(connector_fields) ++ + emqx_connector_schema:resource_opts_ref(?MODULE, connector_resource_opts), + emqx_connector_schema:api_fields(Field, ?CONNECTOR_TYPE, Fields); +fields(Field) when + Field == "get_bridge_v2"; + Field == "post_bridge_v2"; + Field == "put_bridge_v2" +-> + emqx_bridge_v2_schema:api_fields(Field, ?ACTION_TYPE, fields(rule_engine_test_action)); +fields(action) -> + {?ACTION_TYPE, + hoconsc:mk( + hoconsc:map(name, hoconsc:ref(?MODULE, rule_engine_test_action)), + #{ + desc => <<"Test Action Config">>, + required => false + } + )}; +fields(rule_engine_test_action) -> + emqx_bridge_v2_schema:make_producer_action_schema( + hoconsc:mk( + hoconsc:ref(?MODULE, action_parameters), + #{ + required => true, + desc => undefined + } + ) + ); +fields(action_parameters) -> + [ + {values, + hoconsc:mk( + typerefl:map(), + #{desc => undefined, default => #{}} + )} + ]; +fields("config_connector") -> + emqx_connector_schema:common_fields() ++ + fields(connector_fields) ++ + emqx_connector_schema:resource_opts_ref(?MODULE, connector_resource_opts); +fields(connector_resource_opts) -> + emqx_connector_schema:resource_opts_fields(); +fields("config") -> + emqx_resource_schema:fields("resource_opts") ++ + fields(connector_fields); +fields(connector_fields) -> + [ + {values, + hoconsc:mk( + typerefl:map(), + #{desc => undefined, default => #{}} + )} + ]. +desc(_) -> + undefined. diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl new file mode 100644 index 000000000..5633ed27e --- /dev/null +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl @@ -0,0 +1,99 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_rule_engine_test_connector). + +-include_lib("emqx_connector/include/emqx_connector.hrl"). +-include_lib("typerefl/include/types.hrl"). +-include_lib("emqx/include/logger.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-behaviour(emqx_resource). + +%% callbacks of behaviour emqx_resource +-export([ + callback_mode/0, + on_start/2, + on_stop/2, + on_query/3, + on_batch_query/3, + on_get_status/2, + on_add_channel/4, + on_remove_channel/3, + on_get_channels/1, + on_get_channel_status/3 +]). + +%% =================================================================== +callback_mode() -> always_sync. + +on_start( + _InstId, + _Config +) -> + {ok, #{installed_channels => #{}}}. + +on_stop(_InstId, _State) -> + ok. + +on_add_channel( + _InstId, + #{ + installed_channels := InstalledChannels + } = OldState, + ChannelId, + ChannelConfig +) -> + NewInstalledChannels = maps:put(ChannelId, ChannelConfig, InstalledChannels), + NewState = OldState#{installed_channels => NewInstalledChannels}, + {ok, NewState}. + +on_remove_channel( + _InstId, + OldState, + _ChannelId +) -> + {ok, OldState}. + +on_get_channel_status( + _ResId, + _ChannelId, + _State +) -> + connected. + +on_get_channels(ResId) -> + emqx_bridge_v2:get_channels_for_connector(ResId). 
+ +on_query( + _InstId, + _Query, + _State +) -> + ok. + +on_batch_query( + _InstId, + [{ChannelId, _Req} | _] = Msg, + #{installed_channels := Channels} = _State +) -> + #{parameters := #{values := #{send_to_pid := PidBin}}} = maps:get(ChannelId, Channels), + Pid = binary_to_term(emqx_utils:hexstr_to_bin(PidBin)), + Pid ! Msg, + ok. + +on_get_status(_InstId, _State) -> + connected. diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector_info.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector_info.erl new file mode 100644 index 000000000..1c300bff8 --- /dev/null +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector_info.erl @@ -0,0 +1,43 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_rule_engine_test_connector_info). + +-behaviour(emqx_connector_info). + +-export([ + type_name/0, + bridge_types/0, + resource_callback_module/0, + config_schema/0, + schema_module/0, + api_schema/1 +]). + +type_name() -> + rule_engine_test. + +bridge_types() -> + [rule_engine_test]. + +resource_callback_module() -> + emqx_rule_engine_test_connector. + +config_schema() -> + {rule_engine_test, + hoconsc:mk( + hoconsc:map(name, hoconsc:ref(emqx_rule_engine_test_action_info, "config_connector")), + #{ + desc => <<"Test Connector Config">>, + required => false + } + )}. + +schema_module() -> + emqx_rule_engine_test_action_info. + +api_schema(Method) -> + emqx_connector_schema:api_ref( + ?MODULE, <<"rule_engine_test">>, Method ++ "_connector" + ). 
diff --git a/mix.exs b/mix.exs index 486484f72..6ce5c4885 100644 --- a/mix.exs +++ b/mix.exs @@ -60,7 +60,7 @@ defmodule EMQXUmbrella.MixProject do {:grpc, github: "emqx/grpc-erl", tag: "0.6.12", override: true}, {:minirest, github: "emqx/minirest", tag: "1.4.0", override: true}, {:ecpool, github: "emqx/ecpool", tag: "0.5.7", override: true}, - {:replayq, github: "emqx/replayq", tag: "0.3.7", override: true}, + {:replayq, github: "emqx/replayq", tag: "0.3.8", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: true}, # maybe forbid to fetch quicer {:emqtt, diff --git a/rebar.config b/rebar.config index 8bbe178aa..a0a726fcf 100644 --- a/rebar.config +++ b/rebar.config @@ -88,7 +88,7 @@ {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.12"}}}, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.4.0"}}}, {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.7"}}}, - {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.7"}}}, + {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.8"}}}, {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}, {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.10.1"}}}, {rulesql, {git, "https://github.com/emqx/rulesql", {tag, "0.2.0"}}}, From 6cf29ba68883f425b8101b12b22fd213231bbd98 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Wed, 17 Apr 2024 18:20:17 +0200 Subject: [PATCH 176/234] fix: clean up traces to make them easier to parse and understand --- apps/emqx/src/emqx_trace/emqx_trace.erl | 5 ++-- .../src/emqx_rule_runtime.erl | 23 +++++++++++++++---- .../emqx_rule_engine_api_rule_apply_SUITE.erl | 12 ++++++---- .../test/emqx_rule_engine_test_connector.erl | 1 + 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 17580745a..4ae973722 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ 
b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -96,13 +96,12 @@ rendered_action_template(ActionID, RenderResult) -> %% resource is called/modified StopMsg = lists:flatten( io_lib:format( - "action_stopped_after_render(~ts): " - "Action stopped after template render due to test setting.", + "Action ~ts stopped after template rendering due to test setting.", [ActionID] ) ), MsgBin = unicode:characters_to_binary(StopMsg), - error({unrecoverable_error, MsgBin}); + error({unrecoverable_error, {action_stopped_after_template_rendering, MsgBin}}); _ -> ok end, diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index 7181fb59b..3dfc5f6c8 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -709,27 +709,42 @@ inc_action_metrics(TraceCtx, Result) -> _ = do_inc_action_metrics(TraceCtx, Result), Result. +do_inc_action_metrics( + #{rule_id := RuleId, action_id := ActId} = TraceContext, + {error, {unrecoverable_error, {action_stopped_after_template_rendering, Explanation}} = _Reason} +) -> + TraceContext1 = maps:remove(action_id, TraceContext), + trace_action( + ActId, + "action_stopped_after_template_rendering", + maps:merge(#{reason => Explanation}, TraceContext1) + ), + emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), + emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'); do_inc_action_metrics( #{rule_id := RuleId, action_id := ActId} = TraceContext, {error, {recoverable_error, _}} ) -> - trace_action(ActId, "out_of_service", TraceContext), + TraceContext1 = maps:remove(action_id, TraceContext), + trace_action(ActId, "out_of_service", TraceContext1), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.out_of_service'); do_inc_action_metrics( #{rule_id := RuleId, action_id := ActId} = TraceContext, {error, {unrecoverable_error, _} = Reason} ) -> - trace_action(ActId, "action_failed", maps:merge(#{reason => 
Reason}, TraceContext)), + TraceContext1 = maps:remove(action_id, TraceContext), + trace_action(ActId, "action_failed", maps:merge(#{reason => Reason}, TraceContext1)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'); do_inc_action_metrics(#{rule_id := RuleId, action_id := ActId} = TraceContext, R) -> + TraceContext1 = maps:remove(action_id, TraceContext), case is_ok_result(R) of false -> - trace_action(ActId, "action_failed", maps:merge(#{reason => R}, TraceContext)), + trace_action(ActId, "action_failed", maps:merge(#{reason => R}, TraceContext1)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed'), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.failed.unknown'); true -> - trace_action(ActId, "action_success", maps:merge(#{result => R}, TraceContext)), + trace_action(ActId, "action_success", maps:merge(#{result => R}, TraceContext1)), emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.success') end. 
diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index e73c2c44d..063982367 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -143,7 +143,9 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> begin Bin = read_rule_trace_file(TraceName, TraceType, Now), io:format("THELOG2:~n~s", [Bin]), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_failed">>])) + ?assertNotEqual( + nomatch, binary:match(Bin, [<<"action_stopped_after_template_rendering">>]) + ) end ); false -> @@ -231,7 +233,7 @@ t_apply_rule_test_batch_separation_stop_after_render(_Config) -> ParmsStopAfterRender = apply_rule_parms(true, Name), ParmsNoStopAfterRender = apply_rule_parms(false, Name), %% Check that batching is working - Count = 400, + Count = 200, CountMsgFun = fun CountMsgFunRec(0 = _CurCount, GotBatchWithAtLeastTwo) -> @@ -285,7 +287,10 @@ t_apply_rule_test_batch_separation_stop_after_render(_Config) -> _NAttempts0 = 20, begin Bin = read_rule_trace_file(Name, ruleid, Now), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_success">>])) + ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_success">>])), + ?assertNotEqual( + nomatch, binary:match(Bin, [<<"action_stopped_after_template_rendering">>]) + ) end ), ok. @@ -364,5 +369,4 @@ read_rule_trace_file(TraceName, TraceType, From) -> emqx_trace:check(), ok = emqx_trace_handler_SUITE:filesync(TraceName, TraceType), {ok, Bin} = file:read_file(emqx_trace:log_file(TraceName, From)), - io_lib:format("MYTRACE:~n~s", [Bin]), Bin. 
diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl index 5633ed27e..c22c5fbd5 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_test_connector.erl @@ -93,6 +93,7 @@ on_batch_query( #{parameters := #{values := #{send_to_pid := PidBin}}} = maps:get(ChannelId, Channels), Pid = binary_to_term(emqx_utils:hexstr_to_bin(PidBin)), Pid ! Msg, + emqx_trace:rendered_action_template(ChannelId, #{nothing_to_render => ok}), ok. on_get_status(_InstId, _State) -> From 95891db29ab75529fd33b721587a8cab98395963 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Wed, 17 Apr 2024 18:43:19 +0200 Subject: [PATCH 177/234] test: clean up created resources in the end of test case --- .../test/emqx_rule_engine_api_rule_apply_SUITE.erl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 063982367..c875617ce 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -293,6 +293,12 @@ t_apply_rule_test_batch_separation_stop_after_render(_Config) -> ) end ), + %% Cleanup + ok = emqx_trace:delete(Name), + ok = emqx_rule_engine:delete_rule(RuleID), + ok = emqx_bridge_v2:remove(rule_engine_test, ?FUNCTION_NAME), + ok = emqx_connector:remove(rule_engine_test, ?FUNCTION_NAME), + [_, _] = meck:unload(), ok. 
apply_rule_parms(StopAfterRender, Name) -> From 5c014f4c299d2c34fd5b5dbd66565f78e93319a0 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 15:04:03 +0200 Subject: [PATCH 178/234] test: fix test cases --- apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl | 4 ++-- apps/emqx_dashboard/test/emqx_swagger_requestBody_SUITE.erl | 2 +- apps/emqx_dashboard/test/emqx_swagger_response_SUITE.erl | 4 ++-- apps/emqx_s3/test/emqx_s3_schema_SUITE.erl | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl index b0b0c3a03..cf96ce6cb 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl @@ -756,8 +756,8 @@ producer_strategy_key_validator( producer_strategy_key_validator(emqx_utils_maps:binary_key_map(Conf)); producer_strategy_key_validator(#{ <<"partition_strategy">> := key_dispatch, - <<"message">> := #{<<"key">> := ""} -}) -> + <<"message">> := #{<<"key">> := Key} +}) when Key =:= "" orelse Key =:= <<>> -> {error, "Message key cannot be empty when `key_dispatch` strategy is used"}; producer_strategy_key_validator(_) -> ok. diff --git a/apps/emqx_dashboard/test/emqx_swagger_requestBody_SUITE.erl b/apps/emqx_dashboard/test/emqx_swagger_requestBody_SUITE.erl index 917c24ffa..0f2448480 100644 --- a/apps/emqx_dashboard/test/emqx_swagger_requestBody_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_swagger_requestBody_SUITE.erl @@ -359,7 +359,7 @@ t_bad_ref(_Config) -> Refs = [{?MODULE, bad_ref}], Fields = fields(bad_ref), ?assertThrow( - {error, #{msg := <<"Object only supports not empty proplists">>, args := Fields}}, + {error, #{msg := <<"Object only supports non-empty fields list">>, args := Fields}}, validate(Path, Spec, Refs) ), ok. 
diff --git a/apps/emqx_dashboard/test/emqx_swagger_response_SUITE.erl b/apps/emqx_dashboard/test/emqx_swagger_response_SUITE.erl index 6fa3dbd3d..e9397f643 100644 --- a/apps/emqx_dashboard/test/emqx_swagger_response_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_swagger_response_SUITE.erl @@ -189,7 +189,7 @@ t_nest_object(_Config) -> t_empty(_Config) -> ?assertThrow( {error, #{ - msg := <<"Object only supports not empty proplists">>, + msg := <<"Object only supports non-empty fields list">>, args := [], module := ?MODULE }}, @@ -273,7 +273,7 @@ t_bad_ref(_Config) -> ?assertThrow( {error, #{ module := ?MODULE, - msg := <<"Object only supports not empty proplists">> + msg := <<"Object only supports non-empty fields list">> }}, validate(Path, Object, ExpectRefs) ), diff --git a/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl index ec02aeac4..170f8a065 100644 --- a/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl +++ b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl @@ -20,7 +20,7 @@ all() -> t_minimal_config(_Config) -> ?assertMatch( #{ - bucket := "bucket", + bucket := <<"bucket">>, host := "s3.us-east-1.endpoint.com", port := 443, min_part_size := 5242880, @@ -45,7 +45,7 @@ t_full_config(_Config) -> #{ access_key_id := "access_key_id", acl := public_read, - bucket := "bucket", + bucket := <<"bucket">>, host := "s3.us-east-1.endpoint.com", min_part_size := 10485760, port := 443, From 5a4bfff9e55486d36d911a6c5747aaebb9dbbbb6 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 15:10:26 +0200 Subject: [PATCH 179/234] refactor: add template_str type --- apps/emqx/src/emqx_schema.erl | 11 +++++++++-- apps/emqx_conf/src/emqx_conf_schema_types.erl | 6 ++++++ apps/emqx_s3/src/emqx_s3_client.erl | 3 +-- apps/emqx_s3/src/emqx_s3_schema.erl | 4 ++-- apps/emqx_s3/test/emqx_s3_schema_SUITE.erl | 4 ++-- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 
57ad00e99..02e31387e 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -62,6 +62,7 @@ -type url() :: binary(). -type json_binary() :: binary(). -type template() :: binary(). +-type template_str() :: string(). -typerefl_from_string({duration/0, emqx_schema, to_duration}). -typerefl_from_string({duration_s/0, emqx_schema, to_duration_s}). @@ -80,6 +81,7 @@ -typerefl_from_string({url/0, emqx_schema, to_url}). -typerefl_from_string({json_binary/0, emqx_schema, to_json_binary}). -typerefl_from_string({template/0, emqx_schema, to_template}). +-typerefl_from_string({template_str/0, emqx_schema, to_template_str}). -type parsed_server() :: #{ hostname := string(), @@ -123,7 +125,8 @@ to_comma_separated_atoms/1, to_url/1, to_json_binary/1, - to_template/1 + to_template/1, + to_template_str/1 ]). -export([ @@ -164,7 +167,8 @@ url/0, json_binary/0, port_number/0, - template/0 + template/0, + template_str/0 ]). -export([namespace/0, roots/0, roots/1, fields/1, desc/1, tags/0]). @@ -2601,6 +2605,9 @@ to_json_binary(Str) -> to_template(Str) -> {ok, iolist_to_binary(Str)}. +to_template_str(Str) -> + {ok, unicode:characters_to_list(Str, utf8)}. 
+ %% @doc support the following format: %% - 127.0.0.1:1883 %% - ::1:1883 diff --git a/apps/emqx_conf/src/emqx_conf_schema_types.erl b/apps/emqx_conf/src/emqx_conf_schema_types.erl index dbfbe74bc..bcc9c1469 100644 --- a/apps/emqx_conf/src/emqx_conf_schema_types.erl +++ b/apps/emqx_conf/src/emqx_conf_schema_types.erl @@ -76,6 +76,12 @@ readable("template()") -> dashboard => #{type => string, is_template => true}, docgen => #{type => "String", desc => ?DESC(template)} }; +readable("template_str()") -> + #{ + swagger => #{type => string}, + dashboard => #{type => string, is_template => true}, + docgen => #{type => "String", desc => ?DESC(template)} + }; readable("binary()") -> #{ swagger => #{type => string}, diff --git a/apps/emqx_s3/src/emqx_s3_client.erl b/apps/emqx_s3/src/emqx_s3_client.erl index b7bd85833..a415cf8d4 100644 --- a/apps/emqx_s3/src/emqx_s3_client.erl +++ b/apps/emqx_s3/src/emqx_s3_client.erl @@ -103,7 +103,7 @@ put_object(Client, Key, Value) -> -spec put_object(client(), key(), upload_options(), iodata()) -> ok_or_error(term()). 
put_object( - #{bucket := Bucket0, headers := BaseHeaders, aws_config := AwsConfig = #aws_config{}}, + #{bucket := Bucket, headers := BaseHeaders, aws_config := AwsConfig = #aws_config{}}, Key, UploadOpts, Content @@ -111,7 +111,6 @@ put_object( ECKey = erlcloud_key(Key), ECOpts = erlcloud_upload_options(UploadOpts), Headers = join_headers(BaseHeaders, maps:get(headers, UploadOpts, undefined)), - Bucket = to_list_string(Bucket0), try erlcloud_s3:put_object(Bucket, ECKey, Content, ECOpts, Headers, AwsConfig) of Props when is_list(Props) -> ok diff --git a/apps/emqx_s3/src/emqx_s3_schema.erl b/apps/emqx_s3/src/emqx_s3_schema.erl index de5e4f53e..1199948d0 100644 --- a/apps/emqx_s3/src/emqx_s3_schema.erl +++ b/apps/emqx_s3/src/emqx_s3_schema.erl @@ -74,7 +74,7 @@ fields(s3_upload) -> [ {bucket, mk( - emqx_schema:template(), + emqx_schema:template_str(), #{ desc => ?DESC("bucket"), required => true @@ -82,7 +82,7 @@ fields(s3_upload) -> )}, {key, mk( - emqx_schema:template(), + emqx_schema:template_str(), #{ desc => ?DESC("key"), required => true diff --git a/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl index 170f8a065..ec02aeac4 100644 --- a/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl +++ b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl @@ -20,7 +20,7 @@ all() -> t_minimal_config(_Config) -> ?assertMatch( #{ - bucket := <<"bucket">>, + bucket := "bucket", host := "s3.us-east-1.endpoint.com", port := 443, min_part_size := 5242880, @@ -45,7 +45,7 @@ t_full_config(_Config) -> #{ access_key_id := "access_key_id", acl := public_read, - bucket := <<"bucket">>, + bucket := "bucket", host := "s3.us-east-1.endpoint.com", min_part_size := 10485760, port := 443, From c96ae8dd2375666a63aebc3431efc8d092347bed Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 16:11:26 +0200 Subject: [PATCH 180/234] fix: return 503 if bridge bpapi call timeout --- apps/emqx_bridge/src/emqx_bridge_v2_api.erl | 13 +++++++++---- 1 file changed, 9 
insertions(+), 4 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl index a7bef1952..e33e1ca07 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl @@ -1007,7 +1007,13 @@ call_operation(NodeOrAll, OperFunc, Args = [_Nodes, _ConfRootKey, BridgeType, Br {error, not_implemented} -> ?NOT_IMPLEMENTED; {error, timeout} -> - ?BAD_REQUEST(<<"Request timeout">>); + BridgeId = emqx_bridge_resource:bridge_id(BridgeType, BridgeName), + ?SLOG(warning, #{ + msg => "bridge_bpapi_call_timeout", + bridge => BridgeId, + call => OperFunc + }), + ?SERVICE_UNAVAILABLE(<<"Request timeout">>); {error, {start_pool_failed, Name, Reason}} -> Msg = bin( io_lib:format("Failed to start ~p pool for reason ~p", [Name, redact(Reason)]) @@ -1018,9 +1024,8 @@ call_operation(NodeOrAll, OperFunc, Args = [_Nodes, _ConfRootKey, BridgeType, Br ?SLOG(warning, #{ msg => "bridge_inconsistent_in_cluster_for_call_operation", reason => not_found, - type => BridgeType, - name => BridgeName, - bridge => BridgeId + bridge => BridgeId, + call => OperFunc }), ?SERVICE_UNAVAILABLE(<<"Bridge not found on remote node: ", BridgeId/binary>>); {error, {node_not_found, Node}} -> From 6ab2b004ed24c2c583336df14140a1d4b4ea5b80 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 16:40:25 +0200 Subject: [PATCH 181/234] fix(resource_manager): update cache after channel add --- apps/emqx_resource/src/emqx_resource_manager.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index fcdf56202..6d9ad50e4 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -1058,7 +1058,8 @@ channels_health_check(?status_connected = _ConnectorStatus, Data0) -> get_config_for_channels(Data0, ChannelsNotAdded), Data1 = 
add_channels_in_list(ChannelsNotAddedWithConfigs, Data0), %% Now that we have done the adding, we can get the status of all channels - trigger_health_check_for_added_channels(Data1); + Data2 = trigger_health_check_for_added_channels(Data1), + update_state(Data2, Data0); channels_health_check(?status_connecting = _ConnectorStatus, Data0) -> %% Whenever the resource is connecting: %% 1. Change the status of all added channels to connecting From 55941000c02dab8168608e6ddad571e9f470d9e1 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 17 Apr 2024 20:42:03 +0200 Subject: [PATCH 182/234] test: make test case more stable, less flaky --- .../test/emqx_bridge_pulsar_connector_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl index b3c351da0..cd54e2194 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl @@ -1235,7 +1235,7 @@ t_resilience(Config) -> after 1_000 -> ct:fail("producer didn't stop!") end, Consumed = lists:flatmap( - fun(_) -> receive_consumed(5_000) end, lists:seq(1, NumProduced) + fun(_) -> receive_consumed(10_000) end, lists:seq(1, NumProduced) ), ?assertEqual(NumProduced, length(Consumed)), ExpectedPayloads = lists:map(fun integer_to_binary/1, lists:seq(1, NumProduced)), From b16b9d8fcc3a4a3ca36f0c30f341c88995d8470c Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 06:51:30 +0200 Subject: [PATCH 183/234] refactor: made code more readable thanks to suggestion from @zmstone --- .../src/emqx_resource_buffer_worker.erl | 82 ++++--------------- 1 file changed, 16 insertions(+), 66 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 24eb86d37..bc1aea734 100644 --- 
a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -585,7 +585,7 @@ flush(Data0) -> ?tp(buffer_worker_flush_before_pop, #{}), PopOpts = #{ count_limit => BatchSize, - stop_before => {fun stop_before_mixed_stop_after_render/2, initial_state} + stop_before => {fun stop_batching/2, initial_state} }, {Q1, QAckRef, Batch} = replayq:pop(Q0, PopOpts), Data2 = Data1#{queue := Q1}, @@ -623,72 +623,22 @@ flush(Data0) -> end end. -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - #{stop_action_after_render := true} = _TraceCtx - ), - initial_state -) -> +stop_batching(Query, initial_state) -> + get_stop_flag(Query); +stop_batching(Query, PrevStopFlag) -> + case get_stop_flag(Query) =:= PrevStopFlag of + true -> + PrevStopFlag; + false -> + %% We stop because we don't want a batch with mixed values for the + %% stop_action_after_render option + true + end. + +get_stop_flag(?QUERY(_, _, _, _, #{stop_action_after_render := true})) -> stop_action_after_render; -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - _TraceCtx - ), - initial_state -) -> - no_stop_action_after_render; -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - #{stop_action_after_render := true} = _TraceCtx - ), - no_stop_action_after_render -) -> - true; -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - #{stop_action_after_render := true} = _TraceCtx - ), - stop_action_after_render -) -> - stop_action_after_render; -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - _TraceCtx - ), - stop_action_after_render -) -> - true; -stop_before_mixed_stop_after_render( - ?QUERY( - _, - _, - _, - _, - _TraceCtx - ), - State -) -> - State. +get_stop_flag(_) -> + no_stop_action_after_render. 
-spec do_flush(data(), #{ is_batch := boolean(), From 75632bb2cd849931e6774a0b04b87987c4ed2616 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Thu, 18 Apr 2024 13:58:31 +0800 Subject: [PATCH 184/234] chore(gw): update listener examples --- apps/emqx_gateway/src/emqx_gateway.app.src | 2 +- .../src/emqx_gateway_api_listeners.erl | 108 ++++++++++++++++-- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/apps/emqx_gateway/src/emqx_gateway.app.src b/apps/emqx_gateway/src/emqx_gateway.app.src index 731a1807c..3c6634edc 100644 --- a/apps/emqx_gateway/src/emqx_gateway.app.src +++ b/apps/emqx_gateway/src/emqx_gateway.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_gateway, [ {description, "The Gateway management application"}, - {vsn, "0.1.31"}, + {vsn, "0.1.32"}, {registered, []}, {mod, {emqx_gateway_app, []}}, {applications, [kernel, stdlib, emqx, emqx_auth, emqx_ctl]}, diff --git a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl index 22d76fe60..17fbf1e31 100644 --- a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl +++ b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl @@ -762,9 +762,9 @@ examples_listener() -> <<"tlsv1.1">>, <<"tlsv1">> ], - cacertfile => <<"/etc/emqx/certs/cacert.pem">>, - certfile => <<"/etc/emqx/certs/cert.pem">>, - keyfile => <<"/etc/emqx/certs/key.pem">>, + cacertfile => <<"etc/certs/cacert.pem">>, + certfile => <<"etc/certs/cert.pem">>, + keyfile => <<"etc/certs/key.pem">>, verify => <<"verify_none">>, fail_if_no_peer_cert => false }, @@ -808,9 +808,9 @@ examples_listener() -> dtls_options => #{ versions => [<<"dtlsv1.2">>, <<"dtlsv1">>], - cacertfile => <<"/etc/emqx/certs/cacert.pem">>, - certfile => <<"/etc/emqx/certs/cert.pem">>, - keyfile => <<"/etc/emqx/certs/key.pem">>, + cacertfile => <<"etc/certs/cacert.pem">>, + certfile => <<"etc/certs/cert.pem">>, + keyfile => <<"etc/certs/key.pem">>, verify => <<"verify_none">>, fail_if_no_peer_cert => 
false }, @@ -835,9 +835,9 @@ examples_listener() -> dtls_options => #{ versions => [<<"dtlsv1.2">>, <<"dtlsv1">>], - cacertfile => <<"/etc/emqx/certs/cacert.pem">>, - certfile => <<"/etc/emqx/certs/cert.pem">>, - keyfile => <<"/etc/emqx/certs/key.pem">>, + cacertfile => <<"etc/certs/cacert.pem">>, + certfile => <<"etc/certs/cert.pem">>, + keyfile => <<"etc/certs/key.pem">>, verify => <<"verify_none">>, user_lookup_fun => <<"emqx_tls_psk:lookup">>, ciphers => @@ -869,5 +869,95 @@ examples_listener() -> user_id_type => <<"username">> } } + }, + ws_listener => + #{ + summary => <<"A simple WebSocket listener example">>, + value => + #{ + name => <<"ws-def">>, + type => <<"ws">>, + bind => <<"33043">>, + acceptors => 16, + max_connections => 1024000, + max_conn_rate => 1000, + websocket => + #{ + path => <<"/ocpp">>, + fail_if_no_subprotocol => true, + supported_subprotocols => <<"ocpp1.6">>, + check_origin_enable => false, + check_origins => + <<"http://localhost:18083, http://127.0.0.1:18083">>, + compress => false, + piggyback => <<"single">> + }, + tcp_options => + #{ + active_n => 100, + backlog => 1024, + send_timeout => <<"15s">>, + send_timeout_close => true, + recbuf => <<"10KB">>, + sndbuf => <<"10KB">>, + buffer => <<"10KB">>, + high_watermark => <<"1MB">>, + nodelay => false, + reuseaddr => true, + keepalive => "none" + } + } + }, + wss_listener => + #{ + summary => <<"A simple WebSocket/TLS listener example">>, + value => + #{ + name => <<"ws-ssl-def">>, + type => <<"wss">>, + bind => <<"33053">>, + acceptors => 16, + max_connections => 1024000, + max_conn_rate => 1000, + websocket => + #{ + path => <<"/ocpp">>, + fail_if_no_subprotocol => true, + supported_subprotocols => <<"ocpp1.6">>, + check_origin_enable => false, + check_origins => + <<"http://localhost:18083, http://127.0.0.1:18083">>, + compress => false, + piggyback => <<"single">> + }, + ssl_options => + #{ + versions => [ + <<"tlsv1.3">>, + <<"tlsv1.2">>, + <<"tlsv1.1">>, + <<"tlsv1">> + ], + 
cacertfile => <<"etc/certs/cacert.pem">>, + certfile => <<"etc/certs/cert.pem">>, + keyfile => <<"etc/certs/key.pem">>, + verify => <<"verify_none">>, + fail_if_no_peer_cert => false + }, + tcp_options => + #{ + active_n => 100, + backlog => 1024, + send_timeout => <<"15s">>, + send_timeout_close => true, + recbuf => <<"10KB">>, + sndbuf => <<"10KB">>, + buffer => <<"10KB">>, + high_watermark => <<"1MB">>, + nodelay => false, + reuseaddr => true, + keepalive => "none" + } + } } }. From e93bd314bc9526810431d1cc89a51ea6031283c4 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 18 Apr 2024 08:03:56 +0200 Subject: [PATCH 185/234] chore: 5.6.1 --- apps/emqx/include/emqx_release.hrl | 4 ++-- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- deploy/charts/emqx/Chart.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 9d4437a3f..270acf07a 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.6.1-rc.2"). +-define(EMQX_RELEASE_CE, "5.6.1"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.6.1-rc.2"). +-define(EMQX_RELEASE_EE, "5.6.1"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index 1ee736e4b..3178cce62 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.1-rc.2 +version: 5.6.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: 5.6.1-rc.2 +appVersion: 5.6.1 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index b2a755e72..e4c15c7f7 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.6.1-rc.2 +version: 5.6.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.6.1-rc.2 +appVersion: 5.6.1 From d7ebecddb4a225178c32e9491d11230e06ef240b Mon Sep 17 00:00:00 2001 From: JianBo He Date: Thu, 18 Apr 2024 14:54:12 +0800 Subject: [PATCH 186/234] fix(ocpp): return correct current_connections number of listeners http api --- .../src/emqx_gateway_api_listeners.erl | 39 ++++++++++----- .../test/emqx_ocpp_SUITE.erl | 50 +++++++++++++++++++ changes/ce/fix-12892.md | 1 - changes/ee/fix-12892.md | 3 ++ 4 files changed, 79 insertions(+), 14 deletions(-) delete mode 100644 changes/ce/fix-12892.md create mode 100644 changes/ee/fix-12892.md diff --git a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl index 17fbf1e31..aee04969b 100644 --- a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl +++ b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl @@ -247,9 +247,10 @@ page_params(Qs) -> get_cluster_listeners_info(GwName) -> Listeners = emqx_gateway_conf:listeners(GwName), ListenOns = lists:map( - fun(#{id := Id} = Conf) -> + fun(#{id := Id, type := Type0} = Conf) -> + Type = binary_to_existing_atom(Type0), ListenOn = emqx_gateway_conf:get_bind(Conf), - {Id, ListenOn} + {Type, Id, ListenOn} end, Listeners ), @@ -293,17 +294,11 @@ listeners_cluster_status(Listeners) -> do_listeners_cluster_status(Listeners) -> Node = node(), lists:foldl( - fun({Id, ListenOn}, Acc) -> 
BinId = erlang:atom_to_binary(Id), - {ok, #{<<"max_connections">> := Max}} = emqx_gateway_conf:listener(BinId), - {Running, Curr} = - try esockd:get_current_connections({Id, ListenOn}) of - Int -> {true, Int} - catch - %% not started - error:not_found -> - {false, 0} - end, + fun({Type, Id, ListenOn}, Acc) -> + {Running, Curr} = current_listener_status(Type, Id, ListenOn), + {ok, #{<<"max_connections">> := Max}} = emqx_gateway_conf:listener( + erlang:atom_to_binary(Id) + ), Acc#{ Id => #{ node => Node, @@ -319,6 +314,24 @@ do_listeners_cluster_status(Listeners) -> Listeners ). +current_listener_status(Type, Id, _ListenOn) when Type =:= ws; Type =:= wss -> + Info = ranch:info(Id), + Conns = proplists:get_value(all_connections, Info, 0), + Running = + case proplists:get_value(status, Info) of + running -> true; + _ -> false + end, + {Running, Conns}; +current_listener_status(_Type, Id, ListenOn) -> + try esockd:get_current_connections({Id, ListenOn}) of + Int -> {true, Int} + catch + %% not started + error:not_found -> + {false, 0} + end. + ensure_integer_or_infinity(infinity) -> infinity; ensure_integer_or_infinity(<<"infinity">>) -> diff --git a/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl b/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl index b72eb9e1d..e63f8891d 100644 --- a/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl +++ b/apps/emqx_gateway_ocpp/test/emqx_ocpp_SUITE.erl @@ -181,8 +181,55 @@ t_adjust_keepalive_timer(_Config) -> ?assertMatch( #{conninfo := #{keepalive := 300}}, emqx_gateway_cm:get_chan_info(ocpp, <<"client1">>) ), + %% close conns + close(ClientPid), + timer:sleep(1000), + %% assert: + ?assertEqual(undefined, emqx_gateway_cm:get_chan_info(ocpp, <<"client1">>)), ok. 
+t_listeners_status(_Config) -> + {200, [Listener]} = request(get, "/gateways/ocpp/listeners"), + ?assertMatch( + #{ + status := #{running := true, current_connections := 0} + }, + Listener + ), + %% add a connection + {ok, ClientPid} = connect("127.0.0.1", 33033, <<"client1">>), + UniqueId = <<"3335862321">>, + BootNotification = #{ + id => UniqueId, + type => ?OCPP_MSG_TYPE_ID_CALL, + action => <<"BootNotification">>, + payload => #{ + <<"chargePointVendor">> => <<"vendor1">>, + <<"chargePointModel">> => <<"model1">> + } + }, + ok = send_msg(ClientPid, BootNotification), + timer:sleep(1000), + %% assert: the current_connections is 1 + {200, [Listener1]} = request(get, "/gateways/ocpp/listeners"), + ?assertMatch( + #{ + status := #{running := true, current_connections := 1} + }, + Listener1 + ), + %% close conns + close(ClientPid), + timer:sleep(1000), + %% assert: the current_connections is 0 + {200, [Listener2]} = request(get, "/gateways/ocpp/listeners"), + ?assertMatch( + #{ + status := #{running := true, current_connections := 0} + }, + Listener2 + ). + %%-------------------------------------------------------------------- %% ocpp simple client @@ -229,3 +276,6 @@ receive_msg(ConnPid) -> after 5000 -> {error, timeout} end. + +close(ConnPid) -> + gun:shutdown(ConnPid). diff --git a/changes/ce/fix-12892.md b/changes/ce/fix-12892.md deleted file mode 100644 index c6f52629a..000000000 --- a/changes/ce/fix-12892.md +++ /dev/null @@ -1 +0,0 @@ -Avoid printing error logs when processing downstream messages diff --git a/changes/ee/fix-12892.md b/changes/ee/fix-12892.md new file mode 100644 index 000000000..47463925b --- /dev/null +++ b/changes/ee/fix-12892.md @@ -0,0 +1,3 @@ +Fix a logical error in OCPP gateway's handling of downstream BootNotification. + +Fix the `gateways/ocpp/listeners` endpoint to return the correct current connection number. 
From ab763fe6656c42df5c59dd2a31feebdfe05bcf7d Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 18 Apr 2024 09:32:05 +0200 Subject: [PATCH 187/234] test: fix test case flakyness --- .../src/emqx_authn/emqx_authn_chains.erl | 19 ++++++++++--------- .../test/emqx_authn/emqx_authn_init_SUITE.erl | 3 ++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/apps/emqx_auth/src/emqx_authn/emqx_authn_chains.erl b/apps/emqx_auth/src/emqx_authn/emqx_authn_chains.erl index 62163dda3..0d21058e3 100644 --- a/apps/emqx_auth/src/emqx_authn/emqx_authn_chains.erl +++ b/apps/emqx_auth/src/emqx_authn/emqx_authn_chains.erl @@ -353,13 +353,13 @@ init(_Opts) -> ok = emqx_config_handler:add_handler([listeners, '?', '?', ?CONF_ROOT], Module), ok = hook_deny(), {ok, #{hooked => false, providers => #{}, init_done => false}, - {continue, initialize_authentication}}. + {continue, {initialize_authentication, init}}}. handle_call(get_providers, _From, #{providers := Providers} = State) -> reply(Providers, State); handle_call( {register_providers, Providers}, - _From, + From, #{providers := Reg0} = State ) -> case lists:filter(fun({T, _}) -> maps:is_key(T, Reg0) end, Providers) of @@ -371,7 +371,7 @@ handle_call( Reg0, Providers ), - reply(ok, State#{providers := Reg}, initialize_authentication); + reply(ok, State#{providers := Reg}, {initialize_authentication, From}); Clashes -> reply({error, {authentication_type_clash, Clashes}}, State) end; @@ -447,10 +447,10 @@ handle_call(Req, _From, State) -> ?SLOG(error, #{msg => "unexpected_call", call => Req}), {reply, ignored, State}. 
-handle_continue(initialize_authentication, #{init_done := true} = State) -> +handle_continue({initialize_authentication, _From}, #{init_done := true} = State) -> {noreply, State}; -handle_continue(initialize_authentication, #{providers := Providers} = State) -> - InitDone = initialize_authentication(Providers), +handle_continue({initialize_authentication, From}, #{providers := Providers} = State) -> + InitDone = initialize_authentication(Providers, From), {noreply, maybe_hook(State#{init_done := InitDone})}. handle_cast(Req, State) -> @@ -484,11 +484,13 @@ code_change(_OldVsn, State, _Extra) -> %% Private functions %%------------------------------------------------------------------------------ -initialize_authentication(Providers) -> +initialize_authentication(Providers, From) -> ProviderTypes = maps:keys(Providers), Chains = chain_configs(), HasProviders = has_providers_for_configs(Chains, ProviderTypes), - do_initialize_authentication(Providers, Chains, HasProviders). + Result = do_initialize_authentication(Providers, Chains, HasProviders), + ?tp(info, authn_chains_initialization_done, #{from => From, result => Result}), + Result. do_initialize_authentication(_Providers, _Chains, _HasProviders = false) -> false; @@ -500,7 +502,6 @@ do_initialize_authentication(Providers, Chains, _HasProviders = true) -> Chains ), ok = unhook_deny(), - ?tp(info, authn_chains_initialization_done, #{}), true. 
initialize_chain_authentication(_Providers, _ChainName, []) -> diff --git a/apps/emqx_auth/test/emqx_authn/emqx_authn_init_SUITE.erl b/apps/emqx_auth/test/emqx_authn/emqx_authn_init_SUITE.erl index fec1f3fa4..78e179ccb 100644 --- a/apps/emqx_auth/test/emqx_authn/emqx_authn_init_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authn/emqx_authn_init_SUITE.erl @@ -69,9 +69,10 @@ t_initialize(_Config) -> emqx_access_control:authenticate(?CLIENTINFO) ), + Self = self(), ?assertWaitEvent( ok = emqx_authn_test_lib:register_fake_providers([{password_based, built_in_database}]), - #{?snk_kind := authn_chains_initialization_done}, + #{?snk_kind := authn_chains_initialization_done, from := {Self, _}}, 100 ), From a6558740e8320e0d2af199ba6b251aebf1b46b04 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 11 Apr 2024 12:09:04 +0200 Subject: [PATCH 188/234] feat(emqx_trace): add JSON trace log entry formatter This commit makes it possible to select the JSON trace log entry formatter when creating a trace pattern. This will make it easier for the dashboard and automatic tests to parse the log entries. 
Fixes: EMQX-12025 (partly) --- apps/emqx/include/emqx_trace.hrl | 3 +- apps/emqx/src/emqx_trace/emqx_trace.erl | 19 ++- .../src/emqx_trace/emqx_trace_handler.erl | 7 + .../emqx_trace/emqx_trace_json_formatter.erl | 113 ++++++++++++++ .../src/emqx_mgmt_api_trace.erl | 9 ++ .../test/emqx_mgmt_api_trace_SUITE.erl | 145 ++++++++++++++++++ rel/i18n/emqx_mgmt_api_trace.hocon | 5 + 7 files changed, 296 insertions(+), 5 deletions(-) create mode 100644 apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl diff --git a/apps/emqx/include/emqx_trace.hrl b/apps/emqx/include/emqx_trace.hrl index 476794223..a2f77c044 100644 --- a/apps/emqx/include/emqx_trace.hrl +++ b/apps/emqx/include/emqx_trace.hrl @@ -32,7 +32,8 @@ payload_encode = text :: hex | text | hidden | '_', extra = #{} :: map() | '_', start_at :: integer() | undefined | '_', - end_at :: integer() | undefined | '_' + end_at :: integer() | undefined | '_', + formatter = plain :: plain | json }). -define(SHARD, ?COMMON_SHARD). diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 4ae973722..6a255806a 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -183,8 +183,10 @@ create(Trace) -> case mnesia:table_info(?TRACE, size) < ?MAX_SIZE of true -> case to_trace(Trace) of - {ok, TraceRec} -> insert_new_trace(TraceRec); - {error, Reason} -> {error, Reason} + {ok, TraceRec} -> + insert_new_trace(TraceRec); + {error, Reason} -> + {error, Reason} end; false -> {error, @@ -392,9 +394,16 @@ start_trace(Trace) -> type = Type, filter = Filter, start_at = Start, - payload_encode = PayloadEncode + payload_encode = PayloadEncode, + formatter = Formatter } = Trace, - Who = #{name => Name, type => Type, filter => Filter, payload_encode => PayloadEncode}, + Who = #{ + name => Name, + type => Type, + filter => Filter, + payload_encode => PayloadEncode, + formatter => Formatter + }, emqx_trace_handler:install(Who, debug, log_file(Name, Start)). 
stop_trace(Finished, Started) -> @@ -559,6 +568,8 @@ to_trace(#{end_at := EndAt} = Trace, Rec) -> {ok, _Sec} -> {error, "end_at time has already passed"} end; +to_trace(#{formatter := Formatter} = Trace, Rec) -> + to_trace(maps:remove(formatter, Trace), Rec#?TRACE{formatter = Formatter}); to_trace(_, Rec) -> {ok, Rec}. diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index c69809052..d80fa70ec 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -183,6 +183,13 @@ filters(#{type := ip_address, filter := Filter, name := Name}) -> filters(#{type := ruleid, filter := Filter, name := Name}) -> [{ruleid, {fun ?MODULE:filter_ruleid/2, {ensure_bin(Filter), Name}}}]. +formatter(#{type := _Type, payload_encode := PayloadEncode, formatter := json}) -> + {emqx_trace_json_formatter, #{ + single_line => true, + max_size => unlimited, + depth => unlimited, + payload_encode => PayloadEncode + }}; formatter(#{type := _Type, payload_encode := PayloadEncode}) -> {emqx_trace_formatter, #{ %% template is for ?SLOG message not ?TRACE. diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl new file mode 100644 index 000000000..b0548ac04 --- /dev/null +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -0,0 +1,113 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_trace_json_formatter). + +-include("emqx_mqtt.hrl"). + +-export([format/2]). + +%% logger_formatter:config/0 is not exported. +-type config() :: map(). + +%%%----------------------------------------------------------------- +%%% Callback Function +%%%----------------------------------------------------------------- + +-spec format(LogEvent, Config) -> unicode:chardata() when + LogEvent :: logger:log_event(), + Config :: config(). +format( + LogMap, + #{payload_encode := PEncode} +) -> + Time = emqx_utils_calendar:now_to_rfc3339(microsecond), + LogMap1 = LogMap#{time => Time}, + [format_log_map(LogMap1, PEncode), "\n"]. + +%%%----------------------------------------------------------------- +%%% Helper Functions +%%%----------------------------------------------------------------- + +format_log_map(Map, PEncode) -> + KeyValuePairs = format_key_value_pairs(maps:to_list(Map), PEncode, []), + ["{", KeyValuePairs, "}"]. 
+ +format_key_value_pairs([], _PEncode, Acc) -> + lists:join(",", Acc); +format_key_value_pairs([{payload, Value} | Rest], PEncode, Acc) -> + FormattedPayload = format_payload(Value, PEncode), + FormattedPayloadEscaped = escape(FormattedPayload), + Pair = ["\"payload\": \"", FormattedPayloadEscaped, "\""], + format_key_value_pairs(Rest, PEncode, [Pair | Acc]); +format_key_value_pairs([{packet, Value} | Rest], PEncode, Acc) -> + Formatted = format_packet(Value, PEncode), + FormattedEscaped = escape(Formatted), + Pair = ["\"packet\": \"", FormattedEscaped, "\""], + format_key_value_pairs(Rest, PEncode, [Pair | Acc]); +format_key_value_pairs([{Key, Value} | Rest], PEncode, Acc) -> + FormattedKey = format_key(Key), + FormattedValue = format_value(Value, PEncode), + Pair = ["\"", FormattedKey, "\":", FormattedValue], + format_key_value_pairs(Rest, PEncode, [Pair | Acc]). + +format_key(Term) -> + %% Keys must be strings + String = emqx_logger_textfmt:try_format_unicode(Term), + escape(String). + +format_value(Map, PEncode) when is_map(Map) -> + format_log_map(Map, PEncode); +format_value(V, _PEncode) when is_integer(V) -> + integer_to_list(V); +format_value(V, _PEncode) when is_float(V) -> + float_to_list(V, [{decimals, 2}]); +format_value(true, _PEncode) -> + "true"; +format_value(false, _PEncode) -> + "false"; +format_value(V, _PEncode) -> + String = emqx_logger_textfmt:try_format_unicode(V), + ["\"", escape(String), "\""]. + +escape(IOList) -> + Bin = iolist_to_binary(IOList), + List = binary_to_list(Bin), + escape_list(List). + +escape_list([]) -> + []; +escape_list([$\n | Rest]) -> + %% 92 is backslash + [92, $n | escape_list(Rest)]; +escape_list([$" | Rest]) -> + [92, $" | escape_list(Rest)]; +escape_list([92 | Rest]) -> + [92, 92 | escape_list(Rest)]; +escape_list([X | Rest]) -> + [X | escape_list(Rest)]. + +format_packet(undefined, _) -> ""; +format_packet(Packet, Encode) -> emqx_packet:format(Packet, Encode). 
+
+format_payload(undefined, _) ->
+    "";
+format_payload(_, hidden) ->
+    "******";
+format_payload(Payload, text) when ?MAX_PAYLOAD_FORMAT_LIMIT(Payload) ->
+    unicode:characters_to_list(Payload);
+format_payload(Payload, hex) when ?MAX_PAYLOAD_FORMAT_LIMIT(Payload) -> binary:encode_hex(Payload);
+format_payload(<<Part:?TRUNCATED_PAYLOAD_SIZE/binary, _/binary>> = Payload, Type) ->
+    emqx_packet:format_truncated_payload(Part, byte_size(Payload), Type).
diff --git a/apps/emqx_management/src/emqx_mgmt_api_trace.erl b/apps/emqx_management/src/emqx_mgmt_api_trace.erl
index 19edc229d..f0efe9f85 100644
--- a/apps/emqx_management/src/emqx_mgmt_api_trace.erl
+++ b/apps/emqx_management/src/emqx_mgmt_api_trace.erl
@@ -314,6 +314,15 @@ fields(trace) ->
                     example => [#{<<"node">> => <<"emqx@127.0.0.1">>, <<"size">> => 1024}],
                     required => false
                 }
+            )},
+        {formatter,
+            hoconsc:mk(
+                hoconsc:union([plain, json]),
+                #{
+                    description => ?DESC(trace_log_formatter),
+                    example => plain,
+                    required => false
+                }
             )}
     ];
 fields(name) ->
diff --git a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl
index ef7b5a191..de74316f4 100644
--- a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl
+++ b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl
@@ -23,6 +23,7 @@
 -include_lib("kernel/include/file.hrl").
 -include_lib("stdlib/include/zip.hrl").
 -include_lib("snabbkaffe/include/snabbkaffe.hrl").
+-include_lib("emqx/include/logger.hrl").
 
 %%--------------------------------------------------------------------
 %% Setups
@@ -169,9 +170,153 @@ t_http_test_rule_trace(_Config) ->
     {ok, Delete} = request_api(delete, api_path(["trace/", Name])),
     ?assertEqual(<<>>, Delete),
 
+    emqx_trace:clear(),
     unload(),
     ok.
+t_http_test_json_formatter(_Config) -> + emqx_trace:clear(), + load(), + + Name = <<"testname">>, + Topic = <<"/x/y/z">>, + Trace = [ + {<<"name">>, Name}, + {<<"type">>, <<"topic">>}, + {<<"topic">>, Topic}, + {<<"formatter">>, <<"json">>} + ], + + {ok, Create} = request_api(post, api_path("trace"), Trace), + ?assertMatch(#{<<"name">> := Name}, json(Create)), + + {ok, List} = request_api(get, api_path("trace")), + [Data] = json(List), + ?assertEqual(<<"json">>, maps:get(<<"formatter">>, Data)), + + {ok, List1} = request_api(get, api_path("trace")), + [Data1] = json(List1), + ?assertMatch( + #{ + <<"formatter">> := <<"json">> + }, + Data1 + ), + + %% Check that the log is empty + ok = emqx_trace_handler_SUITE:filesync(Name, topic), + {ok, _Detail} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/log_detail")), + %% Trace is empty which results in a not found error + {error, _} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/download")), + + %% Start a client and send a message to get info to the log + ClientId = <<"my_client_id">>, + {ok, Client} = emqtt:start_link([{clean_start, true}, {clientid, ClientId}]), + {ok, _} = emqtt:connect(Client), + %% Normal message + emqtt:publish(Client, Topic, #{}, <<"log_this_message">>, [{qos, 2}]), + %% Escape line breaks + emqtt:publish(Client, Topic, #{}, <<"\nlog\nthis\nmessage">>, [{qos, 2}]), + %% Escape escape character + emqtt:publish(Client, Topic, #{}, <<"\\\nlog\n_\\n_this\nmessage\\">>, [{qos, 2}]), + %% Escape end of string + emqtt:publish(Client, Topic, #{}, <<"\"log_this_message\"">>, [{qos, 2}]), + + %% Manually create some trace messages to test the JSON formatter + + %% String key and value + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, "str" => "str"}), + %% Log Erlang term + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, term => {notjson}}), + %% Log Erlang term key + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, {'notjson'} => term}), + %% Log Integer + 
?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, integer => 42}), + %% Log Float + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, float => 1.2}), + %% Log Integer Key + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, 42 => integer}), + %% Log Float Key + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, 1.2 => float}), + %% Log Map Key + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, #{} => value}), + %% Empty submap + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, sub => #{}}), + %% Non-empty submap + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, sub => #{key => value}}), + %% Bolean values + ?TRACE("CUSTOM", "my_log_msg", #{topic => Topic, true => true, false => false}), + %% Key value list + ?TRACE("CUSTOM", "my_log_msg", #{ + topic => Topic, + list => [ + {<<"key">>, <<"value">>}, + {<<"key2">>, <<"value2">>} + ] + }), + ok = emqx_trace_handler_SUITE:filesync(Name, topic), + {ok, _Detail2} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/log_detail")), + {ok, Bin} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/download")), + {ok, [ + _Comment, + #zip_file{ + name = _ZipName, + info = #file_info{size = Size, type = regular, access = read_write} + } + ]} = zip:table(Bin), + ?assert(Size > 0), + {ok, [{_, LogContent}]} = zip:unzip(Bin, [memory]), + LogEntriesTrailing = string:split(LogContent, "\n", all), + LogEntries = lists:droplast(LogEntriesTrailing), + DecodedLogEntries = [ + begin + ct:pal("LOG ENTRY\n~s\n", [JSONEntry]), + emqx_utils_json:decode(JSONEntry) + end + || JSONEntry <- LogEntries + ], + ?assertMatch( + [ + #{<<"meta">> := #{<<"payload">> := <<"log_this_message">>}}, + #{<<"meta">> := #{<<"payload">> := <<"\nlog\nthis\nmessage">>}}, + #{ + <<"meta">> := #{<<"payload">> := <<"\\\nlog\n_\\n_this\nmessage\\">>} + }, + #{<<"meta">> := #{<<"payload">> := <<"\"log_this_message\"">>}}, + #{<<"meta">> := #{<<"str">> := <<"str">>}}, + #{<<"meta">> := #{<<"term">> := <<"{notjson}">>}}, + #{<<"meta">> := 
<<_/binary>>}, + #{<<"meta">> := #{<<"integer">> := 42}}, + #{<<"meta">> := #{<<"float">> := 1.2}}, + #{<<"meta">> := <<_/binary>>}, + #{<<"meta">> := <<_/binary>>}, + #{<<"meta">> := <<_/binary>>}, + #{<<"meta">> := #{<<"sub">> := #{}}}, + #{<<"meta">> := #{<<"sub">> := #{<<"key">> := <<"value">>}}}, + #{<<"meta">> := #{<<"true">> := <<"true">>, <<"false">> := <<"false">>}}, + #{ + <<"meta">> := #{ + <<"list">> := #{ + <<"key">> := <<"value">>, + <<"key2">> := <<"value2">> + } + } + } + | _ + ], + DecodedLogEntries + ), + {ok, Delete} = request_api(delete, api_path("trace/" ++ binary_to_list(Name))), + ?assertEqual(<<>>, Delete), + + {ok, List2} = request_api(get, api_path("trace")), + ?assertEqual([], json(List2)), + + ok = emqtt:disconnect(Client), + unload(), + emqx_trace:clear(), + ok. + t_create_failed(_Config) -> load(), Trace = [{<<"type">>, <<"topic">>}, {<<"topic">>, <<"/x/y/z">>}], diff --git a/rel/i18n/emqx_mgmt_api_trace.hocon b/rel/i18n/emqx_mgmt_api_trace.hocon index ba07d7d53..3b12caf97 100644 --- a/rel/i18n/emqx_mgmt_api_trace.hocon +++ b/rel/i18n/emqx_mgmt_api_trace.hocon @@ -115,4 +115,9 @@ current_trace_offset.desc: current_trace_offset.label: """Offset from the current trace position.""" +trace_log_formatter.desc: +"""The formatter that will be used to format the trace log entries. Set this to plain to format the log entries as plain text (default). 
Set it to json to format each log entry as a JSON object.""" +trace_log_formatter.label: +"""Trace Log Entry Formatter""" + } From 3c5c3ebe1b36dbd19f4c76cad7e791f9fe6dade0 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 11 Apr 2024 13:54:56 +0200 Subject: [PATCH 189/234] docs: add change log entry --- changes/ce/feat-12863.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/feat-12863.en.md diff --git a/changes/ce/feat-12863.en.md b/changes/ce/feat-12863.en.md new file mode 100644 index 000000000..45bebfbd6 --- /dev/null +++ b/changes/ce/feat-12863.en.md @@ -0,0 +1 @@ +You can now format trace log entries as JSON objects by setting the formatter parameter to "json" when creating the trace pattern. From 6fd7a06c5dbf8a26261e14b4dfa96be680a8aa5e Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 11 Apr 2024 17:22:33 +0200 Subject: [PATCH 190/234] fix: problems reported by dialyzer --- apps/emqx/include/emqx_trace.hrl | 2 +- apps/emqx/src/emqx_trace/emqx_trace_handler.erl | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx/include/emqx_trace.hrl b/apps/emqx/include/emqx_trace.hrl index a2f77c044..f273c2dbc 100644 --- a/apps/emqx/include/emqx_trace.hrl +++ b/apps/emqx/include/emqx_trace.hrl @@ -33,7 +33,7 @@ extra = #{} :: map() | '_', start_at :: integer() | undefined | '_', end_at :: integer() | undefined | '_', - formatter = plain :: plain | json + formatter = plain :: plain | json | '_' }). -define(SHARD, ?COMMON_SHARD). 
diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index d80fa70ec..98577d0b9 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -46,7 +46,8 @@ name := binary(), type := clientid | topic | ip_address, filter := emqx_types:clientid() | emqx_types:topic() | emqx_trace:ip_address(), - payload_encode := text | hidden | hex + payload_encode := text | hidden | hex, + formatter => json | plain }. -define(CONFIG(_LogFile_), #{ From aa398352427a7118f787c8f2d3f6c8f770b0d6e3 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 11 Apr 2024 18:27:49 +0200 Subject: [PATCH 191/234] feat(emqx_cli): add parameter for trace format (plain or json) --- .../src/emqx_trace/emqx_trace_handler.erl | 19 +++++- .../emqx_trace/emqx_trace_json_formatter.erl | 11 +++- apps/emqx_management/src/emqx_mgmt_cli.erl | 66 +++++++++++-------- 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index 98577d0b9..169e0361b 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -27,6 +27,7 @@ install/3, install/4, install/5, + install/6, uninstall/1, uninstall/2 ]). @@ -70,17 +71,29 @@ Type :: clientid | topic | ip_address, Filter :: emqx_types:clientid() | emqx_types:topic() | string(), Level :: logger:level() | all, - LogFilePath :: string() + LogFilePath :: string(), + Formatter :: plain | json ) -> ok | {error, term()}. -install(Name, Type, Filter, Level, LogFile) -> +install(Name, Type, Filter, Level, LogFile, Formatter) -> Who = #{ type => Type, filter => ensure_bin(Filter), name => ensure_bin(Name), - payload_encode => payload_encode() + payload_encode => payload_encode(), + formatter => Formatter }, install(Who, Level, LogFile). 
+-spec install( + Name :: binary() | list(), + Type :: clientid | topic | ip_address, + Filter :: emqx_types:clientid() | emqx_types:topic() | string(), + Level :: logger:level() | all, + LogFilePath :: string() +) -> ok | {error, term()}. +install(Name, Type, Filter, Level, LogFile) -> + install(Name, Type, Filter, Level, LogFile, plain). + -spec install( Type :: clientid | topic | ip_address, Filter :: emqx_types:clientid() | emqx_types:topic() | string(), diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl index b0548ac04..6f30774a6 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -65,7 +65,7 @@ format_key_value_pairs([{Key, Value} | Rest], PEncode, Acc) -> format_key(Term) -> %% Keys must be strings - String = emqx_logger_textfmt:try_format_unicode(Term), + String = try_format_unicode(Term), escape(String). format_value(Map, PEncode) when is_map(Map) -> @@ -79,9 +79,16 @@ format_value(true, _PEncode) -> format_value(false, _PEncode) -> "false"; format_value(V, _PEncode) -> - String = emqx_logger_textfmt:try_format_unicode(V), + String = try_format_unicode(V), ["\"", escape(String), "\""]. +try_format_unicode(undefined) -> + %% emqx_logger_textfmt:try_format_unicode converts the atom undefined to + %% the atom undefined + "undefined"; +try_format_unicode(V) -> + emqx_logger_textfmt:try_format_unicode(V). 
+ escape(IOList) -> Bin = iolist_to_binary(IOList), List = binary_to_list(Bin), diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 465989f3d..01c121e5b 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -507,21 +507,24 @@ trace(["list"]) -> ) end; trace(["stop", Operation, Filter0]) -> - case trace_type(Operation, Filter0) of - {ok, Type, Filter} -> trace_off(Type, Filter); + case trace_type(Operation, Filter0, plain) of + {ok, Type, Filter, _} -> trace_off(Type, Filter); error -> trace([]) end; trace(["start", Operation, ClientId, LogFile]) -> trace(["start", Operation, ClientId, LogFile, "all"]); trace(["start", Operation, Filter0, LogFile, Level]) -> - case trace_type(Operation, Filter0) of - {ok, Type, Filter} -> + trace(["start", Operation, Filter0, LogFile, Level, plain]); +trace(["start", Operation, Filter0, LogFile, Level, Formatter0]) -> + case trace_type(Operation, Filter0, Formatter0) of + {ok, Type, Filter, Formatter} -> trace_on( name(Filter0), Type, Filter, list_to_existing_atom(Level), - LogFile + LogFile, + Formatter ); error -> trace([]) @@ -529,19 +532,22 @@ trace(["start", Operation, Filter0, LogFile, Level]) -> trace(_) -> emqx_ctl:usage([ {"trace list", "List all traces started on local node"}, - {"trace start client []", "Traces for a client on local node"}, + {"trace start client [] []", + "Traces for a client on local node (Formatter=plain|json)"}, {"trace stop client ", "Stop tracing for a client on local node"}, - {"trace start topic [] ", "Traces for a topic on local node"}, + {"trace start topic [] []", + "Traces for a topic on local node (Formatter=plain|json)"}, {"trace stop topic ", "Stop tracing for a topic on local node"}, - {"trace start ip_address [] ", - "Traces for a client ip on local node"}, + {"trace start ip_address [] []", + "Traces for a client ip on local node (Formatter=plain|json)"}, {"trace stop ip_address 
", "Stop tracing for a client ip on local node"}, - {"trace start ruleid [] ", "Traces for a rule ID on local node"}, + {"trace start ruleid [] []", + "Traces for a rule ID on local node (Formatter=plain|json)"}, {"trace stop ruleid ", "Stop tracing for a rule ID on local node"} ]). -trace_on(Name, Type, Filter, Level, LogFile) -> - case emqx_trace_handler:install(Name, Type, Filter, Level, LogFile) of +trace_on(Name, Type, Filter, Level, LogFile, Formatter) -> + case emqx_trace_handler:install(Name, Type, Filter, Level, LogFile, Formatter) of ok -> emqx_trace:check(), emqx_ctl:print("trace ~s ~s successfully~n", [Filter, Name]); @@ -592,28 +598,33 @@ traces(["delete", Name]) -> trace_cluster_del(Name); traces(["start", Name, Operation, Filter]) -> traces(["start", Name, Operation, Filter, ?DEFAULT_TRACE_DURATION]); -traces(["start", Name, Operation, Filter0, DurationS]) -> - case trace_type(Operation, Filter0) of - {ok, Type, Filter} -> trace_cluster_on(Name, Type, Filter, DurationS); +traces(["start", Name, Operation, Filter, DurationS]) -> + traces(["start", Name, Operation, Filter, DurationS, plain]); +traces(["start", Name, Operation, Filter0, DurationS, Formatter0]) -> + case trace_type(Operation, Filter0, Formatter0) of + {ok, Type, Filter, Formatter} -> trace_cluster_on(Name, Type, Filter, DurationS, Formatter); error -> traces([]) end; traces(_) -> emqx_ctl:usage([ {"traces list", "List all cluster traces started"}, - {"traces start client []", "Traces for a client in cluster"}, - {"traces start topic []", "Traces for a topic in cluster"}, - {"traces start ip_address []", + {"traces start client [] []", + "Traces for a client in cluster (Formatter=plain|json)"}, + {"traces start topic [] []", + "Traces for a topic in cluster (Formatter=plain|json)"}, + {"traces start ruleid [] []", + "Traces for a rule ID in cluster (Formatter=plain|json)"}, + {"traces start ip_address [] []", "Traces for a client IP in cluster\n" "Trace will start immediately on all 
nodes, including the core and replicant,\n"
             "and will end after <duration> seconds. The default value for <duration> is "
             ?DEFAULT_TRACE_DURATION
-            " seconds."},
-        {"traces start ruleid <RuleID> [<Duration>]", "Traces for a rule ID in cluster"},
+            " seconds. (Formatter=plain|json)"},
         {"traces stop <Name>", "Stop trace in cluster"},
         {"traces delete <Name>", "Delete trace in cluster"}
     ]).
 
-trace_cluster_on(Name, Type, Filter, DurationS0) ->
+trace_cluster_on(Name, Type, Filter, DurationS0, Formatter) ->
     Now = emqx_trace:now_second(),
     DurationS = list_to_integer(DurationS0),
     Trace = #{
@@ -621,7 +632,8 @@ trace_cluster_on(Name, Type, Filter, DurationS0) ->
         type => Type,
         Type => bin(Filter),
         start_at => Now,
-        end_at => Now + DurationS
+        end_at => Now + DurationS,
+        formatter => Formatter
     },
     case emqx_trace:create(Trace) of
         {ok, _} ->
@@ -645,10 +657,12 @@ trace_cluster_off(Name) ->
         {error, Error} -> emqx_ctl:print("[error] Stop cluster_trace ~s: ~p~n", [Name, Error])
     end.
 
-trace_type("client", ClientId) -> {ok, clientid, bin(ClientId)};
-trace_type("topic", Topic) -> {ok, topic, bin(Topic)};
-trace_type("ip_address", IP) -> {ok, ip_address, IP};
-trace_type(_, _) -> error.
+trace_type(Op, Match, "plain") -> trace_type(Op, Match, plain);
+trace_type(Op, Match, "json") -> trace_type(Op, Match, json);
+trace_type("client", ClientId, Formatter) -> {ok, clientid, bin(ClientId), Formatter};
+trace_type("topic", Topic, Formatter) -> {ok, topic, bin(Topic), Formatter};
+trace_type("ip_address", IP, Formatter) -> {ok, ip_address, IP, Formatter};
+trace_type(_, _, _) -> error.
%%-------------------------------------------------------------------- %% @doc Listeners Command From 968dc2ccdafd78128dbedc3dacc732138567efaa Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 17:33:20 +0200 Subject: [PATCH 192/234] fix(json trace format): use best_effort_json instead of new encoder --- .../src/emqx_trace/emqx_trace_handler.erl | 3 - .../emqx_trace/emqx_trace_json_formatter.erl | 95 ++++++------------- 2 files changed, 31 insertions(+), 67 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index 169e0361b..c0172b2dc 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -199,9 +199,6 @@ filters(#{type := ruleid, filter := Filter, name := Name}) -> formatter(#{type := _Type, payload_encode := PayloadEncode, formatter := json}) -> {emqx_trace_json_formatter, #{ - single_line => true, - max_size => unlimited, - depth => unlimited, payload_encode => PayloadEncode }}; formatter(#{type := _Type, payload_encode := PayloadEncode}) -> diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl index 6f30774a6..c6d4761ea 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -33,78 +33,45 @@ format( LogMap, #{payload_encode := PEncode} ) -> + %% We just make some basic transformations on the input LogMap and then do + %% an external call to create the JSON text Time = emqx_utils_calendar:now_to_rfc3339(microsecond), LogMap1 = LogMap#{time => Time}, - [format_log_map(LogMap1, PEncode), "\n"]. + LogMap2 = prepare_log_map(LogMap1, PEncode), + [emqx_logger_jsonfmt:best_effort_json(LogMap2, [force_utf8]), "\n"]. 
%%%----------------------------------------------------------------- %%% Helper Functions %%%----------------------------------------------------------------- -format_log_map(Map, PEncode) -> - KeyValuePairs = format_key_value_pairs(maps:to_list(Map), PEncode, []), - ["{", KeyValuePairs, "}"]. +prepare_log_map(LogMap, PEncode) when is_map(LogMap) -> + NewKeyValuePairs = [prepare_key_value(K, V, PEncode) || {K, V} <- maps:to_list(LogMap)], + maps:from_list(NewKeyValuePairs); +prepare_log_map(Term, _PEncode) -> + Term. -format_key_value_pairs([], _PEncode, Acc) -> - lists:join(",", Acc); -format_key_value_pairs([{payload, Value} | Rest], PEncode, Acc) -> - FormattedPayload = format_payload(Value, PEncode), - FormattedPayloadEscaped = escape(FormattedPayload), - Pair = ["\"payload\": \"", FormattedPayloadEscaped, "\""], - format_key_value_pairs(Rest, PEncode, [Pair | Acc]); -format_key_value_pairs([{packet, Value} | Rest], PEncode, Acc) -> - Formatted = format_packet(Value, PEncode), - FormattedEscaped = escape(Formatted), - Pair = ["\"packet\": \"", FormattedEscaped, "\""], - format_key_value_pairs(Rest, PEncode, [Pair | Acc]); -format_key_value_pairs([{Key, Value} | Rest], PEncode, Acc) -> - FormattedKey = format_key(Key), - FormattedValue = format_value(Value, PEncode), - Pair = ["\"", FormattedKey, "\":", FormattedValue], - format_key_value_pairs(Rest, PEncode, [Pair | Acc]). - -format_key(Term) -> - %% Keys must be strings - String = try_format_unicode(Term), - escape(String). - -format_value(Map, PEncode) when is_map(Map) -> - format_log_map(Map, PEncode); -format_value(V, _PEncode) when is_integer(V) -> - integer_to_list(V); -format_value(V, _PEncode) when is_float(V) -> - float_to_list(V, [{decimals, 2}]); -format_value(true, _PEncode) -> - "true"; -format_value(false, _PEncode) -> - "false"; -format_value(V, _PEncode) -> - String = try_format_unicode(V), - ["\"", escape(String), "\""]. 
- -try_format_unicode(undefined) -> - %% emqx_logger_textfmt:try_format_unicode converts the atom undefined to - %% the atom undefined - "undefined"; -try_format_unicode(V) -> - emqx_logger_textfmt:try_format_unicode(V). - -escape(IOList) -> - Bin = iolist_to_binary(IOList), - List = binary_to_list(Bin), - escape_list(List). - -escape_list([]) -> - []; -escape_list([$\n | Rest]) -> - %% 92 is backslash - [92, $n | escape_list(Rest)]; -escape_list([$" | Rest]) -> - [92, $" | escape_list(Rest)]; -escape_list([92 | Rest]) -> - [92, 92 | escape_list(Rest)]; -escape_list([X | Rest]) -> - [X | escape_list(Rest)]. +prepare_key_value(payload = K, V, PEncode) -> + NewV = + try + format_payload(V, PEncode) + catch + _:_:_ -> + V + end, + {K, NewV}; +prepare_key_value(packet = K, V, PEncode) -> + NewV = + try + format_packet(V, PEncode) + catch + _:_:_ -> + V + end, + {K, NewV}; +prepare_key_value(K, V, PEncode) when is_map(V) -> + {K, prepare_log_map(V, PEncode)}; +prepare_key_value(K, V, _PEncode) -> + {K, V}. format_packet(undefined, _) -> ""; format_packet(Packet, Encode) -> emqx_packet:format(Packet, Encode). 
From 6c0ee8bb01cb8d478b39656a6a5c1bad6040d8cd Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 12 Apr 2024 17:40:48 +0200 Subject: [PATCH 193/234] test(emqx_trace_SUITE): fix failure due to new field --- apps/emqx/test/emqx_trace_SUITE.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx/test/emqx_trace_SUITE.erl b/apps/emqx/test/emqx_trace_SUITE.erl index 1652c11b9..957a9850d 100644 --- a/apps/emqx/test/emqx_trace_SUITE.erl +++ b/apps/emqx/test/emqx_trace_SUITE.erl @@ -96,7 +96,8 @@ t_base_create_delete(_Config) -> start_at => Now, end_at => Now + 30 * 60, payload_encode => text, - extra => #{} + extra => #{}, + formatter => plain } ], ?assertEqual(ExpectFormat, emqx_trace:format([TraceRec])), From ef9f8a8fdf4d94189fd8bd9f6cc174a9052cee73 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Sun, 14 Apr 2024 11:22:23 +0200 Subject: [PATCH 194/234] fix: unreachable clause found by dialyzer --- apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl index c6d4761ea..205ccd5be 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -44,11 +44,9 @@ format( %%% Helper Functions %%%----------------------------------------------------------------- -prepare_log_map(LogMap, PEncode) when is_map(LogMap) -> +prepare_log_map(LogMap, PEncode) -> NewKeyValuePairs = [prepare_key_value(K, V, PEncode) || {K, V} <- maps:to_list(LogMap)], - maps:from_list(NewKeyValuePairs); -prepare_log_map(Term, _PEncode) -> - Term. + maps:from_list(NewKeyValuePairs). 
prepare_key_value(payload = K, V, PEncode) -> NewV = From 10957e7d79bbc62d26d8b9a41e8de26c7b3aa9f5 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 10:42:51 +0200 Subject: [PATCH 195/234] fix: change name of the default trace log format from plain to text --- apps/emqx/include/emqx_trace.hrl | 2 +- .../src/emqx_trace/emqx_trace_handler.erl | 6 ++--- apps/emqx/test/emqx_trace_SUITE.erl | 2 +- .../src/emqx_mgmt_api_trace.erl | 4 ++-- apps/emqx_management/src/emqx_mgmt_cli.erl | 24 +++++++++---------- rel/i18n/emqx_mgmt_api_trace.hocon | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/apps/emqx/include/emqx_trace.hrl b/apps/emqx/include/emqx_trace.hrl index f273c2dbc..f3905bfdf 100644 --- a/apps/emqx/include/emqx_trace.hrl +++ b/apps/emqx/include/emqx_trace.hrl @@ -33,7 +33,7 @@ extra = #{} :: map() | '_', start_at :: integer() | undefined | '_', end_at :: integer() | undefined | '_', - formatter = plain :: plain | json | '_' + formatter = text :: text | json | '_' }). -define(SHARD, ?COMMON_SHARD). diff --git a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl index c0172b2dc..8179f8c0a 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_handler.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_handler.erl @@ -48,7 +48,7 @@ type := clientid | topic | ip_address, filter := emqx_types:clientid() | emqx_types:topic() | emqx_trace:ip_address(), payload_encode := text | hidden | hex, - formatter => json | plain + formatter => json | text }. -define(CONFIG(_LogFile_), #{ @@ -72,7 +72,7 @@ Filter :: emqx_types:clientid() | emqx_types:topic() | string(), Level :: logger:level() | all, LogFilePath :: string(), - Formatter :: plain | json + Formatter :: text | json ) -> ok | {error, term()}. install(Name, Type, Filter, Level, LogFile, Formatter) -> Who = #{ @@ -92,7 +92,7 @@ install(Name, Type, Filter, Level, LogFile, Formatter) -> LogFilePath :: string() ) -> ok | {error, term()}. 
install(Name, Type, Filter, Level, LogFile) -> - install(Name, Type, Filter, Level, LogFile, plain). + install(Name, Type, Filter, Level, LogFile, text). -spec install( Type :: clientid | topic | ip_address, diff --git a/apps/emqx/test/emqx_trace_SUITE.erl b/apps/emqx/test/emqx_trace_SUITE.erl index 957a9850d..b5de0b979 100644 --- a/apps/emqx/test/emqx_trace_SUITE.erl +++ b/apps/emqx/test/emqx_trace_SUITE.erl @@ -97,7 +97,7 @@ t_base_create_delete(_Config) -> end_at => Now + 30 * 60, payload_encode => text, extra => #{}, - formatter => plain + formatter => text } ], ?assertEqual(ExpectFormat, emqx_trace:format([TraceRec])), diff --git a/apps/emqx_management/src/emqx_mgmt_api_trace.erl b/apps/emqx_management/src/emqx_mgmt_api_trace.erl index f0efe9f85..e5ccde4f2 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_trace.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_trace.erl @@ -317,10 +317,10 @@ fields(trace) -> )}, {formatter, hoconsc:mk( - hoconsc:union([plain, json]), + hoconsc:union([text, json]), #{ description => ?DESC(trace_log_formatter), - example => plain, + example => text, required => false } )} diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 01c121e5b..32a24d9bd 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -507,14 +507,14 @@ trace(["list"]) -> ) end; trace(["stop", Operation, Filter0]) -> - case trace_type(Operation, Filter0, plain) of + case trace_type(Operation, Filter0, text) of {ok, Type, Filter, _} -> trace_off(Type, Filter); error -> trace([]) end; trace(["start", Operation, ClientId, LogFile]) -> trace(["start", Operation, ClientId, LogFile, "all"]); trace(["start", Operation, Filter0, LogFile, Level]) -> - trace(["start", Operation, Filter0, LogFile, Level, plain]); + trace(["start", Operation, Filter0, LogFile, Level, text]); trace(["start", Operation, Filter0, LogFile, Level, Formatter0]) -> case trace_type(Operation, 
Filter0, Formatter0) of {ok, Type, Filter, Formatter} -> @@ -533,16 +533,16 @@ trace(_) -> emqx_ctl:usage([ {"trace list", "List all traces started on local node"}, {"trace start client [] []", - "Traces for a client on local node (Formatter=plain|json)"}, + "Traces for a client on local node (Formatter=text|json)"}, {"trace stop client ", "Stop tracing for a client on local node"}, {"trace start topic [] []", - "Traces for a topic on local node (Formatter=plain|json)"}, + "Traces for a topic on local node (Formatter=text|json)"}, {"trace stop topic ", "Stop tracing for a topic on local node"}, {"trace start ip_address [] []", - "Traces for a client ip on local node (Formatter=plain|json)"}, + "Traces for a client ip on local node (Formatter=text|json)"}, {"trace stop ip_address ", "Stop tracing for a client ip on local node"}, {"trace start ruleid [] []", - "Traces for a rule ID on local node (Formatter=plain|json)"}, + "Traces for a rule ID on local node (Formatter=text|json)"}, {"trace stop ruleid ", "Stop tracing for a rule ID on local node"} ]). 
@@ -599,7 +599,7 @@ traces(["delete", Name]) -> traces(["start", Name, Operation, Filter]) -> traces(["start", Name, Operation, Filter, ?DEFAULT_TRACE_DURATION]); traces(["start", Name, Operation, Filter, DurationS]) -> - traces(["start", Name, Operation, Filter, DurationS, plain]); + traces(["start", Name, Operation, Filter, DurationS, text]); traces(["start", Name, Operation, Filter0, DurationS, Formatter0]) -> case trace_type(Operation, Filter0, Formatter0) of {ok, Type, Filter, Formatter} -> trace_cluster_on(Name, Type, Filter, DurationS, Formatter); @@ -609,17 +609,17 @@ traces(_) -> emqx_ctl:usage([ {"traces list", "List all cluster traces started"}, {"traces start client [] []", - "Traces for a client in cluster (Formatter=plain|json)"}, + "Traces for a client in cluster (Formatter=text|json)"}, {"traces start topic [] []", - "Traces for a topic in cluster (Formatter=plain|json)"}, + "Traces for a topic in cluster (Formatter=text|json)"}, {"traces start ruleid [] []", - "Traces for a rule ID in cluster (Formatter=plain|json)"}, + "Traces for a rule ID in cluster (Formatter=text|json)"}, {"traces start ip_address [] []", "Traces for a client IP in cluster\n" "Trace will start immediately on all nodes, including the core and replicant,\n" "and will end after seconds. The default value for is " ?DEFAULT_TRACE_DURATION - " seconds. (Formatter=plain|json)"}, + " seconds. (Formatter=text|json)"}, {"traces stop ", "Stop trace in cluster"}, {"traces delete ", "Delete trace in cluster"} ]). @@ -657,7 +657,7 @@ trace_cluster_off(Name) -> {error, Error} -> emqx_ctl:print("[error] Stop cluster_trace ~s: ~p~n", [Name, Error]) end. 
-trace_type(Op, Match, "plain") -> trace_type(Op, Match, plain); +trace_type(Op, Match, "text") -> trace_type(Op, Match, text); trace_type(Op, Match, "json") -> trace_type(Op, Match, json); trace_type("client", ClientId, Formatter) -> {ok, clientid, bin(ClientId), Formatter}; trace_type("topic", Topic, Formatter) -> {ok, topic, bin(Topic), Formatter}; diff --git a/rel/i18n/emqx_mgmt_api_trace.hocon b/rel/i18n/emqx_mgmt_api_trace.hocon index 3b12caf97..13d814c21 100644 --- a/rel/i18n/emqx_mgmt_api_trace.hocon +++ b/rel/i18n/emqx_mgmt_api_trace.hocon @@ -116,7 +116,7 @@ current_trace_offset.label: """Offset from the current trace position.""" trace_log_formatter.desc: -"""The formatter that will be used to format the trace log entries. Set this to plain to format the log entries as plain text (default). Set it to json to format each log entry as a JSON object.""" +"""The formatter that will be used to format the trace log entries. Set this to text to format the log entries as plain text (default). 
Set it to json to format each log entry as a JSON object.""" trace_log_formatter.label: """Trace Log Entry Formatter""" From aa388adba9bc8d9712d437ba1cb97cf2e19926b8 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 11:17:45 +0200 Subject: [PATCH 196/234] fix(json trace format): format client_ids and rule_ids as lists --- .../emqx_trace/emqx_trace_json_formatter.erl | 35 +++++++++++++++++-- .../test/emqx_mgmt_api_trace_SUITE.erl | 19 ++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl index 205ccd5be..6fb655c0d 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -53,7 +53,7 @@ prepare_key_value(payload = K, V, PEncode) -> try format_payload(V, PEncode) catch - _:_:_ -> + _:_ -> V end, {K, NewV}; @@ -62,7 +62,25 @@ prepare_key_value(packet = K, V, PEncode) -> try format_packet(V, PEncode) catch - _:_:_ -> + _:_ -> + V + end, + {K, NewV}; +prepare_key_value(rule_ids = K, V, _PEncode) -> + NewV = + try + format_map_set_to_list(V) + catch + _:_ -> + V + end, + {K, NewV}; +prepare_key_value(client_ids = K, V, _PEncode) -> + NewV = + try + format_map_set_to_list(V) + catch + _:_ -> V end, {K, NewV}; @@ -83,3 +101,16 @@ format_payload(Payload, text) when ?MAX_PAYLOAD_FORMAT_LIMIT(Payload) -> format_payload(Payload, hex) when ?MAX_PAYLOAD_FORMAT_LIMIT(Payload) -> binary:encode_hex(Payload); format_payload(<> = Payload, Type) -> emqx_packet:format_truncated_payload(Part, byte_size(Payload), Type). + +format_map_set_to_list(Map) -> + Items = [ + begin + %% Assert that it is really a map set + true = V, + %% Assert that the keys have the expected type + true = is_binary(K), + K + end + || {K, V} <- maps:to_list(Map) + ], + lists:sort(Items). 
diff --git a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl index de74316f4..22ee44b95 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl @@ -254,6 +254,15 @@ t_http_test_json_formatter(_Config) -> {<<"key2">>, <<"value2">>} ] }), + %% We do special formatting for client_ids and rule_ids + ?TRACE("CUSTOM", "my_log_msg", #{ + topic => Topic, + client_ids => maps:from_keys([<<"a">>, <<"b">>, <<"c">>], true) + }), + ?TRACE("CUSTOM", "my_log_msg", #{ + topic => Topic, + rule_ids => maps:from_keys([<<"a">>, <<"b">>, <<"c">>], true) + }), ok = emqx_trace_handler_SUITE:filesync(Name, topic), {ok, _Detail2} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/log_detail")), {ok, Bin} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/download")), @@ -301,6 +310,16 @@ t_http_test_json_formatter(_Config) -> <<"key2">> := <<"value2">> } } + }, + #{ + <<"meta">> := #{ + <<"client_ids">> := [<<"a">>, <<"b">>, <<"c">>] + } + }, + #{ + <<"meta">> := #{ + <<"rule_ids">> := [<<"a">>, <<"b">>, <<"c">>] + } } | _ ], From 3232ab5ea32429d2a80b1701a038d91d22d2fad4 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 11:28:50 +0200 Subject: [PATCH 197/234] fix(rule tracing): unset trace meta data in try-after-end We wrap the reset of the process trace meta data in the after clause of a try-after-end expression to be sure we never get any lingering incorrect meta data. 
--- .../src/emqx_resource_buffer_worker.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index bc1aea734..e4935674f 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1125,12 +1125,13 @@ call_query(QM, Id, Index, Ref, Query, QueryOpts) -> {ok, _Group, #{status := ?status_connecting, error := unhealthy_target}} -> {error, {unrecoverable_error, unhealthy_target}}; {ok, _Group, Resource} -> - set_rule_id_trace_meta_data(Query), - QueryResult = do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource), - %% do_call_query does not throw an exception as the call to the - %% resource is wrapped in a try catch expression so we will always - %% unset the trace meta data - unset_rule_id_trace_meta_data(), + QueryResult = + try + set_rule_id_trace_meta_data(Query), + do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource) + after + unset_rule_id_trace_meta_data() + end, QueryResult; {error, not_found} -> ?RESOURCE_ERROR(not_found, "resource not found") From ca56e7e8d74ea540a8e832daa99ccfa9f9114bf3 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 18 Apr 2024 13:04:36 +0200 Subject: [PATCH 198/234] fix(kafka): headers are template fields --- apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl index cf96ce6cb..83bc33266 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.erl @@ -389,7 +389,7 @@ fields(producer_kafka_opts) -> )}, {kafka_headers, mk( - binary(), + emqx_schema:template(), #{ required => false, validator => fun kafka_header_validator/1, @@ -462,12 +462,12 @@ fields(producer_kafka_ext_headers) -> [ 
{kafka_ext_header_key, mk( - binary(), + emqx_schema:template(), #{required => true, desc => ?DESC(producer_kafka_ext_header_key)} )}, {kafka_ext_header_value, mk( - binary(), + emqx_schema:template(), #{ required => true, validator => fun kafka_ext_header_value_validator/1, From ede4eeae9fdbdd0d13df8841154fb1585f1fba11 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 18 Apr 2024 13:04:56 +0200 Subject: [PATCH 199/234] fix(http_bridge): path is template field --- apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl | 4 ++-- apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index ae1e727ca..4eef6968b 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -128,8 +128,8 @@ fields("request") -> desc => ?DESC("method"), validator => fun ?MODULE:validate_method/1 })}, - {path, hoconsc:mk(binary(), #{required => false, desc => ?DESC("path")})}, - {body, hoconsc:mk(binary(), #{required => false, desc => ?DESC("body")})}, + {path, hoconsc:mk(emqx_schema:template(), #{required => false, desc => ?DESC("path")})}, + {body, hoconsc:mk(emqx_schema:template(), #{required => false, desc => ?DESC("body")})}, {headers, hoconsc:mk(map(), #{required => false, desc => ?DESC("headers")})}, {max_retries, sc( diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl index ef150adfc..8b33b1523 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl @@ -114,7 +114,7 @@ fields("parameters_opts") -> [ {path, mk( - binary(), + emqx_schema:template(), #{ desc => ?DESC("config_path"), required => false From 5b38d592f02cdcbf67744e0577aed0ab08823a13 Mon Sep 17 00:00:00 2001 From: zmstone Date: 
Thu, 18 Apr 2024 13:16:29 +0200 Subject: [PATCH 200/234] feat(http): add `is_template` as HTTP headers field property is_template was designed to be type property. however for HTTP headers, it's a map() type, instead of creating a new type for it, it's easier to just add it as a field property. --- apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl | 3 ++- apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl | 3 ++- apps/emqx_dashboard/src/emqx_dashboard_swagger.erl | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index 4eef6968b..e8143d87f 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -130,7 +130,8 @@ fields("request") -> })}, {path, hoconsc:mk(emqx_schema:template(), #{required => false, desc => ?DESC("path")})}, {body, hoconsc:mk(emqx_schema:template(), #{required => false, desc => ?DESC("body")})}, - {headers, hoconsc:mk(map(), #{required => false, desc => ?DESC("headers")})}, + {headers, + hoconsc:mk(map(), #{required => false, desc => ?DESC("headers"), is_template => true})}, {max_retries, sc( non_neg_integer(), diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl index 8b33b1523..cadbcf0d2 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_schema.erl @@ -270,7 +270,8 @@ headers_field() -> <<"content-type">> => <<"application/json">>, <<"keep-alive">> => <<"timeout=5">> }, - desc => ?DESC("config_headers") + desc => ?DESC("config_headers"), + is_template => true } )}. 
diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index 7a5ea1939..4ada5994c 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -57,7 +57,11 @@ allowEmptyValue, deprecated, minimum, - maximum + maximum, + %% is_template is a type property, + %% but some exceptions are made for them to be field property + %% for example, HTTP headers (which is a map type) + is_template ]). -define(INIT_SCHEMA, #{ From 8f1486f6d398e5d9b7e549aacf6f78320e722b86 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 13:50:22 +0200 Subject: [PATCH 201/234] fix: clean up trace messages to make it easier to interpret This commit removes some redundant trace messages and renames some to make it easier to interpret what is happening for the user. --- .../test/emqx_bridge_http_SUITE.erl | 3 +-- apps/emqx_rule_engine/src/emqx_rule_runtime.erl | 15 ++++++--------- .../emqx_rule_engine_api_rule_apply_SUITE.erl | 3 +-- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl index 73f6359ab..9d215d815 100644 --- a/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl +++ b/apps/emqx_bridge_http/test/emqx_bridge_http_SUITE.erl @@ -418,9 +418,8 @@ t_send_get_trace_messages(Config) -> begin Bin = read_rule_trace_file(TraceName, Now), ?assertNotEqual(nomatch, binary:match(Bin, [<<"rule_activated">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"SELECT_yielded_result">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"SQL_yielded_result">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_template_rendered">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) end 
diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index 3dfc5f6c8..3872fb973 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -132,7 +132,7 @@ apply_rule(Rule = #{id := RuleID}, Columns, Envs) -> reason => Error, stacktrace => StkTrace }, - warning + error ), {error, {Error, StkTrace}} after @@ -176,18 +176,18 @@ do_apply_rule( {ok, ColumnsAndSelected, FinalCollection} -> case FinalCollection of [] -> - trace_rule_sql("FOREACH_yielded_no_result"), + trace_rule_sql("SQL_yielded_no_result"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'); _ -> trace_rule_sql( - "FOREACH_yielded_result", #{result => FinalCollection}, debug + "SQL_yielded_result", #{result => FinalCollection}, debug ), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'passed') end, NewEnvs = maps:merge(ColumnsAndSelected, Envs), {ok, [handle_action_list(RuleId, Actions, Coll, NewEnvs) || Coll <- FinalCollection]}; false -> - trace_rule_sql("FOREACH_yielded_no_result_no_match"), + trace_rule_sql("SQL_yielded_no_result"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'), {error, nomatch} end; @@ -204,11 +204,11 @@ do_apply_rule( ) -> case evaluate_select(Fields, Columns, Conditions) of {ok, Selected} -> - trace_rule_sql("SELECT_yielded_result", #{result => Selected}, debug), + trace_rule_sql("SQL_yielded_result", #{result => Selected}, debug), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'passed'), {ok, handle_action_list(RuleId, Actions, Selected, maps:merge(Columns, Envs))}; false -> - trace_rule_sql("SELECT_yielded_no_result_no_match"), + trace_rule_sql("SQL_yielded_no_result"), ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'failed.no_result'), {error, nomatch} end. 
@@ -392,10 +392,8 @@ handle_action_list(RuleId, Actions, Selected, Envs) -> handle_action(RuleId, ActId, Selected, Envs) -> ok = emqx_metrics_worker:inc(rule_metrics, RuleId, 'actions.total'), - trace_action(ActId, "activating_action"), try Result = do_handle_action(RuleId, ActId, Selected, Envs), - trace_action(ActId, "action_activated", #{result => Result}), Result catch throw:out_of_service -> @@ -467,7 +465,6 @@ do_handle_action(RuleId, #{mod := Mod, func := Func} = Action, Selected, Envs) - Result = Mod:Func(Selected, Envs, Args), {_, IncCtx} = do_handle_action_get_trace_inc_metrics_context(RuleId, Action), inc_action_metrics(IncCtx, Result), - trace_action(Action, "call_action_function_result", #{result => Result}, debug), Result. do_handle_action_get_trace_inc_metrics_context(RuleID, Action) -> diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index c875617ce..837fc3274 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -128,9 +128,8 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> Bin = read_rule_trace_file(TraceName, TraceType, Now), io:format("THELOG:~n~s", [Bin]), ?assertNotEqual(nomatch, binary:match(Bin, [<<"rule_activated">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"SELECT_yielded_result">>])), + ?assertNotEqual(nomatch, binary:match(Bin, [<<"SQL_yielded_result">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"bridge_action">>])), - ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_activated">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"action_template_rendered">>])), ?assertNotEqual(nomatch, binary:match(Bin, [<<"QUERY_ASYNC">>])) end From f5b043972443532688649be5ca429374f5626165 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 14:28:28 +0200 Subject: [PATCH 202/234] 
fix(emqx_rule_engine_api_rule_apply_SUITE): flaky test case --- apps/emqx/test/emqx_trace_SUITE.erl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/apps/emqx/test/emqx_trace_SUITE.erl b/apps/emqx/test/emqx_trace_SUITE.erl index b5de0b979..4de9d09c0 100644 --- a/apps/emqx/test/emqx_trace_SUITE.erl +++ b/apps/emqx/test/emqx_trace_SUITE.erl @@ -512,4 +512,13 @@ build_old_trace_data() -> reload() -> catch ok = gen_server:stop(emqx_trace), - {ok, _Pid} = emqx_trace:start_link(). + case emqx_trace:start_link() of + {ok, _Pid} = Res -> + Res; + NotOKRes -> + ct:pal( + "emqx_trace:start_link() gave result: ~p\n" + "(perhaps it is already started)", + [NotOKRes] + ) + end. From 7be18730e876ea4e13f4094d18d1c34b7f80231f Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 15:16:18 +0200 Subject: [PATCH 203/234] test(emqx_rule_engine_api_rule_apply_SUITE): remove unnecessary code --- .../test/emqx_rule_engine_api_rule_apply_SUITE.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 837fc3274..e7a907927 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -172,8 +172,6 @@ create_trace(TraceName, TraceType, TraceValue) -> start_at => Start, end_at => End }, - emqx_trace_SUITE:reload(), - ok = emqx_trace:clear(), {ok, _} = emqx_trace:create(Trace). 
t_apply_rule_test_batch_separation_stop_after_render(_Config) -> From 285bfa936776ff85e2fe710b674cfc0ae16d5e9e Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 17:00:47 +0200 Subject: [PATCH 204/234] fix: improve rendering of action_template_rendered trace --- apps/emqx/src/emqx_trace/emqx_trace.erl | 10 +++++-- .../emqx_trace/emqx_trace_json_formatter.erl | 14 ++++++++++ .../src/emqx_bridge_http_connector.erl | 27 ++++++++++--------- .../test/emqx_mgmt_api_trace_SUITE.erl | 14 ++++++++++ .../emqx_rule_engine_api_rule_apply_SUITE.erl | 3 ++- 5 files changed, 52 insertions(+), 16 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 6a255806a..fc64b5073 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -88,8 +88,14 @@ unsubscribe(Topic, SubOpts) -> ?TRACE("UNSUBSCRIBE", "unsubscribe", #{topic => Topic, sub_opts => SubOpts}). rendered_action_template(ActionID, RenderResult) -> - Msg = lists:flatten(io_lib:format("action_template_rendered(~ts)", [ActionID])), - TraceResult = ?TRACE("QUERY_RENDER", Msg, RenderResult), + TraceResult = ?TRACE( + "QUERY_RENDER", + "action_template_rendered", + #{ + result => RenderResult, + action_id => ActionID + } + ), case logger:get_process_metadata() of #{stop_action_after_render := true} -> %% We throw an unrecoverable error to stop action before the diff --git a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl index 6fb655c0d..35b09b9b0 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_json_formatter.erl @@ -84,6 +84,13 @@ prepare_key_value(client_ids = K, V, _PEncode) -> V end, {K, NewV}; +prepare_key_value(action_id = K, V, _PEncode) -> + try + {action_info, format_action_info(V)} + catch + _:_ -> + {K, V} + end; prepare_key_value(K, V, PEncode) when is_map(V) -> {K, 
prepare_log_map(V, PEncode)}; prepare_key_value(K, V, _PEncode) -> @@ -114,3 +121,10 @@ format_map_set_to_list(Map) -> || {K, V} <- maps:to_list(Map) ], lists:sort(Items). + +format_action_info(V) -> + [<<"action">>, Type, Name | _] = binary:split(V, <<":">>, [global]), + #{ + type => Type, + name => Name + }. diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index ec75922a7..db2e74510 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -315,7 +315,7 @@ on_query(InstId, {send_message, Msg}, State) -> ClientId = maps:get(clientid, Msg, undefined), on_query( InstId, - {ClientId, Method, {Path, Headers, Body}, Timeout, Retry}, + {undefined, ClientId, Method, {Path, Headers, Body}, Timeout, Retry}, State ) end; @@ -345,19 +345,19 @@ on_query( ClientId = clientid(Msg), on_query( InstId, - {ClientId, Method, {Path, Headers, Body}, Timeout, Retry}, + {ActionId, ClientId, Method, {Path, Headers, Body}, Timeout, Retry}, State ) end; on_query(InstId, {Method, Request}, State) -> %% TODO: Get retry from State - on_query(InstId, {undefined, Method, Request, 5000, _Retry = 2}, State); + on_query(InstId, {undefined, undefined, Method, Request, 5000, _Retry = 2}, State); on_query(InstId, {Method, Request, Timeout}, State) -> %% TODO: Get retry from State - on_query(InstId, {undefined, Method, Request, Timeout, _Retry = 2}, State); + on_query(InstId, {undefined, undefined, Method, Request, Timeout, _Retry = 2}, State); on_query( InstId, - {KeyOrNum, Method, Request, Timeout, Retry}, + {ActionId, KeyOrNum, Method, Request, Timeout, Retry}, #{base_path := BasePath} = State ) -> ?TRACE( @@ -367,11 +367,12 @@ on_query( request => redact_request(Request), note => ?READACT_REQUEST_NOTE, connector => InstId, + action_id => ActionId, state => redact(State) } ), NRequest = formalize_request(Method, BasePath, Request), - 
trace_rendered_action_template(InstId, Method, NRequest, Timeout), + trace_rendered_action_template(ActionId, Method, NRequest, Timeout), Worker = resolve_pool_worker(State, KeyOrNum), Result0 = ehttpc:request( Worker, @@ -428,7 +429,7 @@ on_query_async(InstId, {send_message, Msg}, ReplyFunAndArgs, State) -> ClientId = maps:get(clientid, Msg, undefined), on_query_async( InstId, - {ClientId, Method, {Path, Headers, Body}, Timeout}, + {undefined, ClientId, Method, {Path, Headers, Body}, Timeout}, ReplyFunAndArgs, State ) @@ -458,14 +459,14 @@ on_query_async( ClientId = clientid(Msg), on_query_async( InstId, - {ClientId, Method, {Path, Headers, Body}, Timeout}, + {ActionId, ClientId, Method, {Path, Headers, Body}, Timeout}, ReplyFunAndArgs, State ) end; on_query_async( InstId, - {KeyOrNum, Method, Request, Timeout}, + {ActionId, KeyOrNum, Method, Request, Timeout}, ReplyFunAndArgs, #{base_path := BasePath} = State ) -> @@ -481,7 +482,7 @@ on_query_async( } ), NRequest = formalize_request(Method, BasePath, Request), - trace_rendered_action_template(InstId, Method, NRequest, Timeout), + trace_rendered_action_template(ActionId, Method, NRequest, Timeout), MaxAttempts = maps:get(max_attempts, State, 3), Context = #{ attempt => 1, @@ -501,11 +502,11 @@ on_query_async( ), {ok, Worker}. 
-trace_rendered_action_template(InstId, Method, NRequest, Timeout) -> +trace_rendered_action_template(ActionId, Method, NRequest, Timeout) -> case NRequest of {Path, Headers} -> emqx_trace:rendered_action_template( - InstId, + ActionId, #{ path => Path, method => Method, @@ -515,7 +516,7 @@ trace_rendered_action_template(InstId, Method, NRequest, Timeout) -> ); {Path, Headers, Body} -> emqx_trace:rendered_action_template( - InstId, + ActionId, #{ path => Path, method => Method, diff --git a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl index 22ee44b95..c5f5c475d 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl @@ -263,6 +263,12 @@ t_http_test_json_formatter(_Config) -> topic => Topic, rule_ids => maps:from_keys([<<"a">>, <<"b">>, <<"c">>], true) }), + %% action_id should be rendered as action_info + ?TRACE("CUSTOM", "my_log_msg", #{ + topic => Topic, + action_id => + <<"action:http:emqx_bridge_http_test_lib:connector:http:emqx_bridge_http_test_lib">> + }), ok = emqx_trace_handler_SUITE:filesync(Name, topic), {ok, _Detail2} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/log_detail")), {ok, Bin} = request_api(get, api_path("trace/" ++ binary_to_list(Name) ++ "/download")), @@ -320,6 +326,14 @@ t_http_test_json_formatter(_Config) -> <<"meta">> := #{ <<"rule_ids">> := [<<"a">>, <<"b">>, <<"c">>] } + }, + #{ + <<"meta">> := #{ + <<"action_info">> := #{ + <<"type">> := <<"http">>, + <<"name">> := <<"emqx_bridge_http_test_lib">> + } + } } | _ ], diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index e7a907927..147e40d95 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -170,7 +170,8 @@ 
create_trace(TraceName, TraceType, TraceValue) -> type => TraceType, TraceType => TraceValue, start_at => Start, - end_at => End + end_at => End, + formatter => json }, {ok, _} = emqx_trace:create(Trace). From 09b414f36879660913ad3511e50875d36707966a Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 18 Apr 2024 17:39:52 +0200 Subject: [PATCH 205/234] test: add necessary application to test suites --- .../test/emqx_rule_engine_api_rule_apply_SUITE.erl | 3 ++- .../test/emqx_rule_engine_api_rule_test_SUITE.erl | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index 147e40d95..b1e533d31 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -36,7 +36,8 @@ init_per_suite(Config) -> emqx_connector, emqx_bridge, emqx_bridge_http, - emqx_rule_engine + emqx_rule_engine, + emqx_modules ], %% I don't know why we need to stop the apps and then start them but if we %% don't do this and other suites run before this suite the test cases will diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl index 7f74cc7d7..8b47669da 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl @@ -30,11 +30,11 @@ all() -> init_per_suite(Config) -> application:load(emqx_conf), ok = emqx_common_test_helpers:load_config(emqx_rule_engine_schema, ?CONF_DEFAULT), - ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx_rule_engine]), + ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx_rule_engine, emqx_modules]), Config. 
end_per_suite(_Config) -> - emqx_common_test_helpers:stop_apps([emqx_conf, emqx_rule_engine]), + emqx_common_test_helpers:stop_apps([emqx_conf, emqx_rule_engine, emqx_modules]), ok. t_ctx_pub(_) -> From 180130d684af17cdf87e097a48cd3f983b0f7d36 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:46:31 +0200 Subject: [PATCH 206/234] feat(sessds): List persistent subscriptions in the REST API --- .../src/emqx_mgmt_api_subscriptions.erl | 210 +++++++++++++++++- .../test/emqx_mgmt_api_subscription_SUITE.erl | 91 ++++++-- changes/ce/fix-12874.en.md | 7 + 3 files changed, 285 insertions(+), 23 deletions(-) create mode 100644 changes/ce/fix-12874.en.md diff --git a/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl b/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl index 9976bf881..b1a8fbce2 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_subscriptions.erl @@ -176,7 +176,8 @@ format(WhichNode, {{Topic, _Subscriber}, SubOpts}) -> #{ topic => emqx_topic:maybe_format_share(Topic), clientid => maps:get(subid, SubOpts, null), - node => WhichNode + node => WhichNode, + durable => false }, maps:with([qos, nl, rap, rh], SubOpts) ). @@ -196,7 +197,22 @@ check_match_topic(#{<<"match_topic">> := MatchTopic}) -> check_match_topic(_) -> ok. -do_subscriptions_query(QString) -> +do_subscriptions_query(QString0) -> + {IsDurable, QString} = maps:take( + <<"durable">>, maps:merge(#{<<"durable">> => undefined}, QString0) + ), + case emqx_persistent_message:is_persistence_enabled() andalso IsDurable of + false -> + do_subscriptions_query_mem(QString); + true -> + do_subscriptions_query_persistent(QString); + undefined -> + merge_queries( + QString, fun do_subscriptions_query_mem/1, fun do_subscriptions_query_persistent/1 + ) + end. 
+ +do_subscriptions_query_mem(QString) -> Args = [?SUBOPTION, QString, ?SUBS_QSCHEMA, fun ?MODULE:qs2ms/2, fun ?MODULE:format/2], case maps:get(<<"node">>, QString, undefined) of undefined -> @@ -210,8 +226,196 @@ do_subscriptions_query(QString) -> end end. +do_subscriptions_query_persistent(#{<<"page">> := Page, <<"limit">> := Limit} = QString) -> + Count = emqx_persistent_session_ds_router:stats(n_routes), + %% TODO: filtering by client ID can be implemented more efficiently: + FilterTopic = maps:get(<<"topic">>, QString, '_'), + Stream0 = emqx_persistent_session_ds_router:stream(FilterTopic), + SubPred = fun(Sub) -> + compare_optional(<<"topic">>, QString, topic, Sub) andalso + compare_optional(<<"clientid">>, QString, clientid, Sub) andalso + compare_optional(<<"qos">>, QString, qos, Sub) andalso + compare_match_topic_optional(<<"match_topic">>, QString, topic, Sub) + end, + NDropped = (Page - 1) * Limit, + {_, Stream} = consume_n_matching( + fun persistent_route_to_subscription/1, SubPred, NDropped, Stream0 + ), + {Subscriptions, Stream1} = consume_n_matching( + fun persistent_route_to_subscription/1, SubPred, Limit, Stream + ), + HasNext = Stream1 =/= [], + Meta = + case maps:is_key(<<"match_topic">>, QString) orelse maps:is_key(<<"qos">>, QString) of + true -> + %% Fuzzy searches shouldn't return count: + #{ + limit => Limit, + page => Page, + hasnext => HasNext + }; + false -> + #{ + count => Count, + limit => Limit, + page => Page, + hasnext => HasNext + } + end, + + #{ + meta => Meta, + data => Subscriptions + }. + +compare_optional(QField, Query, SField, Subscription) -> + case Query of + #{QField := Expected} -> + maps:get(SField, Subscription) =:= Expected; + _ -> + true + end. + +compare_match_topic_optional(QField, Query, SField, Subscription) -> + case Query of + #{QField := TopicFilter} -> + Topic = maps:get(SField, Subscription), + emqx_topic:match(Topic, TopicFilter); + _ -> + true + end. 
+ +%% @doc Drop elements from the stream until encountered N elements +%% matching the predicate function. +-spec consume_n_matching( + fun((T) -> Q), + fun((Q) -> boolean()), + non_neg_integer(), + emqx_utils_stream:stream(T) +) -> {[Q], emqx_utils_stream:stream(T) | empty}. +consume_n_matching(Map, Pred, N, S) -> + consume_n_matching(Map, Pred, N, S, []). + +consume_n_matching(_Map, _Pred, _N, [], Acc) -> + {lists:reverse(Acc), []}; +consume_n_matching(_Map, _Pred, 0, S, Acc) -> + {lists:reverse(Acc), S}; +consume_n_matching(Map, Pred, N, S0, Acc) -> + case emqx_utils_stream:next(S0) of + [] -> + consume_n_matching(Map, Pred, N, [], Acc); + [Elem | S] -> + Mapped = Map(Elem), + case Pred(Mapped) of + true -> consume_n_matching(Map, Pred, N - 1, S, [Mapped | Acc]); + false -> consume_n_matching(Map, Pred, N, S, Acc) + end + end. + +persistent_route_to_subscription(#route{topic = Topic, dest = SessionId}) -> + case emqx_persistent_session_ds:get_client_subscription(SessionId, Topic) of + #{subopts := SubOpts} -> + #{qos := Qos, nl := Nl, rh := Rh, rap := Rap} = SubOpts, + #{ + topic => Topic, + clientid => SessionId, + node => all, + + qos => Qos, + nl => Nl, + rh => Rh, + rap => Rap, + durable => true + }; + undefined -> + #{ + topic => Topic, + clientid => SessionId, + node => all, + durable => true + } + end. + +%% @private This function merges paginated results from two sources. +%% +%% Note: this implementation is far from ideal: `count' for the +%% queries may be missing, it may be larger than the actual number of +%% elements. This may lead to empty pages that can confuse the user. +%% +%% Not much can be done to mitigate that, though: since the count may +%% be incorrect, we cannot run simple math to determine when one +%% stream begins and another ends: it requires actual iteration. +%% +%% Ideally, the dashboard must be split between durable and mem +%% subscriptions, and this function should be removed for good. 
+merge_queries(QString0, Q1, Q2) -> + #{<<"limit">> := Limit, <<"page">> := Page} = QString0, + C1 = resp_count(QString0, Q1), + C2 = resp_count(QString0, Q2), + Meta = + case is_number(C1) andalso is_number(C2) of + true -> + #{ + count => C1 + C2, + limit => Limit, + page => Page + }; + false -> + #{ + limit => Limit, + page => Page + } + end, + case {C1, C2} of + {_, 0} -> + %% The second query is empty. Just return the result of Q1 as usual: + Q1(QString0); + {0, _} -> + %% The first query is empty. Just return the result of Q2 as usual: + Q2(QString0); + _ when is_number(C1) -> + %% Both queries are potentially non-empty, but we at least + %% have the page number for the first query. We try to + %% stich the pages together and thus respect the limit + %% (except for the page where the results switch from Q1 + %% to Q2). + + %% Page where data from the second query is estimated to + %% begin: + Q2Page = ceil(C1 / Limit), + case Page =< Q2Page of + true -> + #{data := Data, meta := #{hasnext := HN}} = Q1(QString0), + #{ + data => Data, + meta => Meta#{hasnext => HN orelse C2 > 0} + }; + false -> + QString = QString0#{<<"page">> => Page - Q2Page}, + #{data := Data, meta := #{hasnext := HN}} = Q2(QString), + #{data => Data, meta => Meta#{hasnext => HN}} + end; + _ -> + %% We don't know how many items is there in the first + %% query, and the second query is not empty (this includes + %% the case where `C2' is `undefined'). Best we can do is + %% to interleave the queries. This may produce less + %% results per page than `Limit'. + QString = QString0#{<<"limit">> => ceil(Limit / 2)}, + #{data := D1, meta := #{hasnext := HN1}} = Q1(QString), + #{data := D2, meta := #{hasnext := HN2}} = Q2(QString), + #{ + meta => Meta#{hasnext => HN1 or HN2}, + data => D1 ++ D2 + } + end. + +resp_count(Query, QFun) -> + #{meta := Meta} = QFun(Query#{<<"limit">> => 1, <<"page">> => 1}), + maps:get(count, Meta, undefined). 
+ %%-------------------------------------------------------------------- -%% QueryString to MatchSpec +%% QueryString to MatchSpec (mem sessions) %%-------------------------------------------------------------------- -spec qs2ms(atom(), {list(), list()}) -> emqx_mgmt_api:match_spec_and_filter(). diff --git a/apps/emqx_management/test/emqx_mgmt_api_subscription_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_subscription_SUITE.erl index 356ae97e4..435a837e3 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_subscription_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_subscription_SUITE.erl @@ -36,17 +36,72 @@ -define(TOPIC_SORT, #{?TOPIC1 => 1, ?TOPIC2 => 2}). all() -> - emqx_common_test_helpers:all(?MODULE). + [ + {group, mem}, + {group, persistent} + ]. + +groups() -> + CommonTCs = emqx_common_test_helpers:all(?MODULE), + [ + {mem, CommonTCs}, + %% Shared subscriptions are currently not supported: + {persistent, CommonTCs -- [t_list_with_shared_sub, t_subscription_api]} + ]. init_per_suite(Config) -> - emqx_mgmt_api_test_util:init_suite(), + Apps = emqx_cth_suite:start( + [ + {emqx, + "session_persistence {\n" + " enable = true\n" + " renew_streams_interval = 10ms\n" + "}"}, + emqx_management, + emqx_mgmt_api_test_util:emqx_dashboard() + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{apps, Apps} | Config]. + +end_per_suite(Config) -> + ok = emqx_cth_suite:stop(?config(apps, Config)). + +init_per_group(persistent, Config) -> + ClientConfig = #{ + username => ?USERNAME, + clientid => ?CLIENTID, + proto_ver => v5, + clean_start => true, + properties => #{'Session-Expiry-Interval' => 300} + }, + [{client_config, ClientConfig}, {durable, true} | Config]; +init_per_group(mem, Config) -> + ClientConfig = #{ + username => ?USERNAME, clientid => ?CLIENTID, proto_ver => v5, clean_start => true + }, + [{client_config, ClientConfig}, {durable, false} | Config]. + +end_per_group(_, Config) -> Config. 
-end_per_suite(_) -> - emqx_mgmt_api_test_util:end_suite(). +init_per_testcase(_TC, Config) -> + case ?config(client_config, Config) of + ClientConfig when is_map(ClientConfig) -> + {ok, Client} = emqtt:start_link(ClientConfig), + {ok, _} = emqtt:connect(Client), + [{client, Client} | Config]; + _ -> + Config + end. + +end_per_testcase(_TC, Config) -> + Client = proplists:get_value(client, Config), + emqtt:disconnect(Client). t_subscription_api(Config) -> Client = proplists:get_value(client, Config), + Durable = atom_to_list(?config(durable, Config)), {ok, _, _} = emqtt:subscribe( Client, [ {?TOPIC1, [{rh, ?TOPIC1RH}, {rap, ?TOPIC1RAP}, {nl, ?TOPIC1NL}, {qos, ?TOPIC1QOS}]} @@ -54,12 +109,13 @@ t_subscription_api(Config) -> ), {ok, _, _} = emqtt:subscribe(Client, ?TOPIC2), Path = emqx_mgmt_api_test_util:api_path(["subscriptions"]), + timer:sleep(100), {ok, Response} = emqx_mgmt_api_test_util:request_api(get, Path), Data = emqx_utils_json:decode(Response, [return_maps]), Meta = maps:get(<<"meta">>, Data), ?assertEqual(1, maps:get(<<"page">>, Meta)), ?assertEqual(emqx_mgmt:default_row_limit(), maps:get(<<"limit">>, Meta)), - ?assertEqual(2, maps:get(<<"count">>, Meta)), + ?assertEqual(2, maps:get(<<"count">>, Meta), Data), Subscriptions = maps:get(<<"data">>, Data), ?assertEqual(length(Subscriptions), 2), Sort = @@ -90,7 +146,8 @@ t_subscription_api(Config) -> {"node", atom_to_list(node())}, {"qos", "0"}, {"share_group", "test_group"}, - {"match_topic", "t/#"} + {"match_topic", "t/#"}, + {"durable", Durable} ], Headers = emqx_mgmt_api_test_util:auth_header_(), @@ -103,6 +160,7 @@ t_subscription_api(Config) -> t_subscription_fuzzy_search(Config) -> Client = proplists:get_value(client, Config), + Durable = atom_to_list(?config(durable, Config)), Topics = [ <<"t/foo">>, <<"t/foo/bar">>, @@ -116,7 +174,8 @@ t_subscription_fuzzy_search(Config) -> MatchQs = [ {"clientid", ?CLIENTID}, {"node", atom_to_list(node())}, - {"match_topic", "t/#"} + {"match_topic", "t/#"}, + 
{"durable", Durable} ], MatchData1 = #{<<"meta">> := MatchMeta1} = request_json(get, MatchQs, Headers), @@ -130,12 +189,13 @@ t_subscription_fuzzy_search(Config) -> LimitMatchQuery = [ {"clientid", ?CLIENTID}, {"match_topic", "+/+/+"}, - {"limit", "3"} + {"limit", "3"}, + {"durable", Durable} ], MatchData2 = #{<<"meta">> := MatchMeta2} = request_json(get, LimitMatchQuery, Headers), ?assertEqual(#{<<"page">> => 1, <<"limit">> => 3, <<"hasnext">> => true}, MatchMeta2), - ?assertEqual(3, length(maps:get(<<"data">>, MatchData2))), + ?assertEqual(3, length(maps:get(<<"data">>, MatchData2)), MatchData2), MatchData2P2 = #{<<"meta">> := MatchMeta2P2} = @@ -176,8 +236,8 @@ t_list_with_shared_sub(_Config) -> ok. -t_list_with_invalid_match_topic(_Config) -> - Client = proplists:get_value(client, _Config), +t_list_with_invalid_match_topic(Config) -> + Client = proplists:get_value(client, Config), RealTopic = <<"t/+">>, Topic = <<"$share/g1/", RealTopic/binary>>, @@ -212,12 +272,3 @@ request_json(Method, Query, Headers) when is_list(Query) -> path() -> emqx_mgmt_api_test_util:api_path(["subscriptions"]). - -init_per_testcase(_TC, Config) -> - {ok, Client} = emqtt:start_link(#{username => ?USERNAME, clientid => ?CLIENTID, proto_ver => v5}), - {ok, _} = emqtt:connect(Client), - [{client, Client} | Config]. - -end_per_testcase(_TC, Config) -> - Client = proplists:get_value(client, Config), - emqtt:disconnect(Client). 
diff --git a/changes/ce/fix-12874.en.md b/changes/ce/fix-12874.en.md new file mode 100644 index 000000000..1a5814b07 --- /dev/null +++ b/changes/ce/fix-12874.en.md @@ -0,0 +1,7 @@ +- Ensure consistency of the durable message replay when the subscriptions are modified before session reconnects + +- Persistent sessions save inflight packet IDs for the received QoS2 messages + +- Make behavior of the persistent sessions consistent with the non-persistent sessions in regard to overlapping subscriptions + +- List persistent subscriptions in the REST API From d12966db5b51c278d8ae5c6fe699c80230362283 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:00:06 +0200 Subject: [PATCH 207/234] test: Avoid dumping raw snabbkaffe traces to the console --- .../integration_test/emqx_persistent_session_ds_SUITE.erl | 7 +------ .../test/emqx_bridge_cassandra_SUITE.erl | 1 - .../test/emqx_bridge_gcp_pubsub_producer_SUITE.erl | 1 - apps/emqx_bridge_mysql/test/emqx_bridge_mysql_SUITE.erl | 1 - apps/emqx_bridge_pgsql/test/emqx_bridge_pgsql_SUITE.erl | 1 - apps/emqx_resource/test/emqx_resource_SUITE.erl | 2 -- 6 files changed, 1 insertion(+), 12 deletions(-) diff --git a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl index ab062bff7..a5260f780 100644 --- a/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl +++ b/apps/emqx/integration_test/emqx_persistent_session_ds_SUITE.erl @@ -252,7 +252,6 @@ t_session_subscription_idempotency(Config) -> ok end, fun(Trace) -> - ct:pal("trace:\n ~p", [Trace]), Session = session_open(Node1, ClientId), ?assertMatch( #{SubTopicFilter := #{}}, @@ -326,7 +325,6 @@ t_session_unsubscription_idempotency(Config) -> ok end, fun(Trace) -> - ct:pal("trace:\n ~p", [Trace]), Session = session_open(Node1, ClientId), ?assertEqual( #{}, @@ -415,10 +413,7 @@ do_t_session_discard(Params) -> ok end, - fun(Trace) -> - 
ct:pal("trace:\n ~p", [Trace]), - ok - end + [] ), ok. diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl index 868d0191e..449d1fa51 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl @@ -581,7 +581,6 @@ t_write_failure(Config) -> ) end), fun(Trace0) -> - ct:pal("trace: ~p", [Trace0]), Trace = ?of_kind( [buffer_worker_flush_nack, buffer_worker_retry_inflight_failed], Trace0 ), diff --git a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_producer_SUITE.erl b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_producer_SUITE.erl index 6666a3fd0..d96157f8c 100644 --- a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_producer_SUITE.erl +++ b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_producer_SUITE.erl @@ -1929,7 +1929,6 @@ t_bad_attributes(Config) -> ok end, fun(Trace) -> - ct:pal("trace:\n ~p", [Trace]), ?assertMatch( [ #{placeholder := [<<"payload">>, <<"ok">>], value := #{}}, diff --git a/apps/emqx_bridge_mysql/test/emqx_bridge_mysql_SUITE.erl b/apps/emqx_bridge_mysql/test/emqx_bridge_mysql_SUITE.erl index 8b719de9a..9ad2fbc5a 100644 --- a/apps/emqx_bridge_mysql/test/emqx_bridge_mysql_SUITE.erl +++ b/apps/emqx_bridge_mysql/test/emqx_bridge_mysql_SUITE.erl @@ -517,7 +517,6 @@ t_write_failure(Config) -> ok end, fun(Trace0) -> - ct:pal("trace: ~p", [Trace0]), Trace = ?of_kind(buffer_worker_flush_nack, Trace0), ?assertMatch([#{result := {error, _}} | _], Trace), [#{result := {error, Error}} | _] = Trace, diff --git a/apps/emqx_bridge_pgsql/test/emqx_bridge_pgsql_SUITE.erl b/apps/emqx_bridge_pgsql/test/emqx_bridge_pgsql_SUITE.erl index 3e9428c88..f4917f387 100644 --- a/apps/emqx_bridge_pgsql/test/emqx_bridge_pgsql_SUITE.erl +++ b/apps/emqx_bridge_pgsql/test/emqx_bridge_pgsql_SUITE.erl @@ -520,7 +520,6 @@ t_write_failure(Config) -> ) end), fun(Trace0) -> 
- ct:pal("trace: ~p", [Trace0]), Trace = ?of_kind(buffer_worker_flush_nack, Trace0), ?assertMatch([#{result := {error, _}} | _], Trace), [#{result := {error, Error}} | _] = Trace, diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 99e85424d..171baf4ad 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -3346,7 +3346,6 @@ wait_n_events(NEvents, Timeout, EventName) -> end. assert_sync_retry_fail_then_succeed_inflight(Trace) -> - ct:pal(" ~p", [Trace]), ?assert( ?strict_causality( #{?snk_kind := buffer_worker_flush_nack, ref := _Ref}, @@ -3366,7 +3365,6 @@ assert_sync_retry_fail_then_succeed_inflight(Trace) -> ok. assert_async_retry_fail_then_succeed_inflight(Trace) -> - ct:pal(" ~p", [Trace]), ?assert( ?strict_causality( #{?snk_kind := handle_async_reply, action := nack}, From f1e6565ddd03b61b19dc21eaecf8d0b0971388b7 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:10:30 +0200 Subject: [PATCH 208/234] refactor(sessds): Move all subscription logic to the subs module --- apps/emqx/src/emqx_persistent_session_ds.erl | 69 ++++++------------- .../src/emqx_persistent_session_ds_subs.erl | 55 ++++++++++++--- 2 files changed, 66 insertions(+), 58 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index b8c853431..20c382934 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -368,52 +368,31 @@ subscribe( subscribe( TopicFilter, SubOpts, - Session = #{id := ID} + Session ) -> - {UpdateRouter, S1} = emqx_persistent_session_ds_subs:on_subscribe( - TopicFilter, SubOpts, Session - ), - case UpdateRouter of - true -> - ok = emqx_persistent_session_ds_router:do_add_route(TopicFilter, ID); - false -> - ok - end, - S = emqx_persistent_session_ds_state:commit(S1), - 
UpdateRouter andalso - ?tp(persistent_session_ds_subscription_added, #{topic_filter => TopicFilter, session => ID}), - {ok, Session#{s => S}}. + case emqx_persistent_session_ds_subs:on_subscribe(TopicFilter, SubOpts, Session) of + {ok, S1} -> + S = emqx_persistent_session_ds_state:commit(S1), + {ok, Session#{s => S}}; + Error = {error, _} -> + Error + end. -spec unsubscribe(topic_filter(), session()) -> {ok, session(), emqx_types:subopts()} | {error, emqx_types:reason_code()}. unsubscribe( TopicFilter, - Session = #{id := ID, s := S0} + Session = #{id := SessionId, s := S0} ) -> - case emqx_persistent_session_ds_subs:lookup(TopicFilter, S0) of - undefined -> - {error, ?RC_NO_SUBSCRIPTION_EXISTED}; - Subscription = #{subopts := SubOpts} -> - S1 = do_unsubscribe(ID, TopicFilter, Subscription, S0), - S = emqx_persistent_session_ds_state:commit(S1), - {ok, Session#{s => S}, SubOpts} + case emqx_persistent_session_ds_subs:on_unsubscribe(SessionId, TopicFilter, S0) of + {ok, S1, #{id := SubId, subopts := SubOpts}} -> + S2 = emqx_persistent_session_ds_stream_scheduler:on_unsubscribe(SubId, S1), + S = emqx_persistent_session_ds_state:commit(S2), + {ok, Session#{s => S}, SubOpts}; + Error = {error, _} -> + Error end. --spec do_unsubscribe(id(), topic_filter(), subscription(), emqx_persistent_session_ds_state:t()) -> - emqx_persistent_session_ds_state:t(). -do_unsubscribe(SessionId, TopicFilter, #{id := SubId}, S0) -> - S1 = emqx_persistent_session_ds_subs:on_unsubscribe(TopicFilter, S0), - ?tp(persistent_session_ds_subscription_delete, #{ - session_id => SessionId, topic_filter => TopicFilter - }), - S = emqx_persistent_session_ds_stream_scheduler:on_unsubscribe(SubId, S1), - ?tp_span( - persistent_session_ds_subscription_route_delete, - #{session_id => SessionId, topic_filter => TopicFilter}, - ok = emqx_persistent_session_ds_router:do_delete_route(TopicFilter, SessionId) - ), - S. - -spec get_subscription(topic_filter(), session()) -> emqx_types:subopts() | undefined. 
get_subscription(#share{}, _) -> @@ -860,18 +839,12 @@ session_ensure_new( %% @doc Called when a client reconnects with `clean session=true' or %% during session GC -spec session_drop(id(), _Reason) -> ok. -session_drop(ID, Reason) -> - case emqx_persistent_session_ds_state:open(ID) of +session_drop(SessionId, Reason) -> + case emqx_persistent_session_ds_state:open(SessionId) of {ok, S0} -> - ?tp(debug, drop_persistent_session, #{client_id => ID, reason => Reason}), - _S = emqx_persistent_session_ds_subs:fold( - fun(TopicFilter, Subscription, S) -> - do_unsubscribe(ID, TopicFilter, Subscription, S) - end, - S0, - S0 - ), - emqx_persistent_session_ds_state:delete(ID); + ?tp(debug, drop_persistent_session, #{client_id => SessionId, reason => Reason}), + emqx_persistent_session_ds_subs:on_session_drop(SessionId, S0), + emqx_persistent_session_ds_state:delete(SessionId); undefined -> ok end. diff --git a/apps/emqx/src/emqx_persistent_session_ds_subs.erl b/apps/emqx/src/emqx_persistent_session_ds_subs.erl index 99ad9f9fc..8b4f70a69 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_subs.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_subs.erl @@ -26,7 +26,8 @@ %% API: -export([ on_subscribe/3, - on_unsubscribe/2, + on_unsubscribe/3, + on_session_drop/2, gc/1, lookup/2, to_map/1, @@ -41,6 +42,8 @@ -export_type([subscription_state_id/0, subscription/0, subscription_state/0]). -include("emqx_persistent_session_ds.hrl"). +-include("emqx_mqtt.hrl"). +-include_lib("snabbkaffe/include/trace.hrl"). %%================================================================================ %% Type declarations @@ -81,14 +84,15 @@ emqx_types:subopts(), emqx_persistent_session_ds:session() ) -> - {_UpdateRouter :: boolean(), emqx_persistent_session_ds_state:t()}. -on_subscribe(TopicFilter, SubOpts, #{s := S0, props := Props}) -> + {ok, emqx_persistent_session_ds_state:t()} | {error, ?RC_QUOTA_EXCEEDED}. 
+on_subscribe(TopicFilter, SubOpts, #{id := SessionId, s := S0, props := Props}) -> #{upgrade_qos := UpgradeQoS, max_subscriptions := MaxSubscriptions} = Props, case emqx_persistent_session_ds_state:get_subscription(TopicFilter, S0) of undefined -> %% This is a new subscription: case emqx_persistent_session_ds_state:n_subscriptions(S0) < MaxSubscriptions of true -> + ok = emqx_persistent_session_ds_router:do_add_route(TopicFilter, SessionId), {SubId, S1} = emqx_persistent_session_ds_state:new_id(S0), {SStateId, S2} = emqx_persistent_session_ds_state:new_id(S1), SState = #{ @@ -105,16 +109,19 @@ on_subscribe(TopicFilter, SubOpts, #{s := S0, props := Props}) -> S = emqx_persistent_session_ds_state:put_subscription( TopicFilter, Subscription, S3 ), - {true, S}; + ?tp(persistent_session_ds_subscription_added, #{ + topic_filter => TopicFilter, session => SessionId + }), + {ok, S}; false -> - {false, S0} + {error, ?RC_QUOTA_EXCEEDED} end; Sub0 = #{current_state := SStateId0, id := SubId} -> SState = #{parent_subscription => SubId, upgrade_qos => UpgradeQoS, subopts => SubOpts}, case emqx_persistent_session_ds_state:get_subscription_state(SStateId0, S0) of SState -> %% Client resubscribed with the same parameters: - {false, S0}; + {ok, S0}; _ -> %% Subsription parameters changed: {SStateId, S1} = emqx_persistent_session_ds_state:new_id(S0), @@ -123,18 +130,46 @@ on_subscribe(TopicFilter, SubOpts, #{s := S0, props := Props}) -> ), Sub = Sub0#{current_state => SStateId}, S = emqx_persistent_session_ds_state:put_subscription(TopicFilter, Sub, S2), - {false, S} + {ok, S} end end. %% @doc Process UNSUBSCRIBE -spec on_unsubscribe( + emqx_persistent_session_ds:id(), emqx_persistent_session_ds:topic_filter(), emqx_persistent_session_ds_state:t() ) -> - emqx_persistent_session_ds_state:t(). -on_unsubscribe(TopicFilter, S0) -> - emqx_persistent_session_ds_state:del_subscription(TopicFilter, S0). 
+ {ok, emqx_persistent_session_ds_state:t(), emqx_persistent_session_ds:subscription()} + | {error, ?RC_NO_SUBSCRIPTION_EXISTED}. +on_unsubscribe(SessionId, TopicFilter, S0) -> + case lookup(TopicFilter, S0) of + undefined -> + {error, ?RC_NO_SUBSCRIPTION_EXISTED}; + Subscription -> + ?tp(persistent_session_ds_subscription_delete, #{ + session_id => SessionId, topic_filter => TopicFilter + }), + ?tp_span( + persistent_session_ds_subscription_route_delete, + #{session_id => SessionId, topic_filter => TopicFilter}, + ok = emqx_persistent_session_ds_router:do_delete_route(TopicFilter, SessionId) + ), + {ok, emqx_persistent_session_ds_state:del_subscription(TopicFilter, S0), Subscription} + end. + +-spec on_session_drop(emqx_persistent_session_ds:id(), emqx_persistent_session_ds_state:t()) -> ok. +on_session_drop(SessionId, S0) -> + fold( + fun(TopicFilter, _Subscription, S) -> + case on_unsubscribe(SessionId, TopicFilter, S) of + {ok, S1, _} -> S1; + _ -> S + end + end, + S0, + S0 + ). %% @doc Remove subscription states that don't have a parent, and that %% don't have any unacked messages: From c0521fd250c3e60432d26ed475550590ef1a0697 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Fri, 19 Apr 2024 08:45:48 +0800 Subject: [PATCH 209/234] feat: support iotdb 1.3.0 --- .../docker-compose-iotdb.yaml | 47 ++++++-- .ci/docker-compose-file/toxiproxy.json | 10 +- .../test/emqx_bridge_v2_testlib.erl | 2 +- .../include/emqx_bridge_iotdb.hrl | 2 + .../src/emqx_bridge_iotdb.erl | 4 +- .../src/emqx_bridge_iotdb_connector.erl | 45 ++++--- .../test/emqx_bridge_iotdb_impl_SUITE.erl | 113 ++++++++++-------- 7 files changed, 145 insertions(+), 78 deletions(-) diff --git a/.ci/docker-compose-file/docker-compose-iotdb.yaml b/.ci/docker-compose-file/docker-compose-iotdb.yaml index 2a2b0e603..f5448a1ef 100644 --- a/.ci/docker-compose-file/docker-compose-iotdb.yaml +++ b/.ci/docker-compose-file/docker-compose-iotdb.yaml @@ -1,24 +1,53 @@ version: '3.9' services: - iotdb: - 
container_name: iotdb - hostname: iotdb - image: apache/iotdb:1.1.0-standalone + iotdb_1_3_0: + container_name: iotdb130 + hostname: iotdb130 + image: apache/iotdb:1.3.0-standalone restart: always environment: - enable_rest_service=true - - cn_internal_address=iotdb + - cn_internal_address=iotdb130 - cn_internal_port=10710 - cn_consensus_port=10720 - - cn_target_config_node_list=iotdb:10710 - - dn_rpc_address=iotdb - - dn_internal_address=iotdb + - cn_seed_config_node=iotdb130:10710 + - dn_rpc_address=iotdb130 + - dn_internal_address=iotdb130 - dn_rpc_port=6667 - dn_mpp_data_exchange_port=10740 - dn_schema_region_consensus_port=10750 - dn_data_region_consensus_port=10760 - - dn_target_config_node_list=iotdb:10710 + - dn_seed_config_node=iotdb130:10710 + # volumes: + # - ./data:/iotdb/data + # - ./logs:/iotdb/logs + expose: + - "18080" + # IoTDB's REST interface, uncomment for local testing + # ports: + # - "18080:18080" + networks: + - emqx_bridge + + iotdb_1_1_0: + container_name: iotdb110 + hostname: iotdb110 + image: apache/iotdb:1.1.0-standalone + restart: always + environment: + - enable_rest_service=true + - cn_internal_address=iotdb110 + - cn_internal_port=10710 + - cn_consensus_port=10720 + - cn_target_config_node_list=iotdb110:10710 + - dn_rpc_address=iotdb110 + - dn_internal_address=iotdb110 + - dn_rpc_port=6667 + - dn_mpp_data_exchange_port=10740 + - dn_schema_region_consensus_port=10750 + - dn_data_region_consensus_port=10760 + - dn_target_config_node_list=iotdb110:10710 # volumes: # - ./data:/iotdb/data # - ./logs:/iotdb/logs diff --git a/.ci/docker-compose-file/toxiproxy.json b/.ci/docker-compose-file/toxiproxy.json index 103bae924..a3c1dfbf4 100644 --- a/.ci/docker-compose-file/toxiproxy.json +++ b/.ci/docker-compose-file/toxiproxy.json @@ -139,9 +139,15 @@ "enabled": true }, { - "name": "iotdb", + "name": "iotdb110", "listen": "0.0.0.0:18080", - "upstream": "iotdb:18080", + "upstream": "iotdb110:18080", + "enabled": true + }, + { + "name": 
"iotdb130", + "listen": "0.0.0.0:28080", + "upstream": "iotdb130:18080", "enabled": true }, { diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl index 8a781d6e7..a3316e39d 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl @@ -705,7 +705,7 @@ t_async_query(Config, MakeMessageFun, IsSuccessCheck, TracePoint) -> ), receive {result, Result} -> IsSuccessCheck(Result) - after 5_000 -> + after 8_000 -> throw(timeout) end, ok. diff --git a/apps/emqx_bridge_iotdb/include/emqx_bridge_iotdb.hrl b/apps/emqx_bridge_iotdb/include/emqx_bridge_iotdb.hrl index 8ce7bce6d..6cf0c5508 100644 --- a/apps/emqx_bridge_iotdb/include/emqx_bridge_iotdb.hrl +++ b/apps/emqx_bridge_iotdb/include/emqx_bridge_iotdb.hrl @@ -5,6 +5,8 @@ -ifndef(EMQX_BRIDGE_IOTDB_HRL). -define(EMQX_BRIDGE_IOTDB_HRL, true). +-define(VSN_1_3_X, 'v1.3.x'). +-define(VSN_1_2_X, 'v1.2.x'). -define(VSN_1_1_X, 'v1.1.x'). -define(VSN_1_0_X, 'v1.0.x'). -define(VSN_0_13_X, 'v0.13.x'). 
diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl index 134868978..cbad8ca63 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl @@ -220,10 +220,10 @@ basic_config() -> )}, {iotdb_version, mk( - hoconsc:enum([?VSN_1_1_X, ?VSN_1_0_X, ?VSN_0_13_X]), + hoconsc:enum([?VSN_1_3_X, ?VSN_1_1_X, ?VSN_1_0_X, ?VSN_0_13_X]), #{ desc => ?DESC("config_iotdb_version"), - default => ?VSN_1_1_X + default => ?VSN_1_3_X } )} ] ++ resource_creation_opts() ++ diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl index 92316f0cf..4bde75967 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl @@ -94,7 +94,7 @@ connector_example_values() -> name => <<"iotdb_connector">>, type => iotdb, enable => true, - iotdb_version => ?VSN_1_1_X, + iotdb_version => ?VSN_1_3_X, authentication => #{ <<"username">> => <<"root">>, <<"password">> => <<"******">> @@ -133,10 +133,10 @@ fields("connection_fields") -> )}, {iotdb_version, mk( - hoconsc:enum([?VSN_1_1_X, ?VSN_1_0_X, ?VSN_0_13_X]), + hoconsc:enum([?VSN_1_3_X, ?VSN_1_1_X, ?VSN_1_0_X, ?VSN_0_13_X]), #{ desc => ?DESC(emqx_bridge_iotdb, "config_iotdb_version"), - default => ?VSN_1_1_X + default => ?VSN_1_3_X } )}, {authentication, @@ -342,6 +342,7 @@ on_add_channel( Path = case Version of ?VSN_1_1_X -> InsertTabletPathV2; + ?VSN_1_3_X -> InsertTabletPathV2; _ -> InsertTabletPathV1 end, @@ -442,14 +443,14 @@ maybe_preproc_tmpl(Value) when is_binary(Value) -> maybe_preproc_tmpl(Value) -> Value. 
-proc_data(PreProcessedData, Msg) -> +proc_data(PreProcessedData, Msg, IoTDBVsn) -> NowNS = erlang:system_time(nanosecond), Nows = #{ now_ms => erlang:convert_time_unit(NowNS, nanosecond, millisecond), now_us => erlang:convert_time_unit(NowNS, nanosecond, microsecond), now_ns => NowNS }, - proc_data(PreProcessedData, Msg, Nows, []). + proc_data(PreProcessedData, Msg, Nows, IoTDBVsn, []). proc_data( [ @@ -463,15 +464,16 @@ proc_data( ], Msg, Nows, + IotDbVsn, Acc ) -> DataType = list_to_binary( string:uppercase(binary_to_list(emqx_placeholder:proc_tmpl(DataType0, Msg))) ), try - proc_data(T, Msg, Nows, [ + proc_data(T, Msg, Nows, IotDbVsn, [ #{ - timestamp => iot_timestamp(TimestampTkn, Msg, Nows), + timestamp => iot_timestamp(IotDbVsn, TimestampTkn, Msg, Nows), measurement => emqx_placeholder:proc_tmpl(Measurement, Msg), data_type => DataType, value => proc_value(DataType, ValueTkn, Msg) @@ -485,23 +487,28 @@ proc_data( ?SLOG(debug, #{exception => Error, reason => Reason, stacktrace => Stacktrace}), {error, invalid_data} end; -proc_data([], _Msg, _Nows, Acc) -> +proc_data([], _Msg, _Nows, _IotDbVsn, Acc) -> {ok, lists:reverse(Acc)}. -iot_timestamp(Timestamp, _, _) when is_integer(Timestamp) -> +iot_timestamp(_IotDbVsn, Timestamp, _, _) when is_integer(Timestamp) -> Timestamp; -iot_timestamp(TimestampTkn, Msg, Nows) -> - iot_timestamp(emqx_placeholder:proc_tmpl(TimestampTkn, Msg), Nows). +iot_timestamp(IotDbVsn, TimestampTkn, Msg, Nows) -> + iot_timestamp(IotDbVsn, emqx_placeholder:proc_tmpl(TimestampTkn, Msg), Nows). 
-iot_timestamp(<<"now_us">>, #{now_us := NowUs}) -> +%% > v1.3.0 don't allow write nanoseconds nor microseconds +iot_timestamp(?VSN_1_3_X, <<"now_us">>, #{now_ms := NowMs}) -> + NowMs; +iot_timestamp(?VSN_1_3_X, <<"now_ns">>, #{now_ms := NowMs}) -> + NowMs; +iot_timestamp(_IotDbVsn, <<"now_us">>, #{now_us := NowUs}) -> NowUs; -iot_timestamp(<<"now_ns">>, #{now_ns := NowNs}) -> +iot_timestamp(_IotDbVsn, <<"now_ns">>, #{now_ns := NowNs}) -> NowNs; -iot_timestamp(Timestamp, #{now_ms := NowMs}) when +iot_timestamp(_IotDbVsn, Timestamp, #{now_ms := NowMs}) when Timestamp =:= <<"now">>; Timestamp =:= <<"now_ms">>; Timestamp =:= <<>> -> NowMs; -iot_timestamp(Timestamp, _) when is_binary(Timestamp) -> +iot_timestamp(_IotDbVsn, Timestamp, _) when is_binary(Timestamp) -> binary_to_integer(Timestamp). proc_value(<<"TEXT">>, ValueTkn, Msg) -> @@ -633,18 +640,24 @@ insert_value(1, Data, [Value | Values]) -> insert_value(Index, Data, [Value | Values]) -> [[null | Value] | insert_value(Index - 1, Data, Values)]. 
+iotdb_field_key(is_aligned, ?VSN_1_3_X) -> + <<"is_aligned">>; iotdb_field_key(is_aligned, ?VSN_1_1_X) -> <<"is_aligned">>; iotdb_field_key(is_aligned, ?VSN_1_0_X) -> <<"is_aligned">>; iotdb_field_key(is_aligned, ?VSN_0_13_X) -> <<"isAligned">>; +iotdb_field_key(device_id, ?VSN_1_3_X) -> + <<"device">>; iotdb_field_key(device_id, ?VSN_1_1_X) -> <<"device">>; iotdb_field_key(device_id, ?VSN_1_0_X) -> <<"device">>; iotdb_field_key(device_id, ?VSN_0_13_X) -> <<"deviceId">>; +iotdb_field_key(data_types, ?VSN_1_3_X) -> + <<"data_types">>; iotdb_field_key(data_types, ?VSN_1_1_X) -> <<"data_types">>; iotdb_field_key(data_types, ?VSN_1_0_X) -> @@ -725,7 +738,7 @@ render_channel_message(#{is_aligned := IsAligned} = Channel, IoTDBVsn, Message) [] -> {error, invalid_template}; DataTemplate -> - case proc_data(DataTemplate, Message) of + case proc_data(DataTemplate, Message, IoTDBVsn) of {ok, DataList} -> make_iotdb_insert_request(DataList, IsAligned, DeviceId, IoTDBVsn); Error -> diff --git a/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl b/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl index 693f16d05..ce8cd01e8 100644 --- a/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl +++ b/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl @@ -20,14 +20,16 @@ all() -> [ - {group, plain}, + {group, iotdb110}, + {group, iotdb130}, {group, legacy} ]. groups() -> AllTCs = emqx_common_test_helpers:all(?MODULE), [ - {plain, AllTCs}, + {iotdb110, AllTCs}, + {iotdb130, AllTCs}, {legacy, AllTCs} ]. @@ -37,10 +39,15 @@ init_per_suite(Config) -> end_per_suite(Config) -> emqx_bridge_v2_testlib:end_per_suite(Config). 
-init_per_group(plain = Type, Config0) -> +init_per_group(Type, Config0) when Type =:= iotdb110 orelse Type =:= iotdb130 -> Host = os:getenv("IOTDB_PLAIN_HOST", "toxiproxy.emqx.net"), - Port = list_to_integer(os:getenv("IOTDB_PLAIN_PORT", "18080")), - ProxyName = "iotdb", + ProxyName = atom_to_list(Type), + {IotDbVersion, DefaultPort} = + case Type of + iotdb110 -> {?VSN_1_1_X, "18080"}; + iotdb130 -> {?VSN_1_3_X, "28080"} + end, + Port = list_to_integer(os:getenv("IOTDB_PLAIN_PORT", DefaultPort)), case emqx_common_test_helpers:is_tcp_server_available(Host, Port) of true -> Config = emqx_bridge_v2_testlib:init_per_group(Type, ?BRIDGE_TYPE_BIN, Config0), @@ -48,7 +55,7 @@ init_per_group(plain = Type, Config0) -> {bridge_host, Host}, {bridge_port, Port}, {proxy_name, ProxyName}, - {iotdb_version, ?VSN_1_1_X}, + {iotdb_version, IotDbVersion}, {iotdb_rest_prefix, <<"/rest/v2/">>} | Config ]; @@ -87,7 +94,8 @@ init_per_group(_Group, Config) -> Config. end_per_group(Group, Config) when - Group =:= plain; + Group =:= iotdb110; + Group =:= iotdb130; Group =:= legacy -> emqx_bridge_v2_testlib:end_per_group(Config), @@ -245,7 +253,9 @@ iotdb_query(Config, Query) -> iotdb_request(Config, Path, Body, Opts). is_success_check({ok, 200, _, Body}) -> - ?assert(is_code(200, emqx_utils_json:decode(Body))). + ?assert(is_code(200, emqx_utils_json:decode(Body))); +is_success_check(Other) -> + throw(Other). is_code(Code, #{<<"code">> := Code}) -> true; is_code(_, _) -> false. 
@@ -359,64 +369,64 @@ t_async_query(Config) -> t_sync_query_aggregated(Config) -> DeviceId = iotdb_device(Config), + MS = erlang:system_time(millisecond) - 5000, Payload = [ - make_iotdb_payload(DeviceId, "temp", "INT32", "36", 1685112026290), - make_iotdb_payload(DeviceId, "temp", "INT32", 37, 1685112026291), - make_iotdb_payload(DeviceId, "temp", "INT32", 38.7, 1685112026292), - make_iotdb_payload(DeviceId, "temp", "INT32", "39", <<"1685112026293">>), - make_iotdb_payload(DeviceId, "temp", "INT64", "36", 1685112026294), - make_iotdb_payload(DeviceId, "temp", "INT64", 36, 1685112026295), - make_iotdb_payload(DeviceId, "temp", "INT64", 36.7, 1685112026296), - %% implicit 'now()' timestamp - make_iotdb_payload(DeviceId, "temp", "INT32", "40"), + make_iotdb_payload(DeviceId, "temp", "INT32", "36", MS - 7000), + make_iotdb_payload(DeviceId, "temp", "INT32", 37, MS - 6000), + make_iotdb_payload(DeviceId, "temp", "INT32", 38.7, MS - 5000), + make_iotdb_payload(DeviceId, "temp", "INT32", "39", integer_to_binary(MS - 4000)), + make_iotdb_payload(DeviceId, "temp", "INT32", "34", MS - 3000), + make_iotdb_payload(DeviceId, "temp", "INT32", 33.7, MS - 2000), + make_iotdb_payload(DeviceId, "temp", "INT32", 32, MS - 1000), %% [FIXME] neither nanoseconds nor microseconds don't seem to be supported by IoTDB (make_iotdb_payload(DeviceId, "temp", "INT32", "41"))#{timestamp => <<"now_us">>}, - (make_iotdb_payload(DeviceId, "temp", "INT32", "42"))#{timestamp => <<"now_ns">>}, - make_iotdb_payload(DeviceId, "weight", "FLOAT", "87.3", 1685112026290), - make_iotdb_payload(DeviceId, "weight", "FLOAT", 87.3, 1685112026291), - make_iotdb_payload(DeviceId, "weight", "FLOAT", 87, 1685112026292), - make_iotdb_payload(DeviceId, "weight", "DOUBLE", "87.3", 1685112026293), - make_iotdb_payload(DeviceId, "weight", "DOUBLE", 87.3, 1685112026294), - make_iotdb_payload(DeviceId, "weight", "DOUBLE", 87, 1685112026295), + make_iotdb_payload(DeviceId, "weight", "FLOAT", "87.3", MS - 6000), + 
make_iotdb_payload(DeviceId, "weight", "FLOAT", 87.3, MS - 5000), + make_iotdb_payload(DeviceId, "weight", "FLOAT", 87, MS - 4000), + make_iotdb_payload(DeviceId, "weight", "DOUBLE", "87.3", MS - 3000), + make_iotdb_payload(DeviceId, "weight", "DOUBLE", 87.3, MS - 2000), + make_iotdb_payload(DeviceId, "weight", "DOUBLE", 87, MS - 1000), - make_iotdb_payload(DeviceId, "charged", "BOOLEAN", "1", 1685112026300), - make_iotdb_payload(DeviceId, "floated", "BOOLEAN", 1, 1685112026300), - make_iotdb_payload(DeviceId, "started", "BOOLEAN", true, 1685112026300), - make_iotdb_payload(DeviceId, "stoked", "BOOLEAN", "true", 1685112026300), - make_iotdb_payload(DeviceId, "enriched", "BOOLEAN", "TRUE", 1685112026300), - make_iotdb_payload(DeviceId, "gutted", "BOOLEAN", "True", 1685112026300), - make_iotdb_payload(DeviceId, "drained", "BOOLEAN", "0", 1685112026300), - make_iotdb_payload(DeviceId, "toasted", "BOOLEAN", 0, 1685112026300), - make_iotdb_payload(DeviceId, "uncharted", "BOOLEAN", false, 1685112026300), - make_iotdb_payload(DeviceId, "dazzled", "BOOLEAN", "false", 1685112026300), - make_iotdb_payload(DeviceId, "unplugged", "BOOLEAN", "FALSE", 1685112026300), - make_iotdb_payload(DeviceId, "unraveled", "BOOLEAN", "False", 1685112026300), - make_iotdb_payload(DeviceId, "undecided", "BOOLEAN", null, 1685112026300), + make_iotdb_payload(DeviceId, "charged", "BOOLEAN", "1", MS + 1000), + make_iotdb_payload(DeviceId, "floated", "BOOLEAN", 1, MS + 1000), + make_iotdb_payload(DeviceId, "started", "BOOLEAN", true, MS + 1000), + make_iotdb_payload(DeviceId, "stoked", "BOOLEAN", "true", MS + 1000), + make_iotdb_payload(DeviceId, "enriched", "BOOLEAN", "TRUE", MS + 1000), + make_iotdb_payload(DeviceId, "gutted", "BOOLEAN", "True", MS + 1000), + make_iotdb_payload(DeviceId, "drained", "BOOLEAN", "0", MS + 1000), + make_iotdb_payload(DeviceId, "toasted", "BOOLEAN", 0, MS + 1000), + make_iotdb_payload(DeviceId, "uncharted", "BOOLEAN", false, MS + 1000), + make_iotdb_payload(DeviceId, 
"dazzled", "BOOLEAN", "false", MS + 1000), + make_iotdb_payload(DeviceId, "unplugged", "BOOLEAN", "FALSE", MS + 1000), + make_iotdb_payload(DeviceId, "unraveled", "BOOLEAN", "False", MS + 1000), + make_iotdb_payload(DeviceId, "undecided", "BOOLEAN", null, MS + 1000), - make_iotdb_payload(DeviceId, "foo", "TEXT", "bar", 1685112026300) + make_iotdb_payload(DeviceId, "foo", "TEXT", "bar", MS + 1000) ], MakeMessageFun = make_message_fun(iotdb_topic(Config), Payload), ok = emqx_bridge_v2_testlib:t_sync_query( Config, MakeMessageFun, fun is_success_check/1, iotdb_bridge_on_query ), - %% check temp - QueryTemp = <<"select temp from ", DeviceId/binary>>, - {ok, {{_, 200, _}, _, ResultTemp}} = iotdb_query(Config, QueryTemp), - ?assertMatch( - #{<<"values">> := [[36, 37, 38, 39, 36, 36, 36, 40, 41, 42]]}, - emqx_utils_json:decode(ResultTemp) - ), + Time = integer_to_binary(MS - 20000), %% check weight - QueryWeight = <<"select weight from ", DeviceId/binary>>, + QueryWeight = <<"select weight from ", DeviceId/binary, " where time > ", Time/binary>>, {ok, {{_, 200, _}, _, ResultWeight}} = iotdb_query(Config, QueryWeight), ?assertMatch( #{<<"values">> := [[87.3, 87.3, 87.0, 87.3, 87.3, 87.0]]}, emqx_utils_json:decode(ResultWeight) ), - %% check rest ts = 1685112026300 - QueryRest = <<"select * from ", DeviceId/binary, " where time = 1685112026300">>, + case ?config(iotdb_version, Config) of + ?VSN_1_3_X -> + ct:pal("waiting:~p ~p ~n, [DeviceId]", [DeviceId, MS + 1000]), + timer:sleep(3600 * 1000 * 1000); + _ -> + ok + end, + %% check rest ts = MS + 1000 + CheckTime = integer_to_binary(MS + 1000), + QueryRest = <<"select * from ", DeviceId/binary, " where time = ", CheckTime/binary>>, {ok, {{_, 200, _}, _, ResultRest}} = iotdb_query(Config, QueryRest), #{<<"values">> := Values, <<"expressions">> := Expressions} = emqx_utils_json:decode( ResultRest @@ -442,6 +452,13 @@ t_sync_query_aggregated(Config) -> }, ?assertEqual(Exp, Results), + %% check temp + QueryTemp = <<"select temp 
from ", DeviceId/binary, " where time > ", Time/binary>>, + {ok, {{_, 200, _}, _, ResultTemp}} = iotdb_query(Config, QueryTemp), + ?assertMatch( + #{<<"values">> := [[36, 37, 38, 39, 34, 33, 32, 41]]}, + emqx_utils_json:decode(ResultTemp) + ), ok. exp(Dev, M0) -> From b163a873861c16f2f8f7a147d43d89f20290a310 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Fri, 19 Apr 2024 14:52:56 +0800 Subject: [PATCH 210/234] feat: support batch_size on iotdb --- .../src/emqx_bridge_iotdb.erl | 15 +-- .../src/emqx_bridge_iotdb_connector.erl | 113 ++++++++++++++++-- .../test/emqx_bridge_iotdb_impl_SUITE.erl | 82 ++++++------- .../src/emqx_resource_buffer_worker.erl | 24 +++- 4 files changed, 167 insertions(+), 67 deletions(-) diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl index cbad8ca63..a2471fa3d 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.erl @@ -66,12 +66,7 @@ fields(action_config) -> ] ); fields(action_resource_opts) -> - lists:filter( - fun({K, _V}) -> - not lists:member(K, unsupported_opts()) - end, - emqx_bridge_v2_schema:action_resource_opts_fields() - ); + emqx_bridge_v2_schema:action_resource_opts_fields(); fields(action_parameters) -> [ {is_aligned, @@ -150,7 +145,7 @@ fields("get_bridge_v2") -> fields("config") -> basic_config() ++ request_config(); fields("creation_opts") -> - proplists_without(unsupported_opts(), emqx_resource_schema:fields("creation_opts")); + emqx_resource_schema:fields("creation_opts"); fields(auth_basic) -> [ {username, mk(binary(), #{required => true, desc => ?DESC("config_auth_basic_username")})}, @@ -268,12 +263,6 @@ resource_creation_opts() -> )} ]. -unsupported_opts() -> - [ - batch_size, - batch_time - ]. 
- %%------------------------------------------------------------------------------------------------- %% v2 examples %%------------------------------------------------------------------------------------------------- diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl index 4bde75967..d26b47f73 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb_connector.erl @@ -21,6 +21,8 @@ on_get_status/2, on_query/3, on_query_async/4, + on_batch_query/3, + on_batch_query_async/4, on_add_channel/4, on_remove_channel/3, on_get_channels/1, @@ -280,8 +282,8 @@ on_query( state => emqx_utils:redact(State) }), - case try_render_message(Req, IoTDBVsn, Channels) of - {ok, IoTDBPayload} -> + case try_render_messages([Req], IoTDBVsn, Channels) of + {ok, [IoTDBPayload]} -> handle_response( emqx_bridge_http_connector:on_query( InstanceId, {ChannelId, IoTDBPayload}, State @@ -306,8 +308,8 @@ on_query_async( send_message => Req, state => emqx_utils:redact(State) }), - case try_render_message(Req, IoTDBVsn, Channels) of - {ok, IoTDBPayload} -> + case try_render_messages([Req], IoTDBVsn, Channels) of + {ok, [IoTDBPayload]} -> ReplyFunAndArgs = { fun(Result) -> @@ -323,6 +325,71 @@ on_query_async( Error end. 
+on_batch_query_async( + InstId, + Requests, + Callback, + #{iotdb_version := IoTDBVsn, channels := Channels} = State +) -> + ?tp(iotdb_bridge_on_batch_query_async, #{instance_id => InstId}), + [{ChannelId, _Message} | _] = Requests, + ?SLOG(debug, #{ + msg => "iotdb_bridge_on_query_batch_async_called", + instance_id => InstId, + send_message => Requests, + state => emqx_utils:redact(State) + }), + case try_render_messages(Requests, IoTDBVsn, Channels) of + {ok, IoTDBPayloads} -> + ReplyFunAndArgs = + { + fun(Result) -> + Response = handle_response(Result), + emqx_resource:apply_reply_fun(Callback, Response) + end, + [] + }, + lists:map( + fun(IoTDBPayload) -> + emqx_bridge_http_connector:on_query_async( + InstId, {ChannelId, IoTDBPayload}, ReplyFunAndArgs, State + ) + end, + IoTDBPayloads + ); + Error -> + Error + end. + +on_batch_query( + InstId, + [{ChannelId, _Message}] = Requests, + #{iotdb_version := IoTDBVsn, channels := Channels} = State +) -> + ?tp(iotdb_bridge_on_batch_query, #{instance_id => InstId}), + ?SLOG(debug, #{ + msg => "iotdb_bridge_on_batch_query_called", + instance_id => InstId, + send_message => Requests, + state => emqx_utils:redact(State) + }), + + case try_render_messages(Requests, IoTDBVsn, Channels) of + {ok, IoTDBPayloads} -> + lists:map( + fun(IoTDBPayload) -> + handle_response( + emqx_bridge_http_connector:on_query( + InstId, {ChannelId, IoTDBPayload}, State + ) + ) + end, + IoTDBPayloads + ); + Error -> + Error + end. + on_add_channel( InstanceId, #{iotdb_version := Version, channels := Channels} = OldState0, @@ -576,11 +643,10 @@ convert_float(undefined) -> make_iotdb_insert_request(DataList, IsAligned, DeviceId, IoTDBVsn) -> InitAcc = #{timestamps => [], measurements => [], dtypes => [], values => []}, Rows = replace_dtypes(aggregate_rows(DataList, InitAcc), IoTDBVsn), - {ok, - maps:merge(Rows, #{ - iotdb_field_key(is_aligned, IoTDBVsn) => IsAligned, - iotdb_field_key(device_id, IoTDBVsn) => DeviceId - })}. 
+ maps:merge(Rows, #{ + iotdb_field_key(is_aligned, IoTDBVsn) => IsAligned, + iotdb_field_key(device_id, IoTDBVsn) => DeviceId + }). replace_dtypes(Rows0, IoTDBVsn) -> {Types, Rows} = maps:take(dtypes, Rows0), @@ -720,14 +786,37 @@ preproc_data_template(DataList) -> DataList ). -try_render_message({ChannelId, Msg}, IoTDBVsn, Channels) -> +try_render_messages([{ChannelId, _} | _] = Msgs, IoTDBVsn, Channels) -> case maps:find(ChannelId, Channels) of {ok, Channel} -> - render_channel_message(Channel, IoTDBVsn, Msg); + case do_render_message(Msgs, Channel, IoTDBVsn, #{}) of + RenderMsgs when is_map(RenderMsgs) -> + {ok, + lists:map( + fun({{DeviceId, IsAligned}, DataList}) -> + make_iotdb_insert_request(DataList, IsAligned, DeviceId, IoTDBVsn) + end, + maps:to_list(RenderMsgs) + )}; + Error -> + Error + end; _ -> {error, {unrecoverable_error, {invalid_channel_id, ChannelId}}} end. +do_render_message([], _Channel, _IoTDBVsn, Acc) -> + Acc; +do_render_message([{_, Msg} | Msgs], Channel, IoTDBVsn, Acc) -> + case render_channel_message(Channel, IoTDBVsn, Msg) of + {ok, NewDataList, DeviceId, IsAligned} -> + Fun = fun(V) -> NewDataList ++ V end, + Acc1 = maps:update_with({DeviceId, IsAligned}, Fun, NewDataList, Acc), + do_render_message(Msgs, Channel, IoTDBVsn, Acc1); + Error -> + Error + end. 
+ render_channel_message(#{is_aligned := IsAligned} = Channel, IoTDBVsn, Message) -> Payloads = to_list(parse_payload(get_payload(Message))), case device_id(Message, Payloads, Channel) of @@ -740,7 +829,7 @@ render_channel_message(#{is_aligned := IsAligned} = Channel, IoTDBVsn, Message) DataTemplate -> case proc_data(DataTemplate, Message, IoTDBVsn) of {ok, DataList} -> - make_iotdb_insert_request(DataList, IsAligned, DeviceId, IoTDBVsn); + {ok, DataList, DeviceId, IsAligned}; Error -> Error end diff --git a/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl b/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl index ce8cd01e8..d5661e2fe 100644 --- a/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl +++ b/apps/emqx_bridge_iotdb/test/emqx_bridge_iotdb_impl_SUITE.erl @@ -373,9 +373,9 @@ t_sync_query_aggregated(Config) -> Payload = [ make_iotdb_payload(DeviceId, "temp", "INT32", "36", MS - 7000), make_iotdb_payload(DeviceId, "temp", "INT32", 37, MS - 6000), - make_iotdb_payload(DeviceId, "temp", "INT32", 38.7, MS - 5000), - make_iotdb_payload(DeviceId, "temp", "INT32", "39", integer_to_binary(MS - 4000)), - make_iotdb_payload(DeviceId, "temp", "INT32", "34", MS - 3000), + make_iotdb_payload(DeviceId, "temp", "INT64", 38.7, MS - 5000), + make_iotdb_payload(DeviceId, "temp", "INT64", "39", integer_to_binary(MS - 4000)), + make_iotdb_payload(DeviceId, "temp", "INT64", "34", MS - 3000), make_iotdb_payload(DeviceId, "temp", "INT32", 33.7, MS - 2000), make_iotdb_payload(DeviceId, "temp", "INT32", 32, MS - 1000), %% [FIXME] neither nanoseconds nor microseconds don't seem to be supported by IoTDB @@ -417,48 +417,48 @@ t_sync_query_aggregated(Config) -> #{<<"values">> := [[87.3, 87.3, 87.0, 87.3, 87.3, 87.0]]}, emqx_utils_json:decode(ResultWeight) ), + %% [FIXME] https://github.com/apache/iotdb/issues/12375 + %% null don't seem to be supported by IoTDB insertTablet when 1.3.0 case ?config(iotdb_version, Config) of ?VSN_1_3_X -> - 
ct:pal("waiting:~p ~p ~n, [DeviceId]", [DeviceId, MS + 1000]), - timer:sleep(3600 * 1000 * 1000); + skip; _ -> - ok - end, - %% check rest ts = MS + 1000 - CheckTime = integer_to_binary(MS + 1000), - QueryRest = <<"select * from ", DeviceId/binary, " where time = ", CheckTime/binary>>, - {ok, {{_, 200, _}, _, ResultRest}} = iotdb_query(Config, QueryRest), - #{<<"values">> := Values, <<"expressions">> := Expressions} = emqx_utils_json:decode( - ResultRest - ), - Results = maps:from_list(lists:zipwith(fun(K, [V]) -> {K, V} end, Expressions, Values)), - Exp = #{ - exp(DeviceId, "charged") => true, - exp(DeviceId, "floated") => true, - exp(DeviceId, "started") => true, - exp(DeviceId, "stoked") => true, - exp(DeviceId, "enriched") => true, - exp(DeviceId, "gutted") => true, - exp(DeviceId, "drained") => false, - exp(DeviceId, "toasted") => false, - exp(DeviceId, "uncharted") => false, - exp(DeviceId, "dazzled") => false, - exp(DeviceId, "unplugged") => false, - exp(DeviceId, "unraveled") => false, - exp(DeviceId, "undecided") => null, - exp(DeviceId, "foo") => <<"bar">>, - exp(DeviceId, "temp") => null, - exp(DeviceId, "weight") => null - }, - ?assertEqual(Exp, Results), + %% check rest ts = MS + 1000 + CheckTime = integer_to_binary(MS + 1000), + QueryRest = <<"select * from ", DeviceId/binary, " where time = ", CheckTime/binary>>, + {ok, {{_, 200, _}, _, ResultRest}} = iotdb_query(Config, QueryRest), + #{<<"values">> := Values, <<"expressions">> := Expressions} = emqx_utils_json:decode( + ResultRest + ), + Results = maps:from_list(lists:zipwith(fun(K, [V]) -> {K, V} end, Expressions, Values)), + Exp = #{ + exp(DeviceId, "charged") => true, + exp(DeviceId, "floated") => true, + exp(DeviceId, "started") => true, + exp(DeviceId, "stoked") => true, + exp(DeviceId, "enriched") => true, + exp(DeviceId, "gutted") => true, + exp(DeviceId, "drained") => false, + exp(DeviceId, "toasted") => false, + exp(DeviceId, "uncharted") => false, + exp(DeviceId, "dazzled") => false, + 
exp(DeviceId, "unplugged") => false, + exp(DeviceId, "unraveled") => false, + exp(DeviceId, "undecided") => null, + exp(DeviceId, "foo") => <<"bar">>, + exp(DeviceId, "temp") => null, + exp(DeviceId, "weight") => null + }, + ?assertEqual(Exp, Results), - %% check temp - QueryTemp = <<"select temp from ", DeviceId/binary, " where time > ", Time/binary>>, - {ok, {{_, 200, _}, _, ResultTemp}} = iotdb_query(Config, QueryTemp), - ?assertMatch( - #{<<"values">> := [[36, 37, 38, 39, 34, 33, 32, 41]]}, - emqx_utils_json:decode(ResultTemp) - ), + %% check temp + QueryTemp = <<"select temp from ", DeviceId/binary, " where time > ", Time/binary>>, + {ok, {{_, 200, _}, _, ResultTemp}} = iotdb_query(Config, QueryTemp), + ?assertMatch( + #{<<"values">> := [[36, 37, 38, 39, 34, 33, 32, 41]]}, + emqx_utils_json:decode(ResultTemp) + ) + end, ok. exp(Dev, M0) -> diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index bc1aea734..d1fc3081a 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1024,7 +1024,29 @@ handle_query_async_result_pure(Id, {error, Reason} = Error, HasBeenSent) -> handle_query_async_result_pure(_Id, {ok, Pid}, _HasBeenSent) when is_pid(Pid) -> {ack, fun() -> ok end, #{}}; handle_query_async_result_pure(_Id, ok, _HasBeenSent) -> - {ack, fun() -> ok end, #{}}. + {ack, fun() -> ok end, #{}}; +handle_query_async_result_pure(Id, Results, HasBeenSent) when is_list(Results) -> + All = fun(L) -> + case L of + {ok, Pid} -> is_pid(Pid); + _ -> false + end + end, + case lists:all(All, Results) of + true -> + {ack, fun() -> ok end, #{}}; + false -> + PostFn = fun() -> + ?SLOG(error, #{ + id => Id, + msg => "async_batch_send_error", + reason => Results, + has_been_sent => HasBeenSent + }), + ok + end, + {nack, PostFn, #{}} + end. -spec aggregate_counters(data(), counters()) -> data(). 
aggregate_counters(Data = #{counters := OldCounters}, DeltaCounters) -> From c163956d08f05249dc833dbc2852965ca1799870 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 19 Apr 2024 09:52:20 +0200 Subject: [PATCH 211/234] fix(trace formatter): remove record field to enable rolling upgrade --- apps/emqx/include/emqx_trace.hrl | 5 ++--- apps/emqx/src/emqx_trace/emqx_trace.erl | 14 +++++++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/apps/emqx/include/emqx_trace.hrl b/apps/emqx/include/emqx_trace.hrl index f3905bfdf..d1e70b184 100644 --- a/apps/emqx/include/emqx_trace.hrl +++ b/apps/emqx/include/emqx_trace.hrl @@ -30,10 +30,9 @@ | '_', enable = true :: boolean() | '_', payload_encode = text :: hex | text | hidden | '_', - extra = #{} :: map() | '_', + extra = #{formatter => text} :: #{formatter => text | json} | '_', start_at :: integer() | undefined | '_', - end_at :: integer() | undefined | '_', - formatter = text :: text | json | '_' + end_at :: integer() | undefined | '_' }). -define(SHARD, ?COMMON_SHARD). diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index fc64b5073..9e57b778a 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -254,7 +254,10 @@ format(Traces) -> lists:map( fun(Trace0 = #?TRACE{}) -> [_ | Values] = tuple_to_list(Trace0), - maps:from_list(lists:zip(Fields, Values)) + Map0 = maps:from_list(lists:zip(Fields, Values)), + Extra = maps:get(extra, Map0, #{}), + Formatter = maps:get(formatter, Extra, text), + Map0#{formatter => Formatter} end, Traces ). 
@@ -401,8 +404,9 @@ start_trace(Trace) -> filter = Filter, start_at = Start, payload_encode = PayloadEncode, - formatter = Formatter + extra = Extra } = Trace, + Formatter = maps:get(formatter, Extra, text), Who = #{ name => Name, type => Type, @@ -575,7 +579,11 @@ to_trace(#{end_at := EndAt} = Trace, Rec) -> {error, "end_at time has already passed"} end; to_trace(#{formatter := Formatter} = Trace, Rec) -> - to_trace(maps:remove(formatter, Trace), Rec#?TRACE{formatter = Formatter}); + Extra = Rec#?TRACE.extra, + to_trace( + maps:remove(formatter, Trace), + Rec#?TRACE{extra = Extra#{formatter => Formatter}} + ); to_trace(_, Rec) -> {ok, Rec}. From ede72468827ad3eb1f1086b1d923291add4c156b Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:39:04 +0200 Subject: [PATCH 212/234] fix(sessds): Avoid double-enriching transient messages --- apps/emqx/src/emqx_persistent_session_ds.erl | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 20c382934..4bfefe5b6 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -1066,7 +1066,9 @@ process_batch( %% Transient messages %%-------------------------------------------------------------------- -enqueue_transient(ClientInfo, Msg0, Session = #{s := S, props := #{upgrade_qos := UpgradeQoS}}) -> +enqueue_transient( + _ClientInfo, Msg = #message{qos = Qos}, Session = #{inflight := Inflight0, s := S0} +) -> %% TODO: Such messages won't be retransmitted, should the session %% reconnect before transient messages are acked. %% @@ -1076,16 +1078,6 @@ enqueue_transient(ClientInfo, Msg0, Session = #{s := S, props := #{upgrade_qos : %% queued messages. Since streams in this DB are exclusive to the %% session, messages from the queue can be dropped as soon as they %% are acked. 
- case emqx_persistent_session_ds_state:get_subscription(Msg0#message.topic, S) of - #{current_state := CS} -> - #{subopts := SubOpts} = emqx_persistent_session_ds_state:get_subscription_state(CS, S); - undefined -> - SubOpts = undefined - end, - Msgs = emqx_session:enrich_message(ClientInfo, Msg0, SubOpts, UpgradeQoS), - lists:foldl(fun do_enqueue_transient/2, Session, Msgs). - -do_enqueue_transient(Msg = #message{qos = Qos}, Session = #{inflight := Inflight0, s := S0}) -> case Qos of ?QOS_0 -> S = S0, From ec83fbe3dc529dc30c88d2d0992577222fb62fe4 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 18 Apr 2024 16:58:41 +0200 Subject: [PATCH 213/234] feat(license): add business critical customer type --- apps/emqx_license/include/emqx_license.hrl | 1 + apps/emqx_license/src/emqx_license.erl | 11 +++- .../emqx_license/src/emqx_license_checker.erl | 49 +++++++++++---- .../src/emqx_license_http_api.erl | 13 +++- apps/emqx_license/src/emqx_license_parser.erl | 15 ++++- apps/emqx_license/src/emqx_license_schema.erl | 59 +++++++++++++++---- .../test/emqx_license_cli_SUITE.erl | 1 + .../test/emqx_license_http_api_SUITE.erl | 47 ++++++++++++--- rel/i18n/emqx_license_schema.hocon | 19 +++--- 9 files changed, 172 insertions(+), 43 deletions(-) diff --git a/apps/emqx_license/include/emqx_license.hrl b/apps/emqx_license/include/emqx_license.hrl index bfc1d2cfe..35aa62f5b 100644 --- a/apps/emqx_license/include/emqx_license.hrl +++ b/apps/emqx_license/include/emqx_license.hrl @@ -31,6 +31,7 @@ -define(SMALL_CUSTOMER, 0). -define(MEDIUM_CUSTOMER, 1). -define(LARGE_CUSTOMER, 2). +-define(BUSINESS_CRITICAL_CUSTOMER, 3). -define(EVALUATION_CUSTOMER, 10). -define(EXPIRED_DAY, -90). 
diff --git a/apps/emqx_license/src/emqx_license.erl b/apps/emqx_license/src/emqx_license.erl index 73c1cdf4e..dfb747a96 100644 --- a/apps/emqx_license/src/emqx_license.erl +++ b/apps/emqx_license/src/emqx_license.erl @@ -154,7 +154,16 @@ do_update({key, Content}, Conf) when is_binary(Content); is_list(Content) -> {error, Reason} -> erlang:throw(Reason) end; -do_update({setting, Setting}, Conf) -> +do_update({setting, Setting0}, Conf) -> + #{<<"key">> := Key} = Conf, + %% only allow updating dynamic_max_connections when it's BUSINESS_CRITICAL + Setting = + case emqx_license_parser:is_business_critical(Key) of + true -> + Setting0; + false -> + maps:without([<<"dynamic_max_connections">>], Setting0) + end, maps:merge(Conf, Setting); do_update(NewConf, _PrevConf) -> #{<<"key">> := NewKey} = NewConf, diff --git a/apps/emqx_license/src/emqx_license_checker.erl b/apps/emqx_license/src/emqx_license_checker.erl index 8270e03d2..5d8393037 100644 --- a/apps/emqx_license/src/emqx_license_checker.erl +++ b/apps/emqx_license/src/emqx_license_checker.erl @@ -33,7 +33,9 @@ expiry_epoch/0, purge/0, limits/0, - print_warnings/1 + print_warnings/1, + get_max_connections/1, + get_dynamic_max_connections/0 ]). %% gen_server callbacks @@ -46,21 +48,23 @@ -define(LICENSE_TAB, emqx_license). +-type limits() :: #{max_connections := non_neg_integer() | ?ERR_EXPIRED}. +-type license() :: emqx_license_parser:license(). +-type fetcher() :: fun(() -> {ok, license()} | {error, term()}). + %%------------------------------------------------------------------------------ %% API %%------------------------------------------------------------------------------ --type limits() :: #{max_connections := non_neg_integer() | ?ERR_EXPIRED}. - --spec start_link(emqx_license_parser:license()) -> {ok, pid()}. +-spec start_link(fetcher()) -> {ok, pid()}. start_link(LicenseFetcher) -> start_link(LicenseFetcher, ?CHECK_INTERVAL). --spec start_link(emqx_license_parser:license(), timeout()) -> {ok, pid()}. 
+-spec start_link(fetcher(), timeout()) -> {ok, pid()}. start_link(LicenseFetcher, CheckInterval) -> gen_server:start_link({local, ?MODULE}, ?MODULE, [LicenseFetcher, CheckInterval], []). --spec update(emqx_license_parser:license()) -> map(). +-spec update(license()) -> map(). update(License) -> gen_server:call(?MODULE, {update, License}, infinity). @@ -210,8 +214,7 @@ check_license(License) -> DaysLeft = days_left(License), IsOverdue = is_overdue(License, DaysLeft), NeedRestriction = IsOverdue, - MaxConn = emqx_license_parser:max_connections(License), - Limits = limits(License, NeedRestriction), + #{max_connections := MaxConn} = Limits = limits(License, NeedRestriction), true = apply_limits(Limits), #{ warn_evaluation => warn_evaluation(License, NeedRestriction, MaxConn), @@ -223,8 +226,34 @@ warn_evaluation(License, false, MaxConn) -> warn_evaluation(_License, _NeedRestrict, _Limits) -> false. -limits(License, false) -> #{max_connections => emqx_license_parser:max_connections(License)}; -limits(_License, true) -> #{max_connections => ?ERR_EXPIRED}. +limits(License, false) -> + #{ + max_connections => get_max_connections(License) + }; +limits(_License, true) -> + #{ + max_connections => ?ERR_EXPIRED + }. + +%% @doc Return the max_connections limit defined in license. +%% For business-critical type, it returns the dynamic value set in config. +-spec get_max_connections(license()) -> non_neg_integer(). +get_max_connections(License) -> + Max = emqx_license_parser:max_connections(License), + Dyn = + case emqx_license_parser:customer_type(License) of + ?BUSINESS_CRITICAL_CUSTOMER -> + min(get_dynamic_max_connections(), Max); + _ -> + Max + end, + min(Max, Dyn). + +%% @doc Get the dynamic max_connections limit set in config. +%% It's only meaningful for business-critical license. +-spec get_dynamic_max_connections() -> non_neg_integer(). +get_dynamic_max_connections() -> + emqx_conf:get([license, dynamic_max_connections]). 
days_left(License) -> DateEnd = emqx_license_parser:expiry_date(License), diff --git a/apps/emqx_license/src/emqx_license_http_api.erl b/apps/emqx_license/src/emqx_license_http_api.erl index dcf7afc7e..4d869f840 100644 --- a/apps/emqx_license/src/emqx_license_http_api.erl +++ b/apps/emqx_license/src/emqx_license_http_api.erl @@ -147,7 +147,7 @@ error_msg(Code, Msg) -> {400, error_msg(?BAD_REQUEST, <<"Invalid request params">>)}. '/license/setting'(get, _Params) -> - {200, maps:remove(<<"key">>, emqx_config:get_raw([license]))}; + {200, get_setting()}; '/license/setting'(put, #{body := Setting}) -> case emqx_license:update_setting(Setting) of {error, Error} -> @@ -170,3 +170,14 @@ fields(key_license) -> setting() -> lists:keydelete(key, 1, emqx_license_schema:fields(key_license)). + +%% Drop dynamic_max_connections unless it's a BUSINESS_CRITICAL license. +get_setting() -> + #{<<"key">> := Key} = Raw = emqx_config:get_raw([license]), + Result = maps:remove(<<"key">>, Raw), + case emqx_license_parser:is_business_critical(Key) of + true -> + Result; + false -> + maps:remove(<<"dynamic_max_connections">>, Result) + end. diff --git a/apps/emqx_license/src/emqx_license_parser.erl b/apps/emqx_license/src/emqx_license_parser.erl index d7fcde338..67ad801bc 100644 --- a/apps/emqx_license/src/emqx_license_parser.erl +++ b/apps/emqx_license/src/emqx_license_parser.erl @@ -28,6 +28,7 @@ ?SMALL_CUSTOMER | ?MEDIUM_CUSTOMER | ?LARGE_CUSTOMER + | ?BUSINESS_CRITICAL_CUSTOMER | ?EVALUATION_CUSTOMER. -type license_type() :: ?OFFICIAL | ?TRIAL. @@ -41,6 +42,8 @@ source := binary() }. +-type raw_license() :: string() | binary() | default. + -export_type([ license_data/0, customer_type/0, @@ -56,7 +59,8 @@ customer_type/1, license_type/1, expiry_date/1, - max_connections/1 + max_connections/1, + is_business_critical/1 ]). %% for testing purpose @@ -94,7 +98,7 @@ default() -> emqx_license_schema:default_license(). %% @doc Parse license key. 
%% If the license key is prefixed with "file://path/to/license/file", %% then the license key is read from the file. --spec parse(default | string() | binary()) -> {ok, license()} | {error, map()}. +-spec parse(raw_license()) -> {ok, license()} | {error, map()}. parse(Content) -> parse(to_bin(Content), ?MODULE:pubkey()). @@ -146,6 +150,13 @@ expiry_date(#{module := Module, data := LicenseData}) -> max_connections(#{module := Module, data := LicenseData}) -> Module:max_connections(LicenseData). +-spec is_business_critical(license() | raw_license()) -> boolean(). +is_business_critical(#{module := Module, data := LicenseData}) -> + Module:customer_type(LicenseData) =:= ?BUSINESS_CRITICAL_CUSTOMER; +is_business_critical(Key) when is_binary(Key) -> + {ok, License} = parse(Key), + is_business_critical(License). + %%-------------------------------------------------------------------- %% Private functions %%-------------------------------------------------------------------- diff --git a/apps/emqx_license/src/emqx_license_schema.erl b/apps/emqx_license/src/emqx_license_schema.erl index 1a1f388d9..0780f5971 100644 --- a/apps/emqx_license/src/emqx_license_schema.erl +++ b/apps/emqx_license/src/emqx_license_schema.erl @@ -16,7 +16,8 @@ -export([namespace/0, roots/0, fields/1, validations/0, desc/1, tags/0]). -export([ - default_license/0 + default_license/0, + default_setting/0 ]). namespace() -> "license". @@ -45,16 +46,26 @@ fields(key_license) -> required => true, desc => ?DESC(key_field) }}, + %% This feature is not made GA yet, hence hidden. + %% When license is issued to cutomer-type BUSINESS_CRITICAL (code 3) + %% This config is taken as the real max_connections limit. 
+ {dynamic_max_connections, #{ + type => non_neg_integer(), + default => default(dynamic_max_connections), + required => false, + importance => ?IMPORTANCE_HIDDEN, + desc => ?DESC(dynamic_max_connections) + }}, {connection_low_watermark, #{ type => emqx_schema:percent(), - default => <<"75%">>, - example => <<"75%">>, + default => default(connection_low_watermark), + example => default(connection_low_watermark), desc => ?DESC(connection_low_watermark_field) }}, {connection_high_watermark, #{ type => emqx_schema:percent(), - default => <<"80%">>, - example => <<"80%">>, + default => default(connection_high_watermark), + example => default(connection_high_watermark), desc => ?DESC(connection_high_watermark_field) }} ]. @@ -87,11 +98,39 @@ check_license_watermark(Conf) -> %% @doc The default license key. %% This default license has 25 connections limit. -%% Issued on 2023-12-08 and valid for 5 years (1825 days) -%% NOTE: when updating a new key, the schema doc in emqx_license_schema.hocon -%% should be updated accordingly +%% Issued on 2024-04-18 and valid for 5 years (1825 days) +%% +%% NOTE: when updating a new key, below should be updated accordingly: +%% - emqx_license_schema.hocon default connections limit +%% - default(dynamic_max_connections) return value default_license() -> << - "MjIwMTExCjAKMTAKRXZhbHVhdGlvbgpjb250YWN0QGVtcXguaW8KdHJpYWwKMjAyMzEyMDgKMTgyNQoyNQo=." - "MEUCIE271MtH+4bb39OZKD4mvVkurwZ3LX44KUvuOxkbjQz2AiEAqL7BP44PMUS5z5SAN1M4y3v3h47J8qORAqcuetnyexw=" + "MjIwMTExCjAKMTAKRXZhbHVhdGlvbgpjb250YWN0QGVtcXguaW8KdHJpYWwKMjAyNDA0MTgKMTgyNQoyNQo=" + "." + "MEUCICMWWkfrvyMwQaQAOXEsEcs+d6+5uXc1BDxR7j25fRy4AiEAmblQ4p+FFmdsvnKgcRRkv1zj7PExmZKVk3mVcxH3fgw=" >>. + +%% @doc Exported for testing +default_setting() -> + Keys = + [ + connection_low_watermark, + connection_high_watermark, + dynamic_max_connections + ], + maps:from_list( + lists:map( + fun(K) -> + {K, default(K)} + end, + Keys + ) + ). 
+ +default(connection_low_watermark) -> + <<"75%">>; +default(connection_high_watermark) -> + <<"80%">>; +default(dynamic_max_connections) -> + %Must match the value encoded in default license. + 25. diff --git a/apps/emqx_license/test/emqx_license_cli_SUITE.erl b/apps/emqx_license/test/emqx_license_cli_SUITE.erl index b362efd95..1e8cfe7de 100644 --- a/apps/emqx_license/test/emqx_license_cli_SUITE.erl +++ b/apps/emqx_license/test/emqx_license_cli_SUITE.erl @@ -65,6 +65,7 @@ t_conf_update(_Config) -> #{ connection_high_watermark => 0.5, connection_low_watermark => 0.45, + dynamic_max_connections => 25, key => LicenseKey }, emqx:get_config([license]) diff --git a/apps/emqx_license/test/emqx_license_http_api_SUITE.erl b/apps/emqx_license/test/emqx_license_http_api_SUITE.erl index c207b3a40..b64a4d5af 100644 --- a/apps/emqx_license/test/emqx_license_http_api_SUITE.erl +++ b/apps/emqx_license/test/emqx_license_http_api_SUITE.erl @@ -19,17 +19,16 @@ all() -> init_per_suite(Config) -> emqx_license_test_lib:mock_parser(), + Setting = emqx_license_schema:default_setting(), + Key = emqx_license_test_lib:make_license(#{max_connections => "100"}), + LicenseConf = maps:merge(#{key => Key}, Setting), Apps = emqx_cth_suite:start( [ emqx, emqx_conf, {emqx_license, #{ config => #{ - license => #{ - key => emqx_license_test_lib:make_license(#{max_connections => "100"}), - connection_low_watermark => <<"75%">>, - connection_high_watermark => <<"80%">> - } + license => LicenseConf } }}, {emqx_dashboard, @@ -50,7 +49,7 @@ init_per_testcase(_TestCase, Config) -> Config. end_per_testcase(_TestCase, _Config) -> - {ok, _} = reset_license(), + ok = reset_license(), ok. %%------------------------------------------------------------------------------ @@ -70,7 +69,11 @@ default_license() -> emqx_license_test_lib:make_license(#{max_connections => "100"}). reset_license() -> - emqx_license:update_key(default_license()). 
+ {ok, _} = emqx_license:update_key(default_license()), + Setting = emqx_license_schema:default_setting(), + Req = maps:from_list([{atom_to_binary(K), V} || {K, V} <- maps:to_list(Setting)]), + {ok, _} = emqx_license:update_setting(Req), + ok. assert_untouched_license() -> ?assertMatch( @@ -224,6 +227,26 @@ t_license_setting(_Config) -> ), ok. +t_license_setting_bc(_Config) -> + %% Create a BC license + Key = emqx_license_test_lib:make_license(#{customer_type => "3"}), + Res = request(post, uri(["license"]), #{key => Key}), + ?assertMatch({ok, 200, _}, Res), + %% get + GetRes = request(get, uri(["license", "setting"]), []), + validate_setting(GetRes, <<"75%">>, <<"80%">>, 25), + %% update + Low = <<"50%">>, + High = <<"55%">>, + UpdateRes = request(put, uri(["license", "setting"]), #{ + <<"connection_low_watermark">> => Low, + <<"connection_high_watermark">> => High, + <<"dynamic_max_connections">> => 26 + }), + validate_setting(UpdateRes, Low, High, 26), + ?assertEqual(26, emqx_config:get([license, dynamic_max_connections])), + ok. + validate_setting(Res, ExpectLow, ExpectHigh) -> ?assertMatch({ok, 200, _}, Res), {ok, 200, Payload} = Res, @@ -234,3 +257,13 @@ validate_setting(Res, ExpectLow, ExpectHigh) -> }, emqx_utils_json:decode(Payload, [return_maps]) ). + +validate_setting(Res, ExpectLow, ExpectHigh, DynMax) -> + ?assertMatch({ok, 200, _}, Res), + {ok, 200, Payload} = Res, + #{ + <<"connection_low_watermark">> := ExpectLow, + <<"connection_high_watermark">> := ExpectHigh, + <<"dynamic_max_connections">> := DynMax + } = + emqx_utils_json:decode(Payload, [return_maps]). 
diff --git a/rel/i18n/emqx_license_schema.hocon b/rel/i18n/emqx_license_schema.hocon index 72f31266b..e3d418029 100644 --- a/rel/i18n/emqx_license_schema.hocon +++ b/rel/i18n/emqx_license_schema.hocon @@ -12,17 +12,12 @@ connection_low_watermark_field.desc: connection_low_watermark_field.label: """Connection low watermark""" -connection_high_watermark_field_deprecated.desc: -"""deprecated use /license/setting instead""" - -connection_high_watermark_field_deprecated.label: -"""deprecated use /license/setting instead""" - -connection_low_watermark_field_deprecated.desc: -"""deprecated use /license/setting instead""" - -connection_low_watermark_field_deprecated.label: -"""deprecated use /license/setting instead""" +dynamic_max_connections { + label: "Dynamic Connections Limit" + desc: """~ + Only applicable for "Business Critical" license type. This config sets the current allocation of license for the current cluster. + This value cannot exceed the connections limit assigned in the license key.""" +} key_field.desc: """This configuration parameter is designated for the license key and supports below input formats: @@ -43,7 +38,7 @@ license_root.desc: """Defines the EMQX Enterprise license. EMQX Enterprise is initially provided with a default trial license. -This license, issued in December 2023, is valid for a period of 5 years. +This license, issued in April 2024, is valid for a period of 5 years. It supports up to 25 concurrent connections, catering to early-stage development and testing needs. For deploying EMQX Enterprise in a production environment, a different license is required. 
You can apply for a production license by visiting https://www.emqx.com/apply-licenses/emqx?version=5""" From 9e46c18443feea352e3bc4c1376f6eb6090e1a23 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 19 Apr 2024 15:41:50 +0200 Subject: [PATCH 214/234] fix(emqx_trace_SUITE:t_base_create_delete): broken test case --- apps/emqx/src/emqx_trace/emqx_trace.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 9e57b778a..988c2b808 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -255,9 +255,11 @@ format(Traces) -> fun(Trace0 = #?TRACE{}) -> [_ | Values] = tuple_to_list(Trace0), Map0 = maps:from_list(lists:zip(Fields, Values)), - Extra = maps:get(extra, Map0, #{}), - Formatter = maps:get(formatter, Extra, text), - Map0#{formatter => Formatter} + Extra0 = maps:get(extra, Map0, #{}), + Formatter = maps:get(formatter, Extra0, text), + Map1 = Map0#{formatter => Formatter}, + Extra1 = maps:remove(formatter, Extra0), + maps:put(extra, Extra1, Map1) end, Traces ). From 2890bc26199dbb395a88e2c7c593373362f25f04 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 19 Apr 2024 17:05:34 +0200 Subject: [PATCH 215/234] fix(tracing): remove internal extra field from the trace config This commit removes the internal extra field from the trace config structure exposed to the user via the HTTP API. 
--- apps/emqx/src/emqx_trace/emqx_trace.erl | 7 +++---- apps/emqx/test/emqx_trace_SUITE.erl | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 988c2b808..7bbe59b2b 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -255,11 +255,10 @@ format(Traces) -> fun(Trace0 = #?TRACE{}) -> [_ | Values] = tuple_to_list(Trace0), Map0 = maps:from_list(lists:zip(Fields, Values)), - Extra0 = maps:get(extra, Map0, #{}), - Formatter = maps:get(formatter, Extra0, text), + Extra = maps:get(extra, Map0, #{}), + Formatter = maps:get(formatter, Extra, text), Map1 = Map0#{formatter => Formatter}, - Extra1 = maps:remove(formatter, Extra0), - maps:put(extra, Extra1, Map1) + maps:remove(extra, Map1) end, Traces ). diff --git a/apps/emqx/test/emqx_trace_SUITE.erl b/apps/emqx/test/emqx_trace_SUITE.erl index 4de9d09c0..ad2991445 100644 --- a/apps/emqx/test/emqx_trace_SUITE.erl +++ b/apps/emqx/test/emqx_trace_SUITE.erl @@ -96,7 +96,6 @@ t_base_create_delete(_Config) -> start_at => Now, end_at => Now + 30 * 60, payload_encode => text, - extra => #{}, formatter => text } ], From 43f8346c0062c7d16798ffafab6d6aeabc1d1f95 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 19 Apr 2024 18:52:33 +0200 Subject: [PATCH 216/234] fix(dssnap): ensure idempotent write of empty chunks --- apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl index 74459893b..86648ed58 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_snapshot.erl @@ -267,7 +267,9 @@ new_writer_file(#writer{dirpath = DirPath}, RelPath) -> write_chunk_file(WFile0 = #wfile{fd = IoDev, pos = Pos, abspath = AbsPath}, Pos, More, 
Chunk) -> ChunkSize = byte_size(Chunk), - case file:write(IoDev, Chunk) of + case (ChunkSize > 0) andalso file:write(IoDev, Chunk) of + false -> + WFile0; ok -> WFile1 = WFile0#wfile{pos = Pos + ChunkSize}, case More of From 95f3e49edb586c771b83c27a34be73f62a81bcf5 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Sat, 20 Apr 2024 08:33:55 +0800 Subject: [PATCH 217/234] fix(stomp): pass the Content-Type from the MQTT message --- apps/emqx_gateway_stomp/src/emqx_gateway_stomp.app.src | 2 +- apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/apps/emqx_gateway_stomp/src/emqx_gateway_stomp.app.src b/apps/emqx_gateway_stomp/src/emqx_gateway_stomp.app.src index 08214aee2..c7c9b6143 100644 --- a/apps/emqx_gateway_stomp/src/emqx_gateway_stomp.app.src +++ b/apps/emqx_gateway_stomp/src/emqx_gateway_stomp.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_gateway_stomp, [ {description, "Stomp Gateway"}, - {vsn, "0.1.5"}, + {vsn, "0.1.6"}, {registered, []}, {applications, [kernel, stdlib, emqx, emqx_gateway]}, {env, []}, diff --git a/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl b/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl index 20d769378..71458f15e 100644 --- a/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl +++ b/apps/emqx_gateway_stomp/src/emqx_stomp_channel.erl @@ -1039,7 +1039,7 @@ handle_deliver( {<<"subscription">>, Id}, {<<"message-id">>, next_msgid()}, {<<"destination">>, emqx_message:topic(NMessage)}, - {<<"content-type">>, <<"text/plain">>} + {<<"content-type">>, content_type_from_mqtt_message(NMessage)} ], Headers1 = case Ack of @@ -1080,6 +1080,13 @@ handle_deliver( ), {ok, [{outgoing, lists:reverse(Frames0)}], Channel}. +content_type_from_mqtt_message(Message) -> + Properties = emqx_message:get_header(properties, Message, #{}), + case maps:get('Content-Type', Properties, undefined) of + undefined -> <<"text/plain">>; + ContentType -> ContentType + end. 
+ %%-------------------------------------------------------------------- %% Handle timeout %%-------------------------------------------------------------------- From aaf79539689e6ee3593ec068e21c2a7b347f7b92 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Sat, 20 Apr 2024 08:31:10 +0800 Subject: [PATCH 218/234] chore: apply suggestions from code review Co-authored-by: Zaiming (Stone) Shi --- .../src/emqx_gateway_api_listeners.erl | 24 +++++++++---------- changes/ee/fix-12892.md | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl index aee04969b..30e9762e4 100644 --- a/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl +++ b/apps/emqx_gateway/src/emqx_gateway_api_listeners.erl @@ -775,9 +775,9 @@ examples_listener() -> <<"tlsv1.1">>, <<"tlsv1">> ], - cacertfile => <<"etc/certs/cacert.pem">>, - certfile => <<"etc/certs/cert.pem">>, - keyfile => <<"etc/certs/key.pem">>, + cacertfile => <<"${EMQX_ETC_DIR}/certs/cacert.pem">>, + certfile => <<"${EMQX_ETC_DIR}/certs/cert.pem">>, + keyfile => <<"${EMQX_ETC_DIR}/certs/key.pem">>, verify => <<"verify_none">>, fail_if_no_peer_cert => false }, @@ -821,9 +821,9 @@ examples_listener() -> dtls_options => #{ versions => [<<"dtlsv1.2">>, <<"dtlsv1">>], - cacertfile => <<"etc/certs/cacert.pem">>, - certfile => <<"etc/certs/cert.pem">>, - keyfile => <<"etc/certs/key.pem">>, + cacertfile => <<"${EMQX_ETC_DIR}/certs/cacert.pem">>, + certfile => <<"${EMQX_ETC_DIR}/certs/cert.pem">>, + keyfile => <<"${EMQX_ETC_DIR}/certs/key.pem">>, verify => <<"verify_none">>, fail_if_no_peer_cert => false }, @@ -848,9 +848,9 @@ examples_listener() -> dtls_options => #{ versions => [<<"dtlsv1.2">>, <<"dtlsv1">>], - cacertfile => <<"etc/certs/cacert.pem">>, - certfile => <<"etc/certs/cert.pem">>, - keyfile => <<"etc/certs/key.pem">>, + cacertfile => <<"${EMQX_ETC_DIR}/certs/cacert.pem">>, + certfile => 
<<"${EMQX_ETC_DIR}/certs/cert.pem">>, + keyfile => <<"${EMQX_ETC_DIR}/certs/key.pem">>, verify => <<"verify_none">>, user_lookup_fun => <<"emqx_tls_psk:lookup">>, ciphers => @@ -951,9 +951,9 @@ examples_listener() -> <<"tlsv1.1">>, <<"tlsv1">> ], - cacertfile => <<"etc/certs/cacert.pem">>, - certfile => <<"etc/certs/cert.pem">>, - keyfile => <<"etc/certs/key.pem">>, + cacertfile => <<"${EMQX_ETC_DIR}/certs/cacert.pem">>, + certfile => <<"${EMQX_ETC_DIR}/certs/cert.pem">>, + keyfile => <<"${EMQX_ETC_DIR}/certs/key.pem">>, verify => <<"verify_none">>, fail_if_no_peer_cert => false }, diff --git a/changes/ee/fix-12892.md b/changes/ee/fix-12892.md index 47463925b..45fd5c825 100644 --- a/changes/ee/fix-12892.md +++ b/changes/ee/fix-12892.md @@ -1,3 +1,3 @@ -Fix a logical error in OCPP gateway's handling of downstream BootNotification. +Fix an error in OCPP gateway's handling of downstream BootNotification. -Fix the `gateways/ocpp/listeners` endpoint to return the correct current connection number. +Fix the `gateways/ocpp/listeners` endpoint to return the correct number of current connections. From 5520e54147b5cf69532fd0f7d7dd65bbf01fcca0 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Sat, 20 Apr 2024 08:59:36 +0800 Subject: [PATCH 219/234] chore: add tests --- .../test/emqx_stomp_SUITE.erl | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/apps/emqx_gateway_stomp/test/emqx_stomp_SUITE.erl b/apps/emqx_gateway_stomp/test/emqx_stomp_SUITE.erl index 44c498405..64d95dc42 100644 --- a/apps/emqx_gateway_stomp/test/emqx_stomp_SUITE.erl +++ b/apps/emqx_gateway_stomp/test/emqx_stomp_SUITE.erl @@ -289,6 +289,67 @@ t_subscribe_inuse(_) -> with_connection(TopicIdInuseViaHttp), with_connection(SubscriptionInuseViaHttp). 
+t_receive_from_mqtt_publish(_) -> + with_connection(fun(Sock) -> + ok = send_connection_frame(Sock, <<"guest">>, <<"guest">>), + ?assertMatch({ok, #stomp_frame{command = <<"CONNECTED">>}}, recv_a_frame(Sock)), + + ok = send_subscribe_frame(Sock, 0, <<"/queue/foo">>), + ?assertMatch({ok, #stomp_frame{command = <<"RECEIPT">>}}, recv_a_frame(Sock)), + + %% send mqtt publish with content-type + Msg = emqx_message:make( + _From = from_testsuite, + _QoS = 1, + _Topic = <<"/queue/foo">>, + _Payload = <<"hello">>, + _Flags = #{}, + _Headers = #{properties => #{'Content-Type' => <<"application/json">>}} + ), + emqx:publish(Msg), + + {ok, Frame} = recv_a_frame(Sock), + ?assertEqual( + <<"application/json">>, + proplists:get_value(<<"content-type">>, Frame#stomp_frame.headers) + ), + + ?assertMatch( + #stomp_frame{ + command = <<"MESSAGE">>, + headers = _, + body = <<"hello">> + }, + Frame + ), + lists:foreach( + fun({Key, Val}) -> + Val = proplists:get_value(Key, Frame#stomp_frame.headers) + end, + [ + {<<"destination">>, <<"/queue/foo">>}, + {<<"subscription">>, <<"0">>} + ] + ), + + %% assert subscription stats + [ClientInfo1] = clients(), + ?assertMatch(#{subscriptions_cnt := 1}, ClientInfo1), + + %% Unsubscribe + ok = send_unsubscribe_frame(Sock, 0), + ?assertMatch({ok, #stomp_frame{command = <<"RECEIPT">>}}, recv_a_frame(Sock)), + + %% assert subscription stats + [ClientInfo2] = clients(), + ?assertMatch(#{subscriptions_cnt := 0}, ClientInfo2), + + ok = send_message_frame(Sock, <<"/queue/foo">>, <<"You will not receive this msg">>), + ?assertMatch({ok, #stomp_frame{command = <<"RECEIPT">>}}, recv_a_frame(Sock)), + + {error, timeout} = gen_tcp:recv(Sock, 0, 500) + end). 
+ t_transaction(_) -> with_connection(fun(Sock) -> gen_tcp:send( From 2e21bc51806903657ead4196c387dd2848c560c9 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Sat, 20 Apr 2024 09:03:13 +0800 Subject: [PATCH 220/234] chore: update changes --- changes/ce/fix-12902.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/fix-12902.md diff --git a/changes/ce/fix-12902.md b/changes/ce/fix-12902.md new file mode 100644 index 000000000..83409ee6d --- /dev/null +++ b/changes/ce/fix-12902.md @@ -0,0 +1 @@ +Pass the Content-type of MQTT message to the Stomp message. From 4c76a2574d636c7167616fa93ae9ebea04db1a12 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 21 Apr 2024 14:20:59 +0200 Subject: [PATCH 221/234] fix(ds): Fix egress flush condition --- .../src/emqx_ds_replication_layer_egress.erl | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl index 4122d937d..9201ccf04 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl @@ -127,7 +127,7 @@ init([DB, Shard]) -> metrics_id = MetricsId, queue = queue:new() }, - {ok, start_timer(S)}. + {ok, S}. handle_call( #enqueue_req{ @@ -195,7 +195,6 @@ enqueue( true -> %% Adding this batch would cause buffer to overflow. Flush %% it now, and retry: - cancel_timer(S0), S1 = flush(S0), enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); false -> @@ -203,12 +202,11 @@ enqueue( %% entirety: Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, - case NMsgs >= NMax orelse NBytes >= NBytes of + case NMsgs >= NMax orelse NBytes >= NBytesMax of true -> - cancel_timer(S1), flush(S1); false -> - S1 + ensure_timer(S1) end end. @@ -216,7 +214,7 @@ enqueue( -define(COOLDOWN_MAX, 5000). 
flush(S) -> - start_timer(do_flush(S)). + do_flush(cancel_timer(S)). do_flush(S0 = #s{n = 0}) -> S0; @@ -372,16 +370,18 @@ compose_errors({error, recoverable, _}, {error, unrecoverable, Err}) -> compose_errors(ErrAcc, _Err) -> ErrAcc. -start_timer(S) -> +ensure_timer(S = #s{tref = undefined}) -> Interval = application:get_env(emqx_durable_storage, egress_flush_interval, 100), Tref = erlang:send_after(Interval, self(), ?flush), - S#s{tref = Tref}. + S#s{tref = Tref}; +ensure_timer(S) -> + S. -cancel_timer(#s{tref = undefined}) -> - ok; -cancel_timer(#s{tref = TRef}) -> +cancel_timer(S = #s{tref = undefined}) -> + S; +cancel_timer(S = #s{tref = TRef}) -> _ = erlang:cancel_timer(TRef), - ok. + S#s{tref = undefined}. %% @doc Return approximate size of the MQTT message (it doesn't take %% all things into account, for example headers and extras) From a1e85e3c59e31a607e2a7cabeb5281f578d79008 Mon Sep 17 00:00:00 2001 From: firest Date: Mon, 22 Apr 2024 13:09:34 +0800 Subject: [PATCH 222/234] fix(dynamo): Added missing keys for DynamoDB --- .../src/emqx_bridge_dynamo.erl | 29 ++++++++- .../src/emqx_bridge_dynamo_connector.erl | 59 +++++++++++++++---- .../test/emqx_bridge_dynamo_SUITE.erl | 29 +++++++-- rel/i18n/emqx_bridge_dynamo.hocon | 5 ++ rel/i18n/emqx_bridge_dynamo_connector.hocon | 3 + 5 files changed, 107 insertions(+), 18 deletions(-) diff --git a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl index d568fee25..8dafa3922 100644 --- a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl +++ b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo.erl @@ -87,6 +87,7 @@ connector_values() -> <<"url">> => <<"http://127.0.0.1:8000">>, <<"aws_access_key_id">> => <<"root">>, <<"aws_secret_access_key">> => <<"******">>, + <<"region">> => <<"us-west-2">>, <<"pool_size">> => 8, <<"resource_opts">> => #{ @@ -113,7 +114,8 @@ action_values() -> <<"parameters">> => #{ <<"table">> => <<"mqtt_msg">>, - <<"template">> => 
?DEFAULT_TEMPLATE + <<"template">> => ?DEFAULT_TEMPLATE, + <<"hash_key">> => <<"clientid">> } }. @@ -160,7 +162,19 @@ fields(dynamo_action) -> ); fields(action_parameters) -> Parameters = - [{template, template_field_schema()}] ++ emqx_bridge_dynamo_connector:fields(config), + [ + {template, template_field_schema()}, + {hash_key, + mk( + binary(), + #{desc => ?DESC("hash_key"), required => true} + )}, + {range_key, + mk( + binary(), + #{desc => ?DESC("range_key"), required => false} + )} + ] ++ emqx_bridge_dynamo_connector:fields(config), lists:foldl( fun(Key, Acc) -> proplists:delete(Key, Acc) @@ -168,6 +182,7 @@ fields(action_parameters) -> Parameters, [ url, + region, aws_access_key_id, aws_secret_access_key, pool_size, @@ -199,6 +214,16 @@ fields("config") -> binary(), #{desc => ?DESC("local_topic"), default => undefined} )}, + {hash_key, + mk( + binary(), + #{desc => ?DESC("hash_key"), required => true} + )}, + {range_key, + mk( + binary(), + #{desc => ?DESC("range_key"), required => false} + )}, {resource_opts, mk( ref(?MODULE, "creation_opts"), diff --git a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl index 36f54a63f..93b7e3529 100644 --- a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl +++ b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl @@ -45,6 +45,7 @@ roots() -> fields(config) -> [ {url, mk(binary(), #{required => true, desc => ?DESC("url")})}, + {region, mk(binary(), #{required => true, desc => ?DESC("region")})}, {table, mk(binary(), #{required => true, desc => ?DESC("table")})}, {aws_access_key_id, mk( @@ -102,6 +103,12 @@ on_start( pool_name => InstanceId, installed_channels => #{} }, + case Config of + #{region := Region} -> + application:set_env(erlcloud, aws_region, to_str(Region)); + _ -> + ok + end, case emqx_resource_pool:start(InstanceId, ?MODULE, Options) of ok -> {ok, State}; @@ -126,12 +133,20 @@ on_add_channel( create_channel_state( 
#{parameters := Conf} = _ChannelConfig ) -> - #{ - table := Table - } = Conf, + Keys = maps:with([hash_key, range_key], Conf), + Keys1 = maps:fold( + fun(K, V, Acc) -> + Acc#{K := erlang:binary_to_atom(V)} + end, + Keys, + Keys + ), + + Base = maps:without([template, hash_key, range_key], Conf), + Base1 = maps:merge(Base, Keys1), + Templates = parse_template_from_conf(Conf), - State = #{ - table => Table, + State = Base1#{ templates => Templates }, {ok, State}. @@ -232,11 +247,16 @@ do_query( templates := Templates } = ChannelState, Result = - ecpool:pick_and_do( - PoolName, - {emqx_bridge_dynamo_connector_client, query, [Table, QueryTuple, Templates]}, - no_handover - ), + case ensuare_dynamo_keys(Query, ChannelState) of + true -> + ecpool:pick_and_do( + PoolName, + {emqx_bridge_dynamo_connector_client, query, [Table, QueryTuple, Templates]}, + no_handover + ); + _ -> + {error, missing_filter_or_range_key} + end, case Result of {error, Reason} -> @@ -288,6 +308,25 @@ get_query_tuple([{_ChannelId, {_QueryType, _Data}} | _]) -> get_query_tuple([InsertQuery | _]) -> get_query_tuple(InsertQuery). +ensuare_dynamo_keys({_, Data} = Query, State) when is_map(Data) -> + ensuare_dynamo_keys([Query], State); +ensuare_dynamo_keys([{_, Data} | _] = Queries, State) when is_map(Data) -> + Keys = maps:to_list(maps:with([hash_key, range_key], State)), + lists:all( + fun({_, Query}) -> + lists:all( + fun({_, Key}) -> + maps:is_key(Key, Query) + end, + Keys + ) + end, + Queries + ); +%% this is not a insert query +ensuare_dynamo_keys(_Query, _State) -> + true. + connect(Opts) -> Config = proplists:get_value(config, Opts), {ok, _Pid} = emqx_bridge_dynamo_connector_client:start_link(Config). 
diff --git a/apps/emqx_bridge_dynamo/test/emqx_bridge_dynamo_SUITE.erl b/apps/emqx_bridge_dynamo/test/emqx_bridge_dynamo_SUITE.erl index dab7b21f0..ff3d5824e 100644 --- a/apps/emqx_bridge_dynamo/test/emqx_bridge_dynamo_SUITE.erl +++ b/apps/emqx_bridge_dynamo/test/emqx_bridge_dynamo_SUITE.erl @@ -16,6 +16,7 @@ -define(TABLE_BIN, to_bin(?TABLE)). -define(ACCESS_KEY_ID, "root"). -define(SECRET_ACCESS_KEY, "public"). +-define(REGION, "us-west-2"). -define(HOST, "dynamo"). -define(PORT, 8000). -define(SCHEMA, "http://"). @@ -177,7 +178,9 @@ dynamo_config(BridgeType, Config) -> "bridges.~s.~s {" "\n enable = true" "\n url = \"http://~s:~p\"" + "\n region = ~p" "\n table = ~p" + "\n hash_key =\"clientid\"" "\n aws_access_key_id = ~p" "\n aws_secret_access_key = ~p" "\n resource_opts = {" @@ -191,6 +194,7 @@ dynamo_config(BridgeType, Config) -> Name, Host, Port, + ?REGION, ?TABLE, ?ACCESS_KEY_ID, %% NOTE: using file-based secrets with HOCON configs @@ -210,7 +214,8 @@ action_config(Config) -> <<"enable">> => true, <<"parameters">> => #{ - <<"table">> => ?TABLE + <<"table">> => ?TABLE, + <<"hash_key">> => <<"clientid">> }, <<"resource_opts">> => #{ @@ -234,6 +239,7 @@ connector_config(Config) -> <<"url">> => URL, <<"aws_access_key_id">> => ?ACCESS_KEY_ID, <<"aws_secret_access_key">> => AccessKey, + <<"region">> => ?REGION, <<"enable">> => true, <<"pool_size">> => 8, <<"resource_opts">> => @@ -355,7 +361,7 @@ t_setup_via_config_and_publish(Config) -> create_bridge(Config) ), MsgId = emqx_utils:gen_id(), - SentData = #{id => MsgId, payload => ?PAYLOAD}, + SentData = #{clientid => <<"clientid">>, id => MsgId, payload => ?PAYLOAD}, ?check_trace( begin ?wait_async_action( @@ -421,7 +427,7 @@ t_setup_via_http_api_and_publish(Config) -> create_bridge_http(PgsqlConfig) ), MsgId = emqx_utils:gen_id(), - SentData = #{id => MsgId, payload => ?PAYLOAD}, + SentData = #{clientid => <<"clientid">>, id => MsgId, payload => ?PAYLOAD}, ?check_trace( begin ?wait_async_action( @@ -486,7 +492,7 
@@ t_write_failure(Config) -> #{?snk_kind := resource_connected_enter}, 20_000 ), - SentData = #{id => emqx_utils:gen_id(), payload => ?PAYLOAD}, + SentData = #{clientid => <<"clientid">>, id => emqx_utils:gen_id(), payload => ?PAYLOAD}, emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> ?assertMatch( {error, {resource_error, #{reason := timeout}}}, send_message(Config, SentData) @@ -513,12 +519,21 @@ t_simple_query(Config) -> ok. t_missing_data(Config) -> + ?assertMatch( + {ok, _}, + create_bridge(Config) + ), + Result = send_message(Config, #{clientid => <<"clientid">>}), + ?assertMatch({error, {<<"ValidationException">>, <<>>}}, Result), + ok. + +t_missing_hash_key(Config) -> ?assertMatch( {ok, _}, create_bridge(Config) ), Result = send_message(Config, #{}), - ?assertMatch({error, {unrecoverable_error, {invalid_request, _}}}, Result), + ?assertMatch({error, missing_filter_or_range_key}, Result), ok. t_bad_parameter(Config) -> @@ -543,7 +558,9 @@ t_action_create_via_http(Config) -> emqx_bridge_v2_testlib:t_create_via_http(Config). t_action_sync_query(Config) -> - MakeMessageFun = fun() -> #{id => <<"the_message_id">>, payload => ?PAYLOAD} end, + MakeMessageFun = fun() -> + #{clientid => <<"clientid">>, id => <<"the_message_id">>, payload => ?PAYLOAD} + end, IsSuccessCheck = fun(Result) -> ?assertEqual({ok, []}, Result) end, TracePoint = dynamo_connector_query_return, emqx_bridge_v2_testlib:t_sync_query(Config, MakeMessageFun, IsSuccessCheck, TracePoint). 
diff --git a/rel/i18n/emqx_bridge_dynamo.hocon b/rel/i18n/emqx_bridge_dynamo.hocon index 0d3bcd3f9..31771832a 100644 --- a/rel/i18n/emqx_bridge_dynamo.hocon +++ b/rel/i18n/emqx_bridge_dynamo.hocon @@ -60,4 +60,9 @@ config_connector.desc: config_connector.label: """DynamoDB Connector Configuration""" +hash_key.desc: +"""DynamoDB Hash Key""" + +range_key.desc: +"""DynamoDB Range Key""" } diff --git a/rel/i18n/emqx_bridge_dynamo_connector.hocon b/rel/i18n/emqx_bridge_dynamo_connector.hocon index 7c37676b5..a956dfe30 100644 --- a/rel/i18n/emqx_bridge_dynamo_connector.hocon +++ b/rel/i18n/emqx_bridge_dynamo_connector.hocon @@ -18,6 +18,9 @@ table.desc: table.label: """Table """ +region.label: +"""Region of the AWS Dynamo""" + url.desc: """The url of DynamoDB endpoint.""" From 46f339dfabe415e61bf80536074d8057c9c36dfc Mon Sep 17 00:00:00 2001 From: firest Date: Thu, 18 Apr 2024 17:06:09 +0800 Subject: [PATCH 223/234] chore: updte change log --- .../emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl | 2 +- changes/ee/fix-12895.en.md | 6 ++++++ rel/i18n/emqx_bridge_dynamo_connector.hocon | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 changes/ee/fix-12895.en.md diff --git a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl index 93b7e3529..372472dda 100644 --- a/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl +++ b/apps/emqx_bridge_dynamo/src/emqx_bridge_dynamo_connector.erl @@ -136,7 +136,7 @@ create_channel_state( Keys = maps:with([hash_key, range_key], Conf), Keys1 = maps:fold( fun(K, V, Acc) -> - Acc#{K := erlang:binary_to_atom(V)} + Acc#{K := erlang:binary_to_existing_atom(V)} end, Keys, Keys diff --git a/changes/ee/fix-12895.en.md b/changes/ee/fix-12895.en.md new file mode 100644 index 000000000..dbfd52e2b --- /dev/null +++ b/changes/ee/fix-12895.en.md @@ -0,0 +1,6 @@ +Complemented some necessary but missed keys for the DynamoDB connector and the 
action. + +## Breaking changes +* The old configuration no longer works, although it actually didn't work properly until this fix. +* For DynamoDB connector, a new key `region` is necessary. +* `hash_key` and `range_key` are now supported in the DynamoDB action, and `hash_key` is required. diff --git a/rel/i18n/emqx_bridge_dynamo_connector.hocon b/rel/i18n/emqx_bridge_dynamo_connector.hocon index a956dfe30..18c3670aa 100644 --- a/rel/i18n/emqx_bridge_dynamo_connector.hocon +++ b/rel/i18n/emqx_bridge_dynamo_connector.hocon @@ -18,8 +18,8 @@ table.desc: table.label: """Table """ -region.label: -"""Region of the AWS Dynamo""" +region.desc: +"""Region of AWS Dynamo""" url.desc: """The url of DynamoDB endpoint.""" From d5cdc07eabd31eacc81083728b158a437077653a Mon Sep 17 00:00:00 2001 From: firest Date: Mon, 22 Apr 2024 13:14:11 +0800 Subject: [PATCH 224/234] feat(rocketmq): add support for namespace and key dispatch strategy --- .../src/emqx_bridge_rocketmq.erl | 16 +++-- .../src/emqx_bridge_rocketmq_connector.erl | 69 +++++++++++++++---- rel/i18n/emqx_bridge_rocketmq.hocon | 3 + rel/i18n/emqx_bridge_rocketmq_connector.hocon | 6 ++ 4 files changed, 78 insertions(+), 16 deletions(-) diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl index 750993e9a..f7e6d9b57 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq.erl @@ -163,10 +163,12 @@ fields(action_parameters) -> {template, mk( emqx_schema:template(), - #{ - desc => ?DESC("template"), - default => ?DEFAULT_TEMPLATE - } + #{desc => ?DESC("template"), default => ?DEFAULT_TEMPLATE} + )}, + {strategy, + mk( + hoconsc:union([roundrobin, binary()]), + #{desc => ?DESC("strategy"), default => roundrobin} )} ] ++ emqx_bridge_rocketmq_connector:fields(config), lists:foldl( @@ -176,6 +178,7 @@ fields(action_parameters) -> Parameters, [ servers, + namespace, pool_size, auto_reconnect, 
access_key, @@ -215,6 +218,11 @@ fields("config") -> mk( binary(), #{desc => ?DESC("local_topic"), required => false} + )}, + {strategy, + mk( + hoconsc:union([roundrobin, binary()]), + #{desc => ?DESC("strategy"), default => roundrobin} )} ] ++ emqx_resource_schema:fields("resource_opts") ++ emqx_bridge_rocketmq_connector:fields(config); diff --git a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl index 0bea5a8ff..0141c3fd0 100644 --- a/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl +++ b/apps/emqx_bridge_rocketmq/src/emqx_bridge_rocketmq_connector.erl @@ -45,6 +45,11 @@ roots() -> fields(config) -> [ {servers, servers()}, + {namespace, + mk( + binary(), + #{required => false, desc => ?DESC(namespace)} + )}, {topic, mk( emqx_schema:template(), @@ -107,7 +112,7 @@ on_start( ), ClientId = client_id(InstanceId), ACLInfo = acl_info(AccessKey, SecretKey, SecurityToken), - ClientCfg = #{acl_info => ACLInfo}, + ClientCfg = namespace(#{acl_info => ACLInfo}, Config), State = #{ client_id => ClientId, @@ -156,10 +161,12 @@ create_channel_state( TopicTks = emqx_placeholder:preproc_tmpl(Topic), ProducerOpts = make_producer_opts(Conf, ACLInfo), Templates = parse_template(Conf), + DispatchStrategy = parse_dispatch_strategy(Conf), State = #{ topic => Topic, topic_tokens => TopicTks, templates => Templates, + dispatch_strategy => DispatchStrategy, sync_timeout => SyncTimeout, acl_info => ACLInfo, producers_opts => ProducerOpts @@ -250,12 +257,13 @@ do_query( #{ topic_tokens := TopicTks, templates := Templates, + dispatch_strategy := DispatchStrategy, sync_timeout := RequestTimeout, producers_opts := ProducerOpts } = maps:get(ChannelId, Channels), TopicKey = get_topic_key(Query, TopicTks), - Data = apply_template(Query, Templates), + Data = apply_template(Query, Templates, DispatchStrategy), Result = safe_do_produce( InstanceId, QueryFunc, ClientId, TopicKey, Data, ProducerOpts, 
RequestTimeout @@ -315,24 +323,57 @@ parse_template([{Key, H} | T], Templates) -> parse_template([], Templates) -> Templates. +%% returns a procedure to generate the produce context +parse_dispatch_strategy(#{strategy := roundrobin}) -> + fun(_) -> + #{} + end; +parse_dispatch_strategy(#{strategy := Template}) -> + Tokens = emqx_placeholder:preproc_tmpl(Template), + fun(Msg) -> + #{ + key => + case emqx_placeholder:proc_tmpl(Tokens, Msg) of + <<"undefined">> -> + %% Since the key may be absent on some kinds of events (ex: + %% `topic' is absent in `client.disconnected'), and this key is + %% used for routing, we generate a random key when it's absent to + %% better distribute the load, effectively making it `random' + %% dispatch if the key is absent and we are using `key_dispatch'. + %% Otherwise, it'll be deterministic. + emqx_guid:gen(); + Key -> + Key + end + } + end. + get_topic_key({_, Msg}, TopicTks) -> emqx_placeholder:proc_tmpl(TopicTks, Msg); get_topic_key([Query | _], TopicTks) -> get_topic_key(Query, TopicTks). 
-apply_template({Key, Msg} = _Req, Templates) -> +%% return a message data and its context, +%% {binary(), rocketmq_producers:produce_context()}) +apply_template({Key, Msg} = _Req, Templates, DispatchStrategy) -> + { + case maps:get(Key, Templates, undefined) of + undefined -> + emqx_utils_json:encode(Msg); + Template -> + emqx_placeholder:proc_tmpl(Template, Msg) + end, + DispatchStrategy(Msg) + }; +apply_template([{Key, _} | _] = Reqs, Templates, DispatchStrategy) -> case maps:get(Key, Templates, undefined) of undefined -> - emqx_utils_json:encode(Msg); + [{emqx_utils_json:encode(Msg), DispatchStrategy(Msg)} || {_, Msg} <- Reqs]; Template -> - emqx_placeholder:proc_tmpl(Template, Msg) - end; -apply_template([{Key, _} | _] = Reqs, Templates) -> - case maps:get(Key, Templates, undefined) of - undefined -> - [emqx_utils_json:encode(Msg) || {_, Msg} <- Reqs]; - Template -> - [emqx_placeholder:proc_tmpl(Template, Msg) || {_, Msg} <- Reqs] + [ + {emqx_placeholder:proc_tmpl(Template, Msg), DispatchStrategy(Msg)} + || {_, Msg} <- Reqs + ] end. client_id(ResourceId) -> @@ -377,6 +418,10 @@ acl_info(AccessKey, SecretKey, SecurityToken) when is_binary(AccessKey) -> acl_info(_, _, _) -> #{}. +namespace(ClientCfg, Config) -> + Namespace = maps:get(namespace, Config, <<>>), + ClientCfg#{namespace => Namespace}. + create_producers_map(ClientId) -> _ = ets:new(ClientId, [public, named_table, {read_concurrency, true}]), ok. 
diff --git a/rel/i18n/emqx_bridge_rocketmq.hocon b/rel/i18n/emqx_bridge_rocketmq.hocon index b6bb3aad6..fe6ca8c8d 100644 --- a/rel/i18n/emqx_bridge_rocketmq.hocon +++ b/rel/i18n/emqx_bridge_rocketmq.hocon @@ -59,4 +59,7 @@ config_connector.desc: config_connector.label: """RocketMQ Client Configuration""" +strategy.desc: +"""Producer key dispatch strategy, the default is `roundrobin`, also supports placeholders, such as: `clientid`, `messageid`, `username`.""" + } diff --git a/rel/i18n/emqx_bridge_rocketmq_connector.hocon b/rel/i18n/emqx_bridge_rocketmq_connector.hocon index b13e015c2..b65ce5405 100644 --- a/rel/i18n/emqx_bridge_rocketmq_connector.hocon +++ b/rel/i18n/emqx_bridge_rocketmq_connector.hocon @@ -50,4 +50,10 @@ topic.desc: topic.label: """RocketMQ Topic""" +namespace.desc: +"""The namespace field MUST be set if you are using the RocketMQ service in +aliyun cloud and also the namespace is enabled, +or if you have configured a namespace in your RocketMQ server. +For RocketMQ in aliyun cloud, the namespace is the instance ID.""" + } From 617b2137b4f0bddde8eb60bd4e1d3f046d924fbf Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 19 Apr 2024 15:16:56 +0800 Subject: [PATCH 225/234] chore: update changes --- changes/ee/feat-12899.en.md | 1 + scripts/spellcheck/dicts/emqx.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 changes/ee/feat-12899.en.md diff --git a/changes/ee/feat-12899.en.md b/changes/ee/feat-12899.en.md new file mode 100644 index 000000000..8d5b62bcc --- /dev/null +++ b/changes/ee/feat-12899.en.md @@ -0,0 +1 @@ +Added support for namespace and key dispatch strategy. 
diff --git a/scripts/spellcheck/dicts/emqx.txt b/scripts/spellcheck/dicts/emqx.txt index c7c266434..d68c85716 100644 --- a/scripts/spellcheck/dicts/emqx.txt +++ b/scripts/spellcheck/dicts/emqx.txt @@ -306,3 +306,4 @@ elasticsearch ElasticSearch doc_as_upsert upsert +aliyun From 5da9486b0651a6f5430bb7eead583e5ebd2cee07 Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Mon, 22 Apr 2024 15:09:58 +0800 Subject: [PATCH 226/234] chore: add iotdb130 changelog --- changes/ee/feat-12898.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/feat-12898.en.md diff --git a/changes/ee/feat-12898.en.md b/changes/ee/feat-12898.en.md new file mode 100644 index 000000000..67e5ea965 --- /dev/null +++ b/changes/ee/feat-12898.en.md @@ -0,0 +1 @@ +IoTDB bridge support for iotdb 1.3.0 and batch insert(batch_size/batch_time) options. From 6049b4966f3103522d9f96670964c44e46bedfaf Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Mon, 22 Apr 2024 16:34:46 +0800 Subject: [PATCH 227/234] fix: bump escokd to 5.11.2 to handle udp_error/closed message --- apps/emqx/rebar.config | 2 +- changes/ee/fix-12909.en.md | 1 + mix.exs | 2 +- rebar.config | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changes/ee/fix-12909.en.md diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 57c71e73b..b58734cf8 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -27,7 +27,7 @@ {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.2"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.42.1"}}}, diff --git 
a/changes/ee/fix-12909.en.md b/changes/ee/fix-12909.en.md new file mode 100644 index 000000000..c400abcff --- /dev/null +++ b/changes/ee/fix-12909.en.md @@ -0,0 +1 @@ +Fixed UDP listener process handling on errors or closure, The fix ensures the UDP listener is cleanly stopped and restarted as needed if these error conditions occur. diff --git a/mix.exs b/mix.exs index 4f51d0108..bbb11cc52 100644 --- a/mix.exs +++ b/mix.exs @@ -53,7 +53,7 @@ defmodule EMQXUmbrella.MixProject do {:gproc, github: "emqx/gproc", tag: "0.9.0.1", override: true}, {:jiffy, github: "emqx/jiffy", tag: "1.0.6", override: true}, {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, - {:esockd, github: "emqx/esockd", tag: "5.11.1", override: true}, + {:esockd, github: "emqx/esockd", tag: "5.11.2", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-2", override: true}, {:ekka, github: "emqx/ekka", tag: "0.19.3", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "3.3.1", override: true}, diff --git a/rebar.config b/rebar.config index 14608ec4c..e0f88893c 100644 --- a/rebar.config +++ b/rebar.config @@ -81,7 +81,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.6"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.2"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-2"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, From e9d498dde2aae4181678f9e9052730f0de40c94f Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Mon, 22 Apr 2024 13:51:48 +0200 Subject: [PATCH 228/234] feat(rule tracing): add rule trigger time meta data field Fixes: https://emqx.atlassian.net/browse/EMQX-12025 --- 
.../src/emqx_resource_buffer_worker.erl | 16 +++++++- .../src/emqx_rule_runtime.erl | 38 +++++++++++++------ .../emqx_rule_engine_api_rule_apply_SUITE.erl | 14 +++++++ 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 1cbcfe0b8..e35453c94 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1163,6 +1163,7 @@ set_rule_id_trace_meta_data(Requests) when is_list(Requests) -> %% Get the rule ids from requests RuleIDs = lists:foldl(fun collect_rule_id/2, #{}, Requests), ClientIDs = lists:foldl(fun collect_client_id/2, #{}, Requests), + RuleTriggerTimes = lists:foldl(fun collect_rule_trigger_times/2, [], Requests), StopAfterRenderVal = case Requests of %% We know that the batch is not mixed since we prevent this by @@ -1173,7 +1174,10 @@ set_rule_id_trace_meta_data(Requests) when is_list(Requests) -> false end, logger:update_process_metadata(#{ - rule_ids => RuleIDs, client_ids => ClientIDs, stop_action_after_render => StopAfterRenderVal + rule_ids => RuleIDs, + client_ids => ClientIDs, + rule_trigger_times => RuleTriggerTimes, + stop_action_after_render => StopAfterRenderVal }), ok; set_rule_id_trace_meta_data(Request) -> @@ -1190,9 +1194,17 @@ collect_client_id(?QUERY(_, _, _, _, #{clientid := ClientId}), Acc) -> collect_client_id(?QUERY(_, _, _, _, _), Acc) -> Acc. +collect_rule_trigger_times(?QUERY(_, _, _, _, #{rule_trigger_time := Time}), Acc) -> + [Time | Acc]; +collect_rule_trigger_times(?QUERY(_, _, _, _, _), Acc) -> + Acc. + unset_rule_id_trace_meta_data() -> logger:update_process_metadata(#{ - rule_ids => #{}, client_ids => #{}, stop_action_after_render => false + rule_ids => #{}, + client_ids => #{}, + stop_action_after_render => false, + rule_trigger_times => [] }). 
%% action:kafka_producer:myproducer1:connector:kafka_producer:mykakfaclient1 diff --git a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl index 3872fb973..f99341a9b 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_runtime.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_runtime.erl @@ -139,25 +139,35 @@ apply_rule(Rule = #{id := RuleID}, Columns, Envs) -> reset_process_trace_metadata(Columns) end. -set_process_trace_metadata(RuleID, #{clientid := ClientID}) -> +set_process_trace_metadata(RuleID, #{clientid := ClientID} = Columns) -> + logger:update_process_metadata(#{ + clientid => ClientID + }), + set_process_trace_metadata(RuleID, maps:remove(clientid, Columns)); +set_process_trace_metadata(RuleID, Columns) -> + EventTimestamp = + case Columns of + #{timestamp := Timestamp} -> + Timestamp; + _ -> + erlang:system_time(millisecond) + end, logger:update_process_metadata(#{ rule_id => RuleID, - clientid => ClientID - }); -set_process_trace_metadata(RuleID, _) -> - logger:update_process_metadata(#{ - rule_id => RuleID + rule_trigger_time => EventTimestamp }). reset_process_trace_metadata(#{clientid := _ClientID}) -> Meta = logger:get_process_metadata(), Meta1 = maps:remove(clientid, Meta), Meta2 = maps:remove(rule_id, Meta1), - logger:set_process_metadata(Meta2); + Meta3 = maps:remove(rule_trigger_time, Meta2), + logger:set_process_metadata(Meta3); reset_process_trace_metadata(_) -> Meta = logger:get_process_metadata(), Meta1 = maps:remove(rule_id, Meta), - logger:set_process_metadata(Meta1). + Meta2 = maps:remove(rule_trigger_time, Meta1), + logger:set_process_metadata(Meta2). 
do_apply_rule( #{ @@ -499,21 +509,25 @@ do_handle_action_get_trace_inc_metrics_context_unconditionally(Action, TraceMeta case TraceMeta of #{ rule_id := RuleID, - clientid := ClientID + clientid := ClientID, + rule_trigger_time := Timestamp } -> #{ rule_id => RuleID, clientid => ClientID, action_id => Action, - stop_action_after_render => StopAfterRender + stop_action_after_render => StopAfterRender, + rule_trigger_time => Timestamp }; #{ - rule_id := RuleID + rule_id := RuleID, + rule_trigger_time := Timestamp } -> #{ rule_id => RuleID, action_id => Action, - stop_action_after_render => StopAfterRender + stop_action_after_render => StopAfterRender, + rule_trigger_time => Timestamp } end. diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl index b1e533d31..52fa1a2e5 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_apply_SUITE.erl @@ -159,6 +159,20 @@ basic_apply_rule_test_helper(Config, TraceType, StopAfterRender) -> end ) end, + %% Check that rule_trigger_time meta field is present in all log entries + Log0 = read_rule_trace_file(TraceName, TraceType, Now), + Log1 = binary:split(Log0, <<"\n">>, [global, trim]), + Log2 = lists:join(<<",\n">>, Log1), + Log3 = iolist_to_binary(["[", Log2, "]"]), + {ok, LogEntries} = emqx_utils_json:safe_decode(Log3, [return_maps]), + [#{<<"meta">> := #{<<"rule_trigger_time">> := RuleTriggerTime}} | _] = LogEntries, + [ + ?assert( + (maps:get(<<"rule_trigger_time">>, Meta, no_time) =:= RuleTriggerTime) orelse + (lists:member(RuleTriggerTime, maps:get(<<"rule_trigger_times">>, Meta, []))) + ) + || #{<<"meta">> := Meta} <- LogEntries + ], emqx_trace:delete(TraceName), ok. 
From f5169e4e21d94ab25bd330ff246032f73cd31fa5 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Mon, 22 Apr 2024 14:03:03 +0200 Subject: [PATCH 229/234] chore: add Andrew (keynslug) to the review board --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a9474f01a..95c219b88 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,6 +5,7 @@ ## HJianBo ## id ## ieQu1 +## keynslug ## qzhuyan ## savonarola ## terry-xiaoyu @@ -20,7 +21,6 @@ /apps/emqx_dashboard_rbac/ @emqx/emqx-review-board @lafirest /apps/emqx_dashboard_sso/ @emqx/emqx-review-board @JimMoen @lafirest /apps/emqx_exhook/ @emqx/emqx-review-board @JimMoen -/apps/emqx_ft/ @emqx/emqx-review-board @keynslug /apps/emqx_gateway/ @emqx/emqx-review-board @lafirest /apps/emqx_management/ @emqx/emqx-review-board @lafirest /apps/emqx_opentelemetry @emqx/emqx-review-board @SergeTupchiy From e4c3283c9c9cdf9c3de4ca61f7952e43a6f2a67c Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:24:55 +0200 Subject: [PATCH 230/234] docs(ds): Update README with CLI and REST API endpoints --- apps/emqx_durable_storage/README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/apps/emqx_durable_storage/README.md b/apps/emqx_durable_storage/README.md index 1e87f3907..f67cc3e24 100644 --- a/apps/emqx_durable_storage/README.md +++ b/apps/emqx_durable_storage/README.md @@ -64,6 +64,10 @@ Messages are organized in the following hierarchy: The consumer of the messages can replay the stream using an _iterator_. +## Saving messages to the durable storage + +`emqx_ds` provides `store_batch/3` function that saves a list of MQTT messages to the durable storage. + ## Message replay All the API functions in EMQX DS are batch-oriented. 
@@ -120,9 +124,24 @@ The following application environment variables are available: - `emqx_durable_storage.egress_flush_interval`: period at which the batches of messages are committed to the durable storage. +Runtime settings for the durable storages can be modified via CLI as well as the REST API. +The following CLI commands are available: + +- `emqx ctl ds info` — get a quick overview of the durable storage state +- `emqx ctl ds set_replicas ...` — update the list of replicas for a durable storage. +- `emqx ctl ds join ` — add a replica of durable storage on the site +- `emqx ctl ds leave ` — remove a replica of a durable storage from the site + # HTTP APIs -None +The following REST APIs are available for managing the builtin durable storages: + +- `/ds/sites` — list known sites. +- `/ds/sites/:site` — get information about the site (its status, current EMQX node name managing the site, etc.) +- `/ds/storages` — list durable storages +- `/ds/storages/:ds` — get information about the durable storage and its shards +- `/ds/storages/:ds/replicas` — list or update sites that contain replicas of a durable storage +- `/ds/storages/:ds/replicas/:site` — add or remove replica of the durable storage on the site # Other TBD From d2e0094dfdc1d56512c25f9ee9e11b54d32f3f05 Mon Sep 17 00:00:00 2001 From: zmstone Date: Tue, 23 Apr 2024 21:01:58 +0200 Subject: [PATCH 231/234] docs: improve doc for pulsar auth config fields --- rel/i18n/emqx_bridge_pulsar.hocon | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rel/i18n/emqx_bridge_pulsar.hocon b/rel/i18n/emqx_bridge_pulsar.hocon index 913ab8d2a..56db961c3 100644 --- a/rel/i18n/emqx_bridge_pulsar.hocon +++ b/rel/i18n/emqx_bridge_pulsar.hocon @@ -13,15 +13,15 @@ connector_resource_opts.label: auth_basic.desc: """Parameters for basic authentication.""" auth_basic.label: -"""Basic auth params""" +"""Basic auth parameters""" auth_basic_password.desc: -"""Basic authentication password.""" +"""Basic authentication 
password. The `password` part of the `username:password` authentication string.""" auth_basic_password.label: """Password""" auth_basic_username.desc: -"""Basic authentication username.""" +"""Basic authentication username. The `username` part of the `username:password` authentication string.""" auth_basic_username.label: """Username""" From 02a0faa1d6bff2f10883bcc7d59e6cf912e8d0ea Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Wed, 24 Apr 2024 16:01:32 +0300 Subject: [PATCH 232/234] refactor: avoid evaluating Data more than once in SLOG_THROTTE macro --- apps/emqx/include/logger.hrl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/apps/emqx/include/logger.hrl b/apps/emqx/include/logger.hrl index 7bd542b0d..31fe0e36a 100644 --- a/apps/emqx/include/logger.hrl +++ b/apps/emqx/include/logger.hrl @@ -44,11 +44,20 @@ ). -define(SLOG_THROTTLE(Level, Data, Meta), - case emqx_log_throttler:allow(maps:get(msg, Data)) of + case logger:allow(Level, ?MODULE) of true -> - ?SLOG(Level, Data, Meta); + (fun(#{msg := __Msg} = __Data) -> + case emqx_log_throttler:allow(__Msg) of + true -> + logger:log(Level, __Data, Meta); + false -> + ?_DO_TRACE(Level, __Msg, maps:merge(__Data, Meta)) + end + end)( + Data + ); false -> - ?_DO_TRACE(Level, maps:get(msg, Data), maps:merge(Data, Meta)) + ok end ). 
From d856d4e7450b0f7a075ab78be4c1b0ef013477b9 Mon Sep 17 00:00:00 2001 From: zmstone Date: Wed, 24 Apr 2024 21:25:47 +0200 Subject: [PATCH 233/234] ci: refine dashboard_test --- scripts/ui-tests/dashboard_test.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index 5f7ac8e15..7003802ab 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -9,6 +9,7 @@ from selenium.webdriver.common.keys import Keys from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.common import utils +from selenium.common.exceptions import NoSuchElementException @pytest.fixture def driver(): @@ -31,11 +32,13 @@ def dashboard_url(dashboard_host, dashboard_port): @pytest.fixture def login(driver, dashboard_url): + # admin is set in CI jobs, hence as default value + password = os.getenv("EMQX_DASHBOARD__DEFAULT_PASSWORD", "admin") driver.get(dashboard_url) assert "EMQX Dashboard" == driver.title assert f"{dashboard_url}/#/login?to=/dashboard/overview" == driver.current_url driver.find_element(By.XPATH, "//div[@class='login']//form[1]//input[@type='text']").send_keys("admin") - driver.find_element(By.XPATH, "//div[@class='login']//form[1]//input[@type='password']").send_keys("admin") + driver.find_element(By.XPATH, "//div[@class='login']//form[1]//input[@type='password']").send_keys(password) driver.find_element(By.XPATH, "//div[@class='login']//form[1]//button[1]").click() dest_url = urljoin(dashboard_url, "/#/dashboard/overview") driver.get(dest_url) @@ -91,8 +94,12 @@ def test_docs_link(driver, login, dashboard_url): else: emqx_version = f"v{emqx_community_version}" docs_base_url = "https://www.emqx.io/docs/en" - + emqx_version = ".".join(emqx_version.split(".")[:2]) docs_url = f"{docs_base_url}/{emqx_version}" xpath = 
f"//div[@id='app']//div[@class='nav-header']//a[@href[starts-with(.,'{docs_url}')]]" - assert driver.find_element(By.XPATH, xpath) + + try: + driver.find_element(By.XPATH, xpath) + except NoSuchElementException: + raise AssertionError(f"Cannot find the doc URL for {emqx_name} version {emqx_version}, please make sure the dashboard package is up to date.") From eaeaeb57d64718667711cd623f651665804b6d25 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 25 Apr 2024 09:05:54 +0200 Subject: [PATCH 234/234] chore: update dashboard versions --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2f01ad16e..b899017b4 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,8 @@ endif # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.8.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.6.1 +export EMQX_DASHBOARD_VERSION ?= v1.9.0-beta.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.7.0-beta.1 PROFILE ?= emqx REL_PROFILES := emqx emqx-enterprise