From 397c104a850c68dabad56f16ad78de498a016e83 Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 26 Jul 2024 16:56:47 +0800 Subject: [PATCH 01/50] feat(exclusive): added CLI interface for exclusive topics --- apps/emqx_management/src/emqx_mgmt_cli.erl | 24 +++++++++++++++++-- .../test/emqx_mgmt_cli_SUITE.erl | 5 ++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 2742167f5..035af188f 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -23,6 +23,7 @@ -include_lib("emqx/include/logger.hrl"). -define(DATA_BACKUP_OPTS, #{print_fun => fun emqx_ctl:print/2}). +-define(EXCLUSIVE_TAB, emqx_exclusive_subscription). -export([load/0]). @@ -45,7 +46,8 @@ olp/1, data/1, ds/1, - cluster_info/0 + cluster_info/0, + exclusive/1 ]). -spec load() -> ok. @@ -1024,7 +1026,9 @@ print({?SUBOPTION, {{Topic, Pid}, Options}}) when is_pid(Pid) -> NL = maps:get(nl, Options, 0), RH = maps:get(rh, Options, 0), RAP = maps:get(rap, Options, 0), - emqx_ctl:print("~ts -> topic:~ts qos:~p nl:~p rh:~p rap:~p~n", [SubId, Topic, QoS, NL, RH, RAP]). + emqx_ctl:print("~ts -> topic:~ts qos:~p nl:~p rh:~p rap:~p~n", [SubId, Topic, QoS, NL, RH, RAP]); +print({exclusive, {exclusive_subscription, Topic, ClientId}}) -> + emqx_ctl:print("topic:~ts -> ClientId:~ts~n", [Topic, ClientId]). format(_, undefined) -> undefined; @@ -1085,3 +1089,19 @@ safe_call_mria(Fun, Args, OnFail) -> }), OnFail end. 
+%%-------------------------------------------------------------------- +%% @doc Exclusive topics +exclusive(["list"]) -> + case ets:info(?EXCLUSIVE_TAB, size) of + 0 -> emqx_ctl:print("No topics.~n"); + _ -> dump(?EXCLUSIVE_TAB, exclusive) + end; +exclusive(["delete", Topic0]) -> + Topic = erlang:iolist_to_binary(Topic0), + emqx_exclusive_subscription:unsubscribe(Topic, #{is_exclusive => true}), + emqx_ctl:print("ok~n"); +exclusive(_) -> + emqx_ctl:usage([ + {"exclusive list", "List all exclusive topics"}, + {"exclusive delete ", "Delete an exclusive topic"} + ]). diff --git a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index cb3451fc1..f3757d7ec 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -354,4 +354,9 @@ t_autocluster_leave(Config) -> ) ). +t_exclusive(_Config) -> + emqx_ctl:run_command(["exclusive", "list"]), + emqx_ctl:run_command(["exclusive", "delete", "t/1"]), + ok. + format(Str, Opts) -> io:format("str:~s: Opts:~p", [Str, Opts]). From dc342a35ac7becf1f020f913d6ce7e8c812ec180 Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 26 Jul 2024 17:18:52 +0800 Subject: [PATCH 02/50] chore: update changes --- changes/ce/feat-13524.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/feat-13524.en.md diff --git a/changes/ce/feat-13524.en.md b/changes/ce/feat-13524.en.md new file mode 100644 index 000000000..efe21104f --- /dev/null +++ b/changes/ce/feat-13524.en.md @@ -0,0 +1 @@ +Added CLI interface `emqx ctl exclusive` for the feature exclusive topics. 
From d7cac74bedd2ae8fa1cbb039d48699e0be998ac2 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 29 Jul 2024 17:24:01 +0800 Subject: [PATCH 03/50] feat: add authz skipped trace --- apps/emqx_auth/src/emqx_authz/emqx_authz.erl | 7 ++++++- changes/ce/feat-13534.en.md | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 changes/ce/feat-13534.en.md diff --git a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl index 8bc60a600..7ca9748a2 100644 --- a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl +++ b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl @@ -477,9 +477,14 @@ authorize_deny( sources() ) -> authz_result(). -authorize(Client, PubSub, Topic, _DefaultResult, Sources) -> +authorize(#{username := Username} = Client, PubSub, Topic, _DefaultResult, Sources) -> case maps:get(is_superuser, Client, false) of true -> + ?TRACE("AUTHZ", "authorization_skipped_as_superuser", #{ + username => Username, + topic => Topic, + action => emqx_access_control:format_action(PubSub) + }), emqx_metrics:inc(?METRIC_SUPERUSER), {stop, #{result => allow, from => superuser}}; false -> diff --git a/changes/ce/feat-13534.en.md b/changes/ce/feat-13534.en.md new file mode 100644 index 000000000..5c5af0bf5 --- /dev/null +++ b/changes/ce/feat-13534.en.md @@ -0,0 +1 @@ +Add trace logging when superuser skipped authz check. 
From 5ddd7d7a6a21a546d6e95aff660e4b6e5e5c1acd Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 29 Jul 2024 17:24:16 +0800 Subject: [PATCH 04/50] test: superuser skipped all authz check --- apps/emqx_auth/src/emqx_authz/emqx_authz.erl | 1 + .../test/emqx_authz/emqx_authz_SUITE.erl | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl index 7ca9748a2..e76d52535 100644 --- a/apps/emqx_auth/src/emqx_authz/emqx_authz.erl +++ b/apps/emqx_auth/src/emqx_authz/emqx_authz.erl @@ -480,6 +480,7 @@ authorize_deny( authorize(#{username := Username} = Client, PubSub, Topic, _DefaultResult, Sources) -> case maps:get(is_superuser, Client, false) of true -> + ?tp(authz_skipped, #{reason => client_is_superuser, action => PubSub}), ?TRACE("AUTHZ", "authorization_skipped_as_superuser", #{ username => Username, topic => Topic, diff --git a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl index 575eb4109..4745d7ec6 100644 --- a/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl +++ b/apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl @@ -674,5 +674,77 @@ t_publish_last_will_testament_banned_client_connecting(_Config) -> ok. 
+t_sikpped_as_superuser(_Config) -> + ClientInfo = #{ + clientid => <<"clientid">>, + username => <<"username">>, + peerhost => {127, 0, 0, 1}, + zone => default, + listener => {tcp, default}, + is_superuser => true + }, + ?check_trace( + begin + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_0), <<"p/t/0">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_1), <<"p/t/1">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_PUBLISH(?QOS_2), <<"p/t/2">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_0), <<"s/t/0">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_1), <<"s/t/1">>) + ), + ?assertEqual( + allow, + emqx_access_control:authorize(ClientInfo, ?AUTHZ_SUBSCRIBE(?QOS_2), <<"s/t/2">>) + ) + end, + fun(Trace) -> + ?assertMatch( + [ + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_0, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_1, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_2, action_type := publish} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_0, action_type := subscribe} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_1, action_type := subscribe} + }, + #{ + reason := client_is_superuser, + action := #{qos := ?QOS_2, action_type := subscribe} + } + ], + ?of_kind(authz_skipped, Trace) + ), + ok + end + ), + + ok = snabbkaffe:stop(). + stop_apps(Apps) -> lists:foreach(fun application:stop/1, Apps). 
From fd961f9da7a973422680e0659dfda96e8d0ee392 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 30 Jul 2024 12:06:24 -0300 Subject: [PATCH 05/50] fix(schema registry): clear protobuf code cache when deleting/updating serde Fixes https://emqx.atlassian.net/browse/EMQX-12789 --- .../include/emqx_schema_registry.hrl | 2 +- .../src/emqx_schema_registry.erl | 16 +++-- .../src/emqx_schema_registry_serde.erl | 49 +++++++++++---- .../test/emqx_schema_registry_serde_SUITE.erl | 60 +++++++++++++++++++ changes/ee/fix-13543.en.md | 1 + 5 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 changes/ee/fix-13543.en.md diff --git a/apps/emqx_schema_registry/include/emqx_schema_registry.hrl b/apps/emqx_schema_registry/include/emqx_schema_registry.hrl index b25042c20..289b167fd 100644 --- a/apps/emqx_schema_registry/include/emqx_schema_registry.hrl +++ b/apps/emqx_schema_registry/include/emqx_schema_registry.hrl @@ -34,7 +34,7 @@ type :: serde_type(), eval_context :: term(), %% for future use - extra = [] + extra = #{} }). -type serde() :: #serde{}. 
diff --git a/apps/emqx_schema_registry/src/emqx_schema_registry.erl b/apps/emqx_schema_registry/src/emqx_schema_registry.erl index f8d760ddc..3919cd7be 100644 --- a/apps/emqx_schema_registry/src/emqx_schema_registry.erl +++ b/apps/emqx_schema_registry/src/emqx_schema_registry.erl @@ -148,14 +148,19 @@ post_config_update( post_config_update( [?CONF_KEY_ROOT, schemas, NewName], _Cmd, - NewSchemas, - %% undefined or OldSchemas - _, + NewSchema, + OldSchema, _AppEnvs ) -> - case build_serdes([{NewName, NewSchemas}]) of + case OldSchema of + undefined -> + ok; + _ -> + ensure_serde_absent(NewName) + end, + case build_serdes([{NewName, NewSchema}]) of ok -> - {ok, #{NewName => NewSchemas}}; + {ok, #{NewName => NewSchema}}; {error, Reason, SerdesToRollback} -> lists:foreach(fun ensure_serde_absent/1, SerdesToRollback), {error, Reason} @@ -176,6 +181,7 @@ post_config_update(?CONF_KEY_PATH, _Cmd, NewConf = #{schemas := NewSchemas}, Old async_delete_serdes(RemovedNames) end, SchemasToBuild = maps:to_list(maps:merge(Changed, Added)), + ok = lists:foreach(fun ensure_serde_absent/1, [N || {N, _} <- SchemasToBuild]), case build_serdes(SchemasToBuild) of ok -> {ok, NewConf}; diff --git a/apps/emqx_schema_registry/src/emqx_schema_registry_serde.erl b/apps/emqx_schema_registry/src/emqx_schema_registry_serde.erl index 0e661c356..b5944a68e 100644 --- a/apps/emqx_schema_registry/src/emqx_schema_registry_serde.erl +++ b/apps/emqx_schema_registry/src/emqx_schema_registry_serde.erl @@ -48,6 +48,10 @@ -type eval_context() :: term(). +-type fingerprint() :: binary(). + +-type protobuf_cache_key() :: {schema_name(), fingerprint()}. + -export_type([serde_type/0]). 
%%------------------------------------------------------------------------------ @@ -175,11 +179,12 @@ make_serde(avro, Name, Source) -> eval_context = Store }; make_serde(protobuf, Name, Source) -> - SerdeMod = make_protobuf_serde_mod(Name, Source), + {CacheKey, SerdeMod} = make_protobuf_serde_mod(Name, Source), #serde{ name = Name, type = protobuf, - eval_context = SerdeMod + eval_context = SerdeMod, + extra = #{cache_key => CacheKey} }; make_serde(json, Name, Source) -> case json_decode(Source) of @@ -254,8 +259,9 @@ eval_encode(#serde{type = json, name = Name}, [Map]) -> destroy(#serde{type = avro, name = _Name}) -> ?tp(serde_destroyed, #{type => avro, name => _Name}), ok; -destroy(#serde{type = protobuf, name = _Name, eval_context = SerdeMod}) -> +destroy(#serde{type = protobuf, name = _Name, eval_context = SerdeMod} = Serde) -> unload_code(SerdeMod), + destroy_protobuf_code(Serde), ?tp(serde_destroyed, #{type => protobuf, name => _Name}), ok; destroy(#serde{type = json, name = Name}) -> @@ -282,13 +288,14 @@ jesse_validate(Name, Map) -> jesse_name(Str) -> unicode:characters_to_list(Str). --spec make_protobuf_serde_mod(schema_name(), schema_source()) -> module(). +-spec make_protobuf_serde_mod(schema_name(), schema_source()) -> {protobuf_cache_key(), module()}. make_protobuf_serde_mod(Name, Source) -> {SerdeMod0, SerdeModFileName} = protobuf_serde_mod_name(Name), case lazy_generate_protobuf_code(Name, SerdeMod0, Source) of {ok, SerdeMod, ModBinary} -> load_code(SerdeMod, SerdeModFileName, ModBinary), - SerdeMod; + CacheKey = protobuf_cache_key(Name, Source), + {CacheKey, SerdeMod}; {error, #{error := Error, warnings := Warnings}} -> ?SLOG( warning, @@ -310,6 +317,13 @@ protobuf_serde_mod_name(Name) -> SerdeModFileName = SerdeModName ++ ".memory", {SerdeMod, SerdeModFileName}. 
+%% Fixme: we cannot uncomment the following typespec because Dialyzer complains that +%% `Source' should be `string()' due to `gpb_compile:string/3', but it does work fine with +%% binaries... +%% -spec protobuf_cache_key(schema_name(), schema_source()) -> {schema_name(), fingerprint()}. +protobuf_cache_key(Name, Source) -> + {Name, erlang:md5(Source)}. + -spec lazy_generate_protobuf_code(schema_name(), module(), schema_source()) -> {ok, module(), binary()} | {error, #{error := term(), warnings := [term()]}}. lazy_generate_protobuf_code(Name, SerdeMod0, Source) -> @@ -326,9 +340,9 @@ lazy_generate_protobuf_code(Name, SerdeMod0, Source) -> -spec lazy_generate_protobuf_code_trans(schema_name(), module(), schema_source()) -> {ok, module(), binary()} | {error, #{error := term(), warnings := [term()]}}. lazy_generate_protobuf_code_trans(Name, SerdeMod0, Source) -> - Fingerprint = erlang:md5(Source), - _ = mnesia:lock({record, ?PROTOBUF_CACHE_TAB, Fingerprint}, write), - case mnesia:read(?PROTOBUF_CACHE_TAB, Fingerprint) of + CacheKey = protobuf_cache_key(Name, Source), + _ = mnesia:lock({record, ?PROTOBUF_CACHE_TAB, CacheKey}, write), + case mnesia:read(?PROTOBUF_CACHE_TAB, CacheKey) of [#protobuf_cache{module = SerdeMod, module_binary = ModBinary}] -> ?tp(schema_registry_protobuf_cache_hit, #{name => Name}), {ok, SerdeMod, ModBinary}; @@ -337,7 +351,7 @@ lazy_generate_protobuf_code_trans(Name, SerdeMod0, Source) -> case generate_protobuf_code(SerdeMod0, Source) of {ok, SerdeMod, ModBinary} -> CacheEntry = #protobuf_cache{ - fingerprint = Fingerprint, + fingerprint = CacheKey, module = SerdeMod, module_binary = ModBinary }, @@ -345,7 +359,7 @@ lazy_generate_protobuf_code_trans(Name, SerdeMod0, Source) -> {ok, SerdeMod, ModBinary}; {ok, SerdeMod, ModBinary, _Warnings} -> CacheEntry = #protobuf_cache{ - fingerprint = Fingerprint, + fingerprint = CacheKey, module = SerdeMod, module_binary = ModBinary }, @@ -390,6 +404,21 @@ unload_code(SerdeMod) -> _ = 
code:delete(SerdeMod), ok. +-spec destroy_protobuf_code(serde()) -> ok. +destroy_protobuf_code(Serde) -> + #serde{extra = #{cache_key := CacheKey}} = Serde, + {atomic, Res} = mria:transaction( + ?SCHEMA_REGISTRY_SHARD, + fun destroy_protobuf_code_trans/1, + [CacheKey] + ), + ?tp("schema_registry_protobuf_cache_destroyed", #{name => Serde#serde.name}), + Res. + +-spec destroy_protobuf_code_trans({schema_name(), fingerprint()}) -> ok. +destroy_protobuf_code_trans(CacheKey) -> + mnesia:delete(?PROTOBUF_CACHE_TAB, CacheKey, write). + -spec has_inner_type(serde_type(), eval_context(), [binary()]) -> boolean(). has_inner_type(protobuf, _SerdeMod, [_, _ | _]) -> diff --git a/apps/emqx_schema_registry/test/emqx_schema_registry_serde_SUITE.erl b/apps/emqx_schema_registry/test/emqx_schema_registry_serde_SUITE.erl index bdc083736..685e152e6 100644 --- a/apps/emqx_schema_registry/test/emqx_schema_registry_serde_SUITE.erl +++ b/apps/emqx_schema_registry/test/emqx_schema_registry_serde_SUITE.erl @@ -207,6 +207,66 @@ t_protobuf_invalid_schema(_Config) -> ), ok. +%% Checks that we unload code and clear code generation cache after destroying a protobuf +%% serde. +t_destroy_protobuf(_Config) -> + SerdeName = ?FUNCTION_NAME, + SerdeNameBin = atom_to_binary(SerdeName), + ?check_trace( + #{timetrap => 5_000}, + begin + Params = schema_params(protobuf), + ok = emqx_schema_registry:add_schema(SerdeName, Params), + {ok, {ok, _}} = + ?wait_async_action( + emqx_schema_registry:delete_schema(SerdeName), + #{?snk_kind := serde_destroyed, name := SerdeNameBin} + ), + %% Create again to check we don't hit the cache. 
+ ok = emqx_schema_registry:add_schema(SerdeName, Params), + {ok, {ok, _}} = + ?wait_async_action( + emqx_schema_registry:delete_schema(SerdeName), + #{?snk_kind := serde_destroyed, name := SerdeNameBin} + ), + ok + end, + fun(Trace) -> + ?assertMatch([], ?of_kind(schema_registry_protobuf_cache_hit, Trace)), + ?assertMatch([_ | _], ?of_kind("schema_registry_protobuf_cache_destroyed", Trace)), + ok + end + ), + ok. + +%% Checks that we don't leave entries lingering in the protobuf code cache table when +%% updating the source of a serde. +t_update_protobuf_cache(_Config) -> + SerdeName = ?FUNCTION_NAME, + ?check_trace( + #{timetrap => 5_000}, + begin + #{source := Source0} = Params0 = schema_params(protobuf), + ok = emqx_schema_registry:add_schema(SerdeName, Params0), + %% Now we touch the source so protobuf needs to be recompiled. + Source1 = <>, + Params1 = Params0#{source := Source1}, + {ok, {ok, _}} = + ?wait_async_action( + emqx_schema_registry:add_schema(SerdeName, Params1), + #{?snk_kind := "schema_registry_protobuf_cache_destroyed"} + ), + ok + end, + fun(Trace) -> + ?assertMatch([], ?of_kind(schema_registry_protobuf_cache_hit, Trace)), + ?assertMatch([_, _ | _], ?of_kind(schema_registry_protobuf_cache_miss, Trace)), + ?assertMatch([_ | _], ?of_kind("schema_registry_protobuf_cache_destroyed", Trace)), + ok + end + ), + ok. + t_json_invalid_schema(_Config) -> SerdeName = invalid_json, Params = schema_params(json), diff --git a/changes/ee/fix-13543.en.md b/changes/ee/fix-13543.en.md new file mode 100644 index 000000000..f9f56a5a6 --- /dev/null +++ b/changes/ee/fix-13543.en.md @@ -0,0 +1 @@ +Fixed an issue where the internal cache for Protobuf schemas in Schema Registry was not properly cleaned up after deleting or updating a schema. 
From ebb69f4ebf74b0d3f3cb074f0dc39d1ce088c10b Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 30 Jul 2024 10:56:58 -0300 Subject: [PATCH 06/50] fix(crl): force remove crl fields from SSL opts after listener update Fixes https://emqx.atlassian.net/browse/EMQX-12785 --- apps/emqx/src/emqx_listeners.erl | 21 +++-- apps/emqx/src/emqx_tls_lib.erl | 8 ++ apps/emqx/test/emqx_crl_cache_SUITE.erl | 107 +++++++++++++++++++++++- changes/ce/fix-13541.en.md | 1 + 4 files changed, 129 insertions(+), 8 deletions(-) create mode 100644 changes/ce/fix-13541.en.md diff --git a/apps/emqx/src/emqx_listeners.erl b/apps/emqx/src/emqx_listeners.erl index e325263c5..32599b3be 100644 --- a/apps/emqx/src/emqx_listeners.erl +++ b/apps/emqx/src/emqx_listeners.erl @@ -421,7 +421,7 @@ do_start_listener(Type, Name, Id, #{bind := ListenOn} = Opts) when ?ESOCKD_LISTE esockd:open( Id, ListenOn, - merge_default(esockd_opts(Id, Type, Name, Opts)) + merge_default(esockd_opts(Id, Type, Name, Opts, _OldOpts = undefined)) ); %% Start MQTT/WS listener do_start_listener(Type, Name, Id, Opts) when ?COWBOY_LISTENER(Type) -> @@ -465,7 +465,7 @@ do_update_listener(Type, Name, OldConf, NewConf = #{bind := ListenOn}) when Id = listener_id(Type, Name), case maps:get(bind, OldConf) of ListenOn -> - esockd:set_options({Id, ListenOn}, esockd_opts(Id, Type, Name, NewConf)); + esockd:set_options({Id, ListenOn}, esockd_opts(Id, Type, Name, NewConf, OldConf)); _Different -> %% TODO %% Again, we're not strictly required to drop live connections in this case. @@ -577,7 +577,7 @@ perform_listener_change(update, {{Type, Name, ConfOld}, {_, _, ConfNew}}) -> perform_listener_change(stop, {Type, Name, Conf}) -> stop_listener(Type, Name, Conf). 
-esockd_opts(ListenerId, Type, Name, Opts0) -> +esockd_opts(ListenerId, Type, Name, Opts0, OldOpts) -> Opts1 = maps:with([acceptors, max_connections, proxy_protocol, proxy_protocol_timeout], Opts0), Limiter = limiter(Opts0), Opts2 = @@ -609,7 +609,7 @@ esockd_opts(ListenerId, Type, Name, Opts0) -> tcp -> Opts3#{tcp_options => tcp_opts(Opts0)}; ssl -> - OptsWithCRL = inject_crl_config(Opts0), + OptsWithCRL = inject_crl_config(Opts0, OldOpts), OptsWithSNI = inject_sni_fun(ListenerId, OptsWithCRL), OptsWithRootFun = inject_root_fun(OptsWithSNI), OptsWithVerifyFun = inject_verify_fun(OptsWithRootFun), @@ -985,7 +985,7 @@ inject_sni_fun(_ListenerId, Conf) -> Conf. inject_crl_config( - Conf = #{ssl_options := #{enable_crl_check := true} = SSLOpts} + Conf = #{ssl_options := #{enable_crl_check := true} = SSLOpts}, _OldOpts ) -> HTTPTimeout = emqx_config:get([crl_cache, http_timeout], timer:seconds(15)), Conf#{ @@ -995,7 +995,16 @@ inject_crl_config( crl_cache => {emqx_ssl_crl_cache, {internal, [{http, HTTPTimeout}]}} } }; -inject_crl_config(Conf) -> +inject_crl_config(#{ssl_options := SSLOpts0} = Conf0, #{} = OldOpts) -> + %% Note: we must set crl options to `undefined' to unset them. Otherwise, + %% `esockd' will retain such options when `esockd:merge_opts/2' is called and the SSL + %% options were previously enabled. + WasEnabled = emqx_utils_maps:deep_get([ssl_options, enable_crl_check], OldOpts, false), + Undefine = fun(Acc, K) -> emqx_utils_maps:put_if(Acc, K, undefined, WasEnabled) end, + SSLOpts1 = Undefine(SSLOpts0, crl_check), + SSLOpts = Undefine(SSLOpts1, crl_cache), + Conf0#{ssl_options := SSLOpts}; +inject_crl_config(Conf, undefined = _OldOpts) -> Conf. 
maybe_unregister_ocsp_stapling_refresh( diff --git a/apps/emqx/src/emqx_tls_lib.erl b/apps/emqx/src/emqx_tls_lib.erl index e1de50385..1b9e89199 100644 --- a/apps/emqx/src/emqx_tls_lib.erl +++ b/apps/emqx/src/emqx_tls_lib.erl @@ -589,6 +589,14 @@ ensure_valid_options(Options, Versions) -> ensure_valid_options([], _, Acc) -> lists:reverse(Acc); +ensure_valid_options([{K, undefined} | T], Versions, Acc) when + K =:= crl_check; + K =:= crl_cache +-> + %% Note: we must set crl options to `undefined' to unset them. Otherwise, + %% `esockd' will retain such options when `esockd:merge_opts/2' is called and the SSL + %% options were previously enabled. + ensure_valid_options(T, Versions, [{K, undefined} | Acc]); ensure_valid_options([{_, undefined} | T], Versions, Acc) -> ensure_valid_options(T, Versions, Acc); ensure_valid_options([{_, ""} | T], Versions, Acc) -> diff --git a/apps/emqx/test/emqx_crl_cache_SUITE.erl b/apps/emqx/test/emqx_crl_cache_SUITE.erl index 1d7d0f1d5..51196439e 100644 --- a/apps/emqx/test/emqx_crl_cache_SUITE.erl +++ b/apps/emqx/test/emqx_crl_cache_SUITE.erl @@ -138,13 +138,14 @@ init_per_testcase(t_refresh_config = TestCase, Config) -> ]; init_per_testcase(TestCase, Config) when TestCase =:= t_update_listener; + TestCase =:= t_update_listener_enable_disable; TestCase =:= t_validations -> ct:timetrap({seconds, 30}), ok = snabbkaffe:start_trace(), %% when running emqx standalone tests, we can't use those %% features. 
- case does_module_exist(emqx_management) of + case does_module_exist(emqx_mgmt) of true -> DataDir = ?config(data_dir, Config), CRLFile = filename:join([DataDir, "intermediate-revoked.crl.pem"]), @@ -165,7 +166,7 @@ init_per_testcase(TestCase, Config) when {emqx_conf, #{config => #{listeners => #{ssl => #{default => ListenerConf}}}}}, emqx, emqx_management, - {emqx_dashboard, "dashboard.listeners.http { enable = true, bind = 18083 }"} + emqx_mgmt_api_test_util:emqx_dashboard() ], #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} ), @@ -206,6 +207,7 @@ read_crl(Filename) -> end_per_testcase(TestCase, Config) when TestCase =:= t_update_listener; + TestCase =:= t_update_listener_enable_disable; TestCase =:= t_validations -> Skip = proplists:get_bool(skip_does_not_apply, Config), @@ -1057,3 +1059,104 @@ do_t_validations(_Config) -> ), ok. + +%% Checks that if CRL is ever enabled and then disabled, clients can connect, even if they +%% would otherwise not have their corresponding CRLs cached and fail with `{bad_crls, +%% no_relevant_crls}`. +t_update_listener_enable_disable(Config) -> + case proplists:get_bool(skip_does_not_apply, Config) of + true -> + ct:pal("skipping as this test does not apply in this profile"), + ok; + false -> + do_t_update_listener_enable_disable(Config) + end. 
+ +do_t_update_listener_enable_disable(Config) -> + DataDir = ?config(data_dir, Config), + Keyfile = filename:join([DataDir, "server.key.pem"]), + Certfile = filename:join([DataDir, "server.cert.pem"]), + Cacertfile = filename:join([DataDir, "ca-chain.cert.pem"]), + ClientCert = filename:join(DataDir, "client.cert.pem"), + ClientKey = filename:join(DataDir, "client.key.pem"), + + ListenerId = "ssl:default", + %% Enable CRL + {ok, {{_, 200, _}, _, ListenerData0}} = get_listener_via_api(ListenerId), + CRLConfig0 = + #{ + <<"ssl_options">> => + #{ + <<"keyfile">> => Keyfile, + <<"certfile">> => Certfile, + <<"cacertfile">> => Cacertfile, + <<"enable_crl_check">> => true, + <<"fail_if_no_peer_cert">> => true + } + }, + ListenerData1 = emqx_utils_maps:deep_merge(ListenerData0, CRLConfig0), + {ok, {_, _, ListenerData2}} = update_listener_via_api(ListenerId, ListenerData1), + ?assertMatch( + #{ + <<"ssl_options">> := + #{ + <<"enable_crl_check">> := true, + <<"verify">> := <<"verify_peer">>, + <<"fail_if_no_peer_cert">> := true + } + }, + ListenerData2 + ), + + %% Disable CRL + CRLConfig1 = + #{ + <<"ssl_options">> => + #{ + <<"keyfile">> => Keyfile, + <<"certfile">> => Certfile, + <<"cacertfile">> => Cacertfile, + <<"enable_crl_check">> => false, + <<"fail_if_no_peer_cert">> => true + } + }, + ListenerData3 = emqx_utils_maps:deep_merge(ListenerData2, CRLConfig1), + redbug:start( + [ + "esockd_server:get_listener_prop -> return", + "esockd_server:set_listener_prop -> return", + "esockd:merge_opts -> return", + "esockd_listener_sup:set_options -> return", + "emqx_listeners:inject_crl_config -> return" + ], + [{msgs, 100}] + ), + {ok, {_, _, ListenerData4}} = update_listener_via_api(ListenerId, ListenerData3), + ?assertMatch( + #{ + <<"ssl_options">> := + #{ + <<"enable_crl_check">> := false, + <<"verify">> := <<"verify_peer">>, + <<"fail_if_no_peer_cert">> := true + } + }, + ListenerData4 + ), + + %% Now the client that would be blocked tries to connect and should now be 
allowed. + {ok, C} = emqtt:start_link([ + {ssl, true}, + {ssl_opts, [ + {certfile, ClientCert}, + {keyfile, ClientKey}, + {verify, verify_none} + ]}, + {port, 8883} + ]), + ?assertMatch({ok, _}, emqtt:connect(C)), + emqtt:stop(C), + + ?assertNotReceive({http_get, _}), + + ok. diff --git a/changes/ce/fix-13541.en.md b/changes/ce/fix-13541.en.md new file mode 100644 index 000000000..48013cc19 --- /dev/null +++ b/changes/ce/fix-13541.en.md @@ -0,0 +1 @@ +Previously, if CRL checks were ever enabled for a listener, later disabling them via the configuration would not actually disable them until the listener restarted. This has been fixed. From 8d8ff6cf5d84edfaa14e3136ee79f5453144adaa Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Wed, 31 Jul 2024 10:27:04 +0200 Subject: [PATCH 07/50] ci: fix docker images build /etc/docker/daemon.json requires root for read access --- .github/workflows/build_and_push_docker_images.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_push_docker_images.yaml b/.github/workflows/build_and_push_docker_images.yaml index 7224444fc..0cec2bbde 100644 --- a/.github/workflows/build_and_push_docker_images.yaml +++ b/.github/workflows/build_and_push_docker_images.yaml @@ -122,9 +122,10 @@ jobs: run: | ls -lR _packages/$PROFILE mv _packages/$PROFILE/*.tar.gz ./ + - name: Enable containerd image store on Docker Engine run: | - echo "$(jq '. += {"features": {"containerd-snapshotter": true}}' /etc/docker/daemon.json)" > daemon.json + echo "$(sudo cat /etc/docker/daemon.json | jq '. 
+= {"features": {"containerd-snapshotter": true}}')" > daemon.json sudo mv daemon.json /etc/docker/daemon.json sudo systemctl restart docker From a2465519148f165b12b35e0bdc03ac1236da5fdc Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 31 Jul 2024 16:15:51 +0800 Subject: [PATCH 08/50] fix: add a startup timeout limit for the plugin application --- apps/emqx_plugins/src/emqx_plugins.erl | 34 ++++++++++++++++++++------ changes/ce/fix-13552.en.md | 8 ++++++ 2 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 changes/ce/fix-13552.en.md diff --git a/apps/emqx_plugins/src/emqx_plugins.erl b/apps/emqx_plugins/src/emqx_plugins.erl index 253d1b4ef..b13a28e2d 100644 --- a/apps/emqx_plugins/src/emqx_plugins.erl +++ b/apps/emqx_plugins/src/emqx_plugins.erl @@ -992,19 +992,22 @@ do_load_plugin_app(AppName, Ebin) -> end. start_app(App) -> - case application:ensure_all_started(App) of - {ok, Started} -> + case run_with_timeout(application, ensure_all_started, [App], 10_000) of + {ok, {ok, Started}} -> case Started =/= [] of true -> ?SLOG(debug, #{msg => "started_plugin_apps", apps => Started}); false -> ok - end, - ?SLOG(debug, #{msg => "started_plugin_app", app => App}), - ok; - {error, {ErrApp, Reason}} -> + end; + {ok, {error, Reason}} -> + throw(#{ + msg => "failed_to_start_app", + app => App, + reason => Reason + }); + {error, Reason} -> throw(#{ msg => "failed_to_start_plugin_app", app => App, - err_app => ErrApp, reason => Reason }) end. @@ -1525,3 +1528,20 @@ bin(B) when is_binary(B) -> B. wrap_to_list(Path) -> binary_to_list(iolist_to_binary(Path)). + +run_with_timeout(Module, Function, Args, Timeout) -> + Self = self(), + Fun = fun() -> + Result = apply(Module, Function, Args), + Self ! {self(), Result} + end, + Pid = spawn(Fun), + TimerRef = erlang:send_after(Timeout, self(), {timeout, Pid}), + receive + {Pid, Result} -> + erlang:cancel_timer(TimerRef), + {ok, Result}; + {timeout, Pid} -> + exit(Pid, kill), + {error, timeout} + end. 
diff --git a/changes/ce/fix-13552.en.md b/changes/ce/fix-13552.en.md new file mode 100644 index 000000000..5af1b4140 --- /dev/null +++ b/changes/ce/fix-13552.en.md @@ -0,0 +1,8 @@ +Add a startup timeout limit for plugin applications. Currently the timeout is 10 seconds. + +Starting a bad plugin while EMQX is running will result in a thrown runtime error. +Previously, when EMQX was stopped and restarted, the main startup process could hang due to plugin application start failures. + +This could happen when restarting EMQX with, for example: +- A modified config file that enables the bad plugin. +- A newly added plugin with a bad plugin config. From c658cfe26997a8026606b2ef85221f9255d2dd6d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 31 Jul 2024 17:17:13 +0800 Subject: [PATCH 09/50] fix: make static_check happy --- apps/emqx_plugins/src/emqx_plugins.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_plugins/src/emqx_plugins.erl b/apps/emqx_plugins/src/emqx_plugins.erl index b13a28e2d..005df026b 100644 --- a/apps/emqx_plugins/src/emqx_plugins.erl +++ b/apps/emqx_plugins/src/emqx_plugins.erl @@ -1539,7 +1539,7 @@ run_with_timeout(Module, Function, Args, Timeout) -> TimerRef = erlang:send_after(Timeout, self(), {timeout, Pid}), receive {Pid, Result} -> - erlang:cancel_timer(TimerRef), + _ = erlang:cancel_timer(TimerRef), {ok, Result}; {timeout, Pid} -> exit(Pid, kill), {error, timeout} From 9f97bff7d0f83b58d7b6ac71c1458d48e0a480e4 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 30 Jul 2024 18:21:53 -0300 Subject: [PATCH 10/50] feat: expose `resource_opts.query_mode` for pulsar action Fixes https://emqx.atlassian.net/browse/EMQX-12782 --- .../test/emqx_bridge_v2_testlib.erl | 5 +- .../src/emqx_bridge_pulsar.app.src | 2 +- .../src/emqx_bridge_pulsar_action_info.erl | 28 ++- .../src/emqx_bridge_pulsar_connector.erl | 26 ++- .../src/emqx_bridge_pulsar_pubsub_schema.erl | 4 +- .../emqx_bridge_pulsar_connector_SUITE.erl | 9 +- .../test/emqx_bridge_pulsar_v2_SUITE.erl | 189
+++++++++++------- .../src/emqx_resource_buffer_worker.erl | 12 +- changes/ee/feat-13546.en.md | 1 + 9 files changed, 182 insertions(+), 94 deletions(-) create mode 100644 changes/ee/feat-13546.en.md diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl index d98f4f926..46d1883bd 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl @@ -889,7 +889,8 @@ t_sync_query_down(Config, Opts) -> ), ?force_ordering( - #{?snk_kind := call_query}, + #{?snk_kind := SNKKind} when + SNKKind =:= call_query orelse SNKKind =:= simple_query_enter, #{?snk_kind := cut_connection, ?snk_span := start} ), %% Note: order of arguments here is reversed compared to `?force_ordering'. @@ -913,6 +914,7 @@ t_sync_query_down(Config, Opts) -> emqx_common_test_helpers:enable_failure(down, ProxyName, ProxyHost, ProxyPort) ) end), + ?tp("publishing_message", #{}), try {_, {ok, _}} = snabbkaffe:wait_async_action( @@ -921,6 +923,7 @@ t_sync_query_down(Config, Opts) -> infinity ) after + ?tp("healing_failure", #{}), emqx_common_test_helpers:heal_failure(down, ProxyName, ProxyHost, ProxyPort) end, {ok, _} = snabbkaffe:block_until(SuccessTPFilter, infinity), diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src index a8eeba483..dcb86a3ca 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_pulsar, [ {description, "EMQX Pulsar Bridge"}, - {vsn, "0.2.2"}, + {vsn, "0.2.3"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl index 6d15687f6..fb9a38cc6 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl +++ 
b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_action_info.erl @@ -11,7 +11,8 @@ action_type_name/0, connector_type_name/0, schema_module/0, - is_action/1 + is_action/1, + connector_action_config_to_bridge_v1_config/2 ]). is_action(_) -> true. @@ -23,3 +24,28 @@ action_type_name() -> pulsar. connector_type_name() -> pulsar. schema_module() -> emqx_bridge_pulsar_pubsub_schema. + +connector_action_config_to_bridge_v1_config(ConnectorConfig, ActionConfig) -> + BridgeV1Config1 = emqx_action_info:connector_action_config_to_bridge_v1_config( + ConnectorConfig, ActionConfig + ), + BridgeV1Config = maps:with(v1_fields(pulsar_producer), BridgeV1Config1), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun(RO) -> maps:with(v1_fields(producer_resource_opts), RO) end, + BridgeV1Config + ). + +%%------------------------------------------------------------------------------------------ +%% Internal helper functions +%%------------------------------------------------------------------------------------------ + +v1_fields(Struct) -> + [ + to_bin(K) + || {K, _} <- emqx_bridge_pulsar:fields(Struct) + ]. + +to_bin(B) when is_binary(B) -> B; +to_bin(L) when is_list(L) -> list_to_binary(L); +to_bin(A) when is_atom(A) -> atom_to_binary(A, utf8). diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl index 9d269493d..c98dd19ed 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_connector.erl @@ -58,6 +58,8 @@ callback_mode() -> async_if_possible. +query_mode(#{resource_opts := #{query_mode := sync}}) -> + simple_sync_internal_buffer; query_mode(_Config) -> simple_async_internal_buffer. 
@@ -202,12 +204,17 @@ on_query(_InstanceId, {ChannelId, Message}, State) -> sync_timeout => SyncTimeout, is_async => false }), - try - pulsar:send_sync(Producers, [PulsarMessage], SyncTimeout) - catch - error:timeout -> - {error, timeout} - end + ?tp_span( + "pulsar_producer_query_enter", + #{instance_id => _InstanceId, message => Message, mode => sync}, + try + ?tp("pulsar_producer_send", #{msg => PulsarMessage, mode => sync}), + pulsar:send_sync(Producers, [PulsarMessage], SyncTimeout) + catch + error:timeout -> + {error, timeout} + end + ) end. -spec on_query_async( @@ -218,11 +225,11 @@ on_query_async(_InstanceId, {ChannelId, Message}, AsyncReplyFn, State) -> #{channels := Channels} = State, case maps:find(ChannelId, Channels) of error -> - {error, channel_not_found}; + {error, {unrecoverable_error, channel_not_found}}; {ok, #{message := MessageTmpl, producers := Producers}} -> ?tp_span( - pulsar_producer_on_query_async, - #{instance_id => _InstanceId, message => Message}, + "pulsar_producer_query_enter", + #{instance_id => _InstanceId, message => Message, mode => async}, on_query_async2(ChannelId, Producers, Message, MessageTmpl, AsyncReplyFn) ) end. @@ -233,6 +240,7 @@ on_query_async2(ChannelId, Producers, Message, MessageTmpl, AsyncReplyFn) -> message => PulsarMessage, is_async => true }), + ?tp("pulsar_producer_send", #{msg => PulsarMessage, mode => async}), pulsar:send(Producers, [PulsarMessage], #{callback_fn => AsyncReplyFn}). 
on_format_query_result({ok, Info}) -> diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl index dff62843e..515fcdb5a 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_pubsub_schema.erl @@ -66,10 +66,8 @@ fields(action_resource_opts) -> batch_size, batch_time, worker_pool_size, - request_ttl, inflight_window, - max_buffer_bytes, - query_mode + max_buffer_bytes ], lists:filter( fun({K, _V}) -> not lists:member(K, UnsupportedOpts) end, diff --git a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl index cd54e2194..0a908f5be 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_connector_SUITE.erl @@ -843,7 +843,8 @@ do_t_send_with_failure(Config, FailureType) -> ?wait_async_action( emqx:publish(Message0), #{ - ?snk_kind := pulsar_producer_on_query_async, + ?snk_kind := "pulsar_producer_query_enter", + mode := async, ?snk_span := {complete, _} }, 5_000 @@ -970,7 +971,11 @@ t_producer_process_crash(Config) -> {_, {ok, _}} = ?wait_async_action( emqx:publish(Message0), - #{?snk_kind := pulsar_producer_on_query_async, ?snk_span := {complete, _}}, + #{ + ?snk_kind := "pulsar_producer_query_enter", + mode := async, + ?snk_span := {complete, _} + }, 5_000 ), Data0 = receive_consumed(20_000), diff --git a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl index 11caa15c6..94534fafd 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_v2_SUITE.erl @@ -23,31 +23,25 @@ %%------------------------------------------------------------------------------ all() -> - [ - {group, plain}, - {group, tls} - ]. 
+ All0 = emqx_common_test_helpers:all(?MODULE), + All = All0 -- matrix_cases(), + Groups = lists:map(fun({G, _, _}) -> {group, G} end, groups()), + Groups ++ All. groups() -> - AllTCs = emqx_common_test_helpers:all(?MODULE), - [ - {plain, AllTCs}, - {tls, AllTCs} - ]. + emqx_common_test_helpers:matrix_to_groups(?MODULE, matrix_cases()). + +matrix_cases() -> + emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - %% Ensure enterprise bridge module is loaded - _ = emqx_bridge_enterprise:module_info(), - {ok, Cwd} = file:get_cwd(), - PrivDir = ?config(priv_dir, Config), - WorkDir = emqx_utils_fs:find_relpath(filename:join(PrivDir, "ebp"), Cwd), Apps = emqx_cth_suite:start( lists:flatten([ ?APPS, emqx_management, emqx_mgmt_api_test_util:emqx_dashboard() ]), - #{work_dir => WorkDir} + #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{suite_apps, Apps} | Config]. @@ -61,6 +55,7 @@ init_per_group(plain = Type, Config) -> case emqx_common_test_helpers:is_tcp_server_available(PulsarHost, PulsarPort) of true -> Config1 = common_init_per_group(), + ConnectorName = ?MODULE, NewConfig = [ {proxy_name, ProxyName}, @@ -70,7 +65,7 @@ init_per_group(plain = Type, Config) -> {use_tls, false} | Config1 ++ Config ], - create_connector(?MODULE, NewConfig), + create_connector(ConnectorName, NewConfig), NewConfig; false -> maybe_skip_without_ci() @@ -82,6 +77,7 @@ init_per_group(tls = Type, Config) -> case emqx_common_test_helpers:is_tcp_server_available(PulsarHost, PulsarPort) of true -> Config1 = common_init_per_group(), + ConnectorName = ?MODULE, NewConfig = [ {proxy_name, ProxyName}, @@ -91,17 +87,21 @@ init_per_group(tls = Type, Config) -> {use_tls, true} | Config1 ++ Config ], - create_connector(?MODULE, NewConfig), + create_connector(ConnectorName, NewConfig), NewConfig; false -> maybe_skip_without_ci() - end. + end; +init_per_group(_Group, Config) -> + Config. 
end_per_group(Group, Config) when Group =:= plain; Group =:= tls -> common_end_per_group(Config), + ok; +end_per_group(_Group, _Config) -> ok. common_init_per_group() -> @@ -189,66 +189,49 @@ pulsar_connector(Config) -> ":", integer_to_binary(PulsarPort) ]), - Connector = #{ - <<"connectors">> => #{ - <<"pulsar">> => #{ - Name => #{ - <<"enable">> => true, - <<"ssl">> => #{ - <<"enable">> => UseTLS, - <<"verify">> => <<"verify_none">>, - <<"server_name_indication">> => <<"auto">> - }, - <<"authentication">> => <<"none">>, - <<"servers">> => ServerURL - } - } - } + InnerConfigMap = #{ + <<"enable">> => true, + <<"ssl">> => #{ + <<"enable">> => UseTLS, + <<"verify">> => <<"verify_none">>, + <<"server_name_indication">> => <<"auto">> + }, + <<"authentication">> => <<"none">>, + <<"servers">> => ServerURL }, - parse_and_check(<<"connectors">>, emqx_connector_schema, Connector, Name). + emqx_bridge_v2_testlib:parse_and_check_connector(?TYPE, Name, InnerConfigMap). pulsar_action(Config) -> + QueryMode = proplists:get_value(query_mode, Config, <<"sync">>), Name = atom_to_binary(?MODULE), - Action = #{ - <<"actions">> => #{ - <<"pulsar">> => #{ - Name => #{ - <<"connector">> => Name, - <<"enable">> => true, - <<"parameters">> => #{ - <<"retention_period">> => <<"infinity">>, - <<"max_batch_bytes">> => <<"1MB">>, - <<"batch_size">> => 100, - <<"strategy">> => <<"random">>, - <<"buffer">> => #{ - <<"mode">> => <<"memory">>, - <<"per_partition_limit">> => <<"10MB">>, - <<"segment_bytes">> => <<"5MB">>, - <<"memory_overload_protection">> => true - }, - <<"message">> => #{ - <<"key">> => <<"${.clientid}">>, - <<"value">> => <<"${.}">> - }, - <<"pulsar_topic">> => ?config(pulsar_topic, Config) - }, - <<"resource_opts">> => #{ - <<"health_check_interval">> => <<"1s">>, - <<"metrics_flush_interval">> => <<"300ms">> - } - } - } + InnerConfigMap = #{ + <<"connector">> => Name, + <<"enable">> => true, + <<"parameters">> => #{ + <<"retention_period">> => <<"infinity">>, + 
<<"max_batch_bytes">> => <<"1MB">>, + <<"batch_size">> => 100, + <<"strategy">> => <<"random">>, + <<"buffer">> => #{ + <<"mode">> => <<"memory">>, + <<"per_partition_limit">> => <<"10MB">>, + <<"segment_bytes">> => <<"5MB">>, + <<"memory_overload_protection">> => true + }, + <<"message">> => #{ + <<"key">> => <<"${.clientid}">>, + <<"value">> => <<"${.}">> + }, + <<"pulsar_topic">> => ?config(pulsar_topic, Config) + }, + <<"resource_opts">> => #{ + <<"query_mode">> => QueryMode, + <<"request_ttl">> => <<"1s">>, + <<"health_check_interval">> => <<"1s">>, + <<"metrics_flush_interval">> => <<"300ms">> } }, - parse_and_check(<<"actions">>, emqx_bridge_v2_schema, Action, Name). - -parse_and_check(Key, Mod, Conf, Name) -> - ConfStr = hocon_pp:do(Conf, #{}), - ct:pal(ConfStr), - {ok, RawConf} = hocon:binary(ConfStr, #{format => map}), - hocon_tconf:check_plain(Mod, RawConf, #{required => false, atom_key => false}), - #{Key := #{<<"pulsar">> := #{Name := RetConf}}} = RawConf, - RetConf. + emqx_bridge_v2_testlib:parse_and_check(action, ?TYPE, Name, InnerConfigMap). instance_id(Type, Name) -> ConnectorId = emqx_bridge_resource:resource_id(Type, ?TYPE, Name), @@ -404,20 +387,44 @@ assert_status_api(Line, Type, Name, Status) -> ). -define(assertStatusAPI(TYPE, NAME, STATUS), assert_status_api(?LINE, TYPE, NAME, STATUS)). +proplists_with(Keys, PList) -> + lists:filter(fun({K, _}) -> lists:member(K, Keys) end, PList). + +group_path(Config) -> + case emqx_common_test_helpers:group_path(Config) of + [] -> + undefined; + Path -> + Path + end. 
+ %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ -t_action_probe(Config) -> +t_action_probe(matrix) -> + [[plain], [tls]]; +t_action_probe(Config) when is_list(Config) -> Name = atom_to_binary(?FUNCTION_NAME), Action = pulsar_action(Config), {ok, Res0} = emqx_bridge_v2_testlib:probe_bridge_api(action, ?TYPE, Name, Action), ?assertMatch({{_, 204, _}, _, _}, Res0), ok. -t_action(Config) -> +t_action(matrix) -> + [ + [plain, async], + [plain, sync], + [tls, async] + ]; +t_action(Config) when is_list(Config) -> + QueryMode = + case group_path(Config) of + [_, QM | _] -> atom_to_binary(QM); + _ -> <<"async">> + end, Name = atom_to_binary(?FUNCTION_NAME), - create_action(Name, Config), + create_action(Name, [{query_mode, QueryMode} | Config]), Actions = emqx_bridge_v2:list(actions), Any = fun(#{name := BName}) -> BName =:= Name end, ?assert(lists:any(Any, Actions), Actions), @@ -465,7 +472,9 @@ t_action(Config) -> %% Tests that deleting/disabling an action that share the same Pulsar topic with other %% actions do not disturb the latter. -t_multiple_actions_sharing_topic(Config) -> +t_multiple_actions_sharing_topic(matrix) -> + [[plain], [tls]]; +t_multiple_actions_sharing_topic(Config) when is_list(Config) -> Type = ?TYPE, ConnectorName = <<"c">>, ConnectorConfig = pulsar_connector(Config), @@ -546,3 +555,31 @@ t_multiple_actions_sharing_topic(Config) -> [] ), ok. 
+ +t_sync_query_down(matrix) -> + [[plain]]; +t_sync_query_down(Config0) when is_list(Config0) -> + ct:timetrap({seconds, 15}), + Payload = #{<<"x">> => <<"some data">>}, + PayloadBin = emqx_utils_json:encode(Payload), + ClientId = <<"some_client">>, + Opts = #{ + make_message_fn => fun(Topic) -> emqx_message:make(ClientId, Topic, PayloadBin) end, + enter_tp_filter => + ?match_event(#{?snk_kind := "pulsar_producer_send"}), + error_tp_filter => + ?match_event(#{?snk_kind := "resource_simple_sync_internal_buffer_query_timeout"}), + success_tp_filter => + ?match_event(#{?snk_kind := pulsar_echo_consumer_message}) + }, + Config = [ + {connector_type, ?TYPE}, + {connector_name, ?FUNCTION_NAME}, + {connector_config, pulsar_connector(Config0)}, + {action_type, ?TYPE}, + {action_name, ?FUNCTION_NAME}, + {action_config, pulsar_action(Config0)} + | proplists_with([proxy_name, proxy_host, proxy_port], Config0) + ], + emqx_bridge_v2_testlib:t_sync_query_down(Config, Opts), + ok. diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 516795f39..8eb7b373d 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -198,6 +198,9 @@ simple_sync_internal_buffer_query(Id, Request, QueryOpts0) -> QueryOpts = #{timeout := Timeout} = maps:merge(simple_query_opts(), QueryOpts1), case simple_async_query(Id, Request, QueryOpts) of {error, _} = Error -> + ?tp("resource_simple_sync_internal_buffer_query_error", #{ + id => Id, request => Request + }), Error; {async_return, {error, _} = Error} -> Error; @@ -210,7 +213,11 @@ simple_sync_internal_buffer_query(Id, Request, QueryOpts0) -> receive {ReplyAlias, Response} -> Response - after 0 -> {error, timeout} + after 0 -> + ?tp("resource_simple_sync_internal_buffer_query_timeout", #{ + id => Id, request => Request + }), + {error, timeout} end end end @@ -1302,6 +1309,7 @@ do_call_query(QM, Id, Index, 
Ref, Query, #{query_mode := ReqQM} = QueryOpts, Res ?tp(simple_query_override, #{query_mode => ReqQM}), #{mod := Mod, state := ResSt, callback_mode := CBM, added_channels := Channels} = Resource, CallMode = call_mode(QM, CBM), + ?tp(simple_query_enter, #{}), apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, Channels, QueryOpts); do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{query_mode := ResQM} = Resource) when ResQM =:= simple_sync_internal_buffer; ResQM =:= simple_async_internal_buffer @@ -1309,6 +1317,7 @@ do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{query_mode := ResQM} = Res %% The connector supports buffer, send even in disconnected state #{mod := Mod, state := ResSt, callback_mode := CBM, added_channels := Channels} = Resource, CallMode = call_mode(QM, CBM), + ?tp(simple_query_enter, #{}), apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, Channels, QueryOpts); do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{status := connected} = Resource) -> %% when calling from the buffer worker or other simple queries, @@ -2297,6 +2306,7 @@ reply_call(Alias, Response) -> %% Used by `simple_sync_internal_buffer_query' to reply and chain existing `reply_to' %% callbacks. reply_call_internal_buffer(ReplyAlias, MaybeReplyTo, Response) -> + ?tp("reply_call_internal_buffer", #{}), ?MODULE:reply_call(ReplyAlias, Response), do_reply_caller(MaybeReplyTo, Response). diff --git a/changes/ee/feat-13546.en.md b/changes/ee/feat-13546.en.md new file mode 100644 index 000000000..c403409ac --- /dev/null +++ b/changes/ee/feat-13546.en.md @@ -0,0 +1 @@ +Added the option to configure the query mode for Pulsar Producer action. 
From d4508a4f1d226a8515bb12e8a7828a9c410a0c0f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 25 Jun 2024 16:52:48 +0800 Subject: [PATCH 11/50] chore: sync master `elvis.config` --- elvis.config | 131 ++++++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 58 deletions(-) diff --git a/elvis.config b/elvis.config index 87d739865..5095318e0 100644 --- a/elvis.config +++ b/elvis.config @@ -1,62 +1,77 @@ %% -*- mode: erlang -*- [ - { - elvis, - [ - {config, - [ - #{dirs => ["src", "apps/**/src"], - filter => "*.erl", - ruleset => erl_files, - rules => [ - {elvis_style, macro_names, disable}, - {elvis_style, function_naming_convention, disable}, - {elvis_style, state_record_and_type, disable}, - {elvis_style, no_common_caveats_call, #{}}, - {elvis_style, no_debug_call, #{ debug_functions => [ {ct, pal} - , {ct, print} - ]}}, - {elvis_style, operator_spaces, #{rules => [{right, "|"}, - {left, "|"}, - {right, "||"}, - {left, "||"}]}}, - {elvis_style, dont_repeat_yourself, #{ min_complexity => 20 }}, - {elvis_style, god_modules, #{limit => 100}}, - {elvis_text_style, line_length, #{ limit => 120 % trust erlfmt - , skip_comments => false - }} - ] - }, - #{dirs => ["test", "apps/**/test"], - filter => "*.erl", - rules => [ - {elvis_text_style, line_length, #{ limit => 120 - , skip_comments => false - }}, - {elvis_style, dont_repeat_yourself, #{ min_complexity => 100 }}, - {elvis_style, nesting_level, #{ level => 6 }} - ] - }, - #{dirs => ["apps/emqx_rule_engine/src"], - filter => "*_rule_funcs.erl", - rules => [ - {elvis_style, god_modules, disable} - ] - }, - #{dirs => ["."], - filter => "Makefile", - ruleset => makefiles - }, - #{dirs => ["."], - filter => "rebar.config", - ruleset => rebar_config - }, - #{dirs => ["."], - filter => "elvis.config", - ruleset => elvis_config - } - ] + { + elvis, + [ + {config, [ + #{ + dirs => ["src", "apps/**/src"], + filter => "*.erl", + ruleset => erl_files, + rules => [ + {elvis_style, 
param_pattern_matching, disable}, + {elvis_style, macro_names, disable}, + {elvis_style, function_naming_convention, disable}, + {elvis_style, state_record_and_type, disable}, + {elvis_style, no_common_caveats_call, #{}}, + {elvis_style, no_debug_call, #{ + debug_functions => [ + {ct, pal}, + {ct, print} + ] + }}, + {elvis_style, operator_spaces, #{ + rules => [ + {right, "|"}, + {left, "|"}, + {right, "||"}, + {left, "||"} + ] + }}, + {elvis_style, dont_repeat_yourself, #{min_complexity => 20}}, + {elvis_style, god_modules, #{limit => 100}}, + % trust erlfmt + {elvis_text_style, line_length, #{ + limit => 120, + skip_comments => false + }} + ] + }, + #{ + dirs => ["test", "apps/**/test"], + filter => "*.erl", + rules => [ + {elvis_text_style, line_length, #{ + limit => 120, + skip_comments => false + }}, + {elvis_style, dont_repeat_yourself, #{min_complexity => 100}}, + {elvis_style, nesting_level, #{level => 6}} + ] + }, + #{ + dirs => ["apps/emqx_rule_engine/src"], + filter => "*_rule_funcs.erl", + rules => [ + {elvis_style, god_modules, disable} + ] + }, + #{ + dirs => ["."], + filter => "Makefile", + ruleset => makefiles + }, + #{ + dirs => ["."], + filter => "rebar.config", + ruleset => rebar_config + }, + #{ + dirs => ["."], + filter => "elvis.config", + ruleset => elvis_config + } + ]} + ] } - ] - } ]. 
From 6db1c0a446678e18656fde6fd45a7693a905208e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 25 Jun 2024 18:27:09 +0800 Subject: [PATCH 12/50] refactor: separate function to handle `frame_error` --- apps/emqx/src/emqx_channel.erl | 59 +++++++++++++++----------- apps/emqx/src/emqx_frame.erl | 2 +- apps/emqx/src/emqx_quic_connection.erl | 8 ++-- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index a27c534e2..f7c76cade 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -563,29 +563,8 @@ handle_in( process_disconnect(ReasonCode, Properties, NChannel); handle_in(?AUTH_PACKET(), Channel) -> handle_out(disconnect, ?RC_IMPLEMENTATION_SPECIFIC_ERROR, Channel); -handle_in({frame_error, Reason}, Channel = #channel{conn_state = idle}) -> - shutdown(shutdown_count(frame_error, Reason), Channel); -handle_in( - {frame_error, #{cause := frame_too_large} = R}, Channel = #channel{conn_state = connecting} -) -> - shutdown( - shutdown_count(frame_error, R), ?CONNACK_PACKET(?RC_PACKET_TOO_LARGE), Channel - ); -handle_in({frame_error, Reason}, Channel = #channel{conn_state = connecting}) -> - shutdown(shutdown_count(frame_error, Reason), ?CONNACK_PACKET(?RC_MALFORMED_PACKET), Channel); -handle_in( - {frame_error, #{cause := frame_too_large}}, Channel = #channel{conn_state = ConnState} -) when - ConnState =:= connected orelse ConnState =:= reauthenticating --> - handle_out(disconnect, {?RC_PACKET_TOO_LARGE, frame_too_large}, Channel); -handle_in({frame_error, Reason}, Channel = #channel{conn_state = ConnState}) when - ConnState =:= connected orelse ConnState =:= reauthenticating --> - handle_out(disconnect, {?RC_MALFORMED_PACKET, Reason}, Channel); -handle_in({frame_error, Reason}, Channel = #channel{conn_state = disconnected}) -> - ?SLOG(error, #{msg => "malformed_mqtt_message", reason => Reason}), - {ok, Channel}; +handle_in({frame_error, Reason}, Channel) -> + 
handle_frame_error(Reason, Channel); handle_in(Packet, Channel) -> ?SLOG(error, #{msg => "disconnecting_due_to_unexpected_message", packet => Packet}), handle_out(disconnect, ?RC_PROTOCOL_ERROR, Channel). @@ -1017,6 +996,37 @@ not_nacked({deliver, _Topic, Msg}) -> true end. +%%-------------------------------------------------------------------- +%% Handle Frame Error +%%-------------------------------------------------------------------- + +handle_frame_error( + Reason, + Channel = #channel{conn_state = idle} +) -> + shutdown(shutdown_count(frame_error, Reason), Channel); +handle_frame_error( + #{cause := frame_too_large} = R, Channel = #channel{conn_state = connecting} +) -> + shutdown( + shutdown_count(frame_error, R), ?CONNACK_PACKET(?RC_PACKET_TOO_LARGE), Channel + ); +handle_frame_error(Reason, Channel = #channel{conn_state = connecting}) -> + shutdown(shutdown_count(frame_error, Reason), ?CONNACK_PACKET(?RC_MALFORMED_PACKET), Channel); +handle_frame_error( + #{cause := frame_too_large}, Channel = #channel{conn_state = ConnState} +) when + ConnState =:= connected orelse ConnState =:= reauthenticating +-> + handle_out(disconnect, {?RC_PACKET_TOO_LARGE, frame_too_large}, Channel); +handle_frame_error(Reason, Channel = #channel{conn_state = ConnState}) when + ConnState =:= connected orelse ConnState =:= reauthenticating +-> + handle_out(disconnect, {?RC_MALFORMED_PACKET, Reason}, Channel); +handle_frame_error(Reason, Channel = #channel{conn_state = disconnected}) -> + ?SLOG(error, #{msg => "malformed_mqtt_message", reason => Reason}), + {ok, Channel}. + %%-------------------------------------------------------------------- %% Handle outgoing packet %%-------------------------------------------------------------------- @@ -2629,8 +2639,7 @@ save_alias(outbound, AliasId, Topic, TopicAliases = #{outbound := Aliases}) -> NAliases = maps:put(Topic, AliasId, Aliases), TopicAliases#{outbound => NAliases}. 
--compile({inline, [reply/2, shutdown/2, shutdown/3, sp/1, flag/1]}). - +-compile({inline, [reply/2, shutdown/2, shutdown/3]}). reply(Reply, Channel) -> {reply, Reply, Channel}. diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index a1c9084dd..f83a739ad 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -1134,7 +1134,7 @@ validate_connect_reserved(0) -> ok; validate_connect_reserved(1) -> ?PARSE_ERR(reserved_connect_flag). %% MQTT-v3.1.1-[MQTT-3.1.2-13], MQTT-v5.0-[MQTT-3.1.2-11] -validate_connect_will(false, _, WillQos) when WillQos > 0 -> ?PARSE_ERR(invalid_will_qos); +validate_connect_will(false, _, WillQoS) when WillQoS > 0 -> ?PARSE_ERR(invalid_will_qos); %% MQTT-v3.1.1-[MQTT-3.1.2-14], MQTT-v5.0-[MQTT-3.1.2-12] validate_connect_will(true, _, WillQoS) when WillQoS > 2 -> ?PARSE_ERR(invalid_will_qos); %% MQTT-v3.1.1-[MQTT-3.1.2-15], MQTT-v5.0-[MQTT-3.1.2-13] diff --git a/apps/emqx/src/emqx_quic_connection.erl b/apps/emqx/src/emqx_quic_connection.erl index df9520d90..c63014cea 100644 --- a/apps/emqx/src/emqx_quic_connection.erl +++ b/apps/emqx/src/emqx_quic_connection.erl @@ -62,7 +62,7 @@ streams := [{pid(), quicer:stream_handle()}], %% New stream opts stream_opts := map(), - %% If conneciton is resumed from session ticket + %% If connection is resumed from session ticket is_resumed => boolean(), %% mqtt message serializer config serialize => undefined, @@ -70,8 +70,8 @@ }. -type cb_ret() :: quicer_lib:cb_ret(). -%% @doc Data streams initializions are started in parallel with control streams, data streams are blocked -%% for the activation from control stream after it is accepted as a legit conneciton. +%% @doc Data streams initializations are started in parallel with control streams, data streams are blocked +%% for the activation from control stream after it is accepted as a legit connection. 
%% For security, the initial number of allowed data streams from client should be limited by %% 'peer_bidi_stream_count` & 'peer_unidi_stream_count` -spec activate_data_streams(pid(), { @@ -80,7 +80,7 @@ activate_data_streams(ConnOwner, {PS, Serialize, Channel}) -> gen_server:call(ConnOwner, {activate_data_streams, {PS, Serialize, Channel}}, infinity). -%% @doc conneciton owner init callback +%% @doc connection owner init callback -spec init(map()) -> {ok, cb_state()}. init(#{stream_opts := SOpts} = S) when is_list(SOpts) -> init(S#{stream_opts := maps:from_list(SOpts)}); From c313aa89f07c9918c87f6938795020368baca42d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 27 Jun 2024 16:39:11 +0800 Subject: [PATCH 13/50] fix: try throw proto_ver and proto_name when parsing CONNECT packet --- apps/emqx/src/emqx_channel.erl | 16 ++++--- apps/emqx/src/emqx_frame.erl | 78 ++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index f7c76cade..f7b210a22 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -145,7 +145,9 @@ -type replies() :: emqx_types:packet() | reply() | [reply()]. -define(IS_MQTT_V5, #channel{conninfo = #{proto_ver := ?MQTT_PROTO_V5}}). - +-define(IS_CONNECTED_OR_REAUTHENTICATING(ConnState), + ((ConnState == connected) orelse (ConnState == reauthenticating)) +). -define(IS_COMMON_SESSION_TIMER(N), ((N == retry_delivery) orelse (N == expire_awaiting_rel)) ). @@ -333,7 +335,7 @@ take_conn_info_fields(Fields, ClientInfo, ConnInfo) -> | {shutdown, Reason :: term(), channel()} | {shutdown, Reason :: term(), replies(), channel()}. 
handle_in(?CONNECT_PACKET(), Channel = #channel{conn_state = ConnState}) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> handle_out(disconnect, ?RC_PROTOCOL_ERROR, Channel); handle_in(?CONNECT_PACKET(), Channel = #channel{conn_state = connecting}) -> @@ -1016,11 +1018,11 @@ handle_frame_error(Reason, Channel = #channel{conn_state = connecting}) -> handle_frame_error( #{cause := frame_too_large}, Channel = #channel{conn_state = ConnState} ) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> handle_out(disconnect, {?RC_PACKET_TOO_LARGE, frame_too_large}, Channel); handle_frame_error(Reason, Channel = #channel{conn_state = ConnState}) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> handle_out(disconnect, {?RC_MALFORMED_PACKET, Reason}, Channel); handle_frame_error(Reason, Channel = #channel{conn_state = disconnected}) -> @@ -1295,7 +1297,7 @@ handle_info( session = Session } ) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> {Intent, Session1} = session_disconnect(ClientInfo, ConnInfo, Session), Channel1 = ensure_disconnected(Reason, maybe_publish_will_msg(sock_closed, Channel)), @@ -2675,13 +2677,13 @@ disconnect_and_shutdown( ?IS_MQTT_V5 = #channel{conn_state = ConnState} ) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> NChannel = ensure_disconnected(Reason, Channel), shutdown(Reason, Reply, ?DISCONNECT_PACKET(reason_code(Reason)), NChannel); %% mqtt v3/v4 connected sessions disconnect_and_shutdown(Reason, Reply, Channel = #channel{conn_state = ConnState}) when - ConnState =:= connected orelse ConnState =:= reauthenticating + ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> NChannel = ensure_disconnected(Reason, Channel), 
shutdown(Reason, Reply, NChannel); diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index f83a739ad..398a5f35c 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -267,27 +267,36 @@ packet(Header, Variable, Payload) -> #mqtt_packet{header = Header, variable = Variable, payload = Payload}. parse_connect(FrameBin, StrictMode) -> - {ProtoName, Rest} = parse_utf8_string_with_cause(FrameBin, StrictMode, invalid_proto_name), - case ProtoName of - <<"MQTT">> -> - ok; - <<"MQIsdp">> -> - ok; - _ -> - %% from spec: the server MAY send disconnect with reason code 0x84 - %% we chose to close socket because the client is likely not talking MQTT anyway - ?PARSE_ERR(#{ - cause => invalid_proto_name, - expected => <<"'MQTT' or 'MQIsdp'">>, - received => ProtoName - }) - end, - parse_connect2(ProtoName, Rest, StrictMode). + {ProtoName, Rest0} = parse_utf8_string_with_cause(FrameBin, StrictMode, invalid_proto_name), + %% No need to parse and check proto_ver if proto_name is invalid, check it first + %% And the matching check of `proto_name` and `proto_ver` fields will be done in `emqx_packet:check_proto_ver/2` + _ = validate_proto_name(ProtoName), + {IsBridge, ProtoVer, Rest2} = parse_connect_proto_ver(Rest0), + Meta = #{proto_name => ProtoName, proto_ver => ProtoVer}, + try + do_parse_connect(ProtoName, IsBridge, ProtoVer, Rest2, StrictMode) + catch + throw:{?FRAME_PARSE_ERROR, ReasonM} when is_map(ReasonM) -> + ?PARSE_ERR(maps:merge(ReasonM, Meta)); + throw:{?FRAME_PARSE_ERROR, Reason} -> + ?PARSE_ERR(Meta#{cause => Reason}) + end. 
-parse_connect2( +do_parse_connect( ProtoName, - <>, + IsBridge, + ProtoVer, + << + UsernameFlagB:1, + PasswordFlagB:1, + WillRetainB:1, + WillQoS:2, + WillFlagB:1, + CleanStart:1, + Reserved:1, + KeepAlive:16/big, + Rest/binary + >>, StrictMode ) -> _ = validate_connect_reserved(Reserved), @@ -302,14 +311,14 @@ parse_connect2( UsernameFlag = bool(UsernameFlagB), PasswordFlag = bool(PasswordFlagB) ), - {Properties, Rest3} = parse_properties(Rest2, ProtoVer, StrictMode), + {Properties, Rest3} = parse_properties(Rest, ProtoVer, StrictMode), {ClientId, Rest4} = parse_utf8_string_with_cause(Rest3, StrictMode, invalid_clientid), ConnPacket = #mqtt_packet_connect{ proto_name = ProtoName, proto_ver = ProtoVer, %% For bridge mode, non-standard implementation %% Invented by mosquitto, named 'try_private': https://mosquitto.org/man/mosquitto-conf-5.html - is_bridge = (BridgeTag =:= 8), + is_bridge = IsBridge, clean_start = bool(CleanStart), will_flag = WillFlag, will_qos = WillQoS, @@ -342,8 +351,8 @@ parse_connect2( unexpected_trailing_bytes => size(Rest7) }) end; -parse_connect2(_ProtoName, Bin, _StrictMode) -> - %% sent less than 32 bytes +do_parse_connect(_ProtoName, _IsBridge, _ProtoVer, Bin, _StrictMode) -> + %% sent less than 24 bytes ?PARSE_ERR(#{cause => malformed_connect, header_bytes => Bin}). parse_packet( @@ -515,6 +524,12 @@ parse_packet_id(<>) -> parse_packet_id(_) -> ?PARSE_ERR(invalid_packet_id). +parse_connect_proto_ver(<>) -> + {_IsBridge = (BridgeTag =:= 8), ProtoVer, Rest}; +parse_connect_proto_ver(Bin) -> + %% sent less than 1 bytes or empty + ?PARSE_ERR(#{cause => malformed_connect, header_bytes => Bin}). + parse_properties(Bin, Ver, _StrictMode) when Ver =/= ?MQTT_PROTO_V5 -> {#{}, Bin}; %% TODO: version mess? @@ -1129,10 +1144,25 @@ validate_subqos([3 | _]) -> ?PARSE_ERR(bad_subqos); validate_subqos([_ | T]) -> validate_subqos(T); validate_subqos([]) -> ok. 
+%% from spec: the server MAY send disconnect with reason code 0x84 +%% we chose to close socket because the client is likely not talking MQTT anyway +validate_proto_name(<<"MQTT">>) -> + ok; +validate_proto_name(<<"MQIsdp">>) -> + ok; +validate_proto_name(ProtoName) -> + ?PARSE_ERR(#{ + cause => invalid_proto_name, + expected => <<"'MQTT' or 'MQIsdp'">>, + received => ProtoName + }). + %% MQTT-v3.1.1-[MQTT-3.1.2-3], MQTT-v5.0-[MQTT-3.1.2-3] +-compile({inline, [validate_connect_reserved/1]}). validate_connect_reserved(0) -> ok; validate_connect_reserved(1) -> ?PARSE_ERR(reserved_connect_flag). +-compile({inline, [validate_connect_will/3]}). %% MQTT-v3.1.1-[MQTT-3.1.2-13], MQTT-v5.0-[MQTT-3.1.2-11] validate_connect_will(false, _, WillQoS) when WillQoS > 0 -> ?PARSE_ERR(invalid_will_qos); %% MQTT-v3.1.1-[MQTT-3.1.2-14], MQTT-v5.0-[MQTT-3.1.2-12] @@ -1141,6 +1171,7 @@ validate_connect_will(true, _, WillQoS) when WillQoS > 2 -> ?PARSE_ERR(invalid_w validate_connect_will(false, WillRetain, _) when WillRetain -> ?PARSE_ERR(invalid_will_retain); validate_connect_will(_, _, _) -> ok. +-compile({inline, [validate_connect_password_flag/4]}). %% MQTT-v3.1 %% Username flag and password flag are not strongly related %% https://public.dhe.ibm.com/software/dw/webservices/ws-mqtt/mqtt-v3r1.html#connect @@ -1155,6 +1186,7 @@ validate_connect_password_flag(true, ?MQTT_PROTO_V5, _, _) -> validate_connect_password_flag(_, _, _, _) -> ok. +-compile({inline, [bool/1]}). bool(0) -> false; bool(1) -> true. 
From 37a89d009480294a4c37b2d9446dd8a159063ad5 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jul 2024 11:38:10 +0800 Subject: [PATCH 14/50] fix: enrich parse_state and connection serialize opts --- apps/emqx/include/emqx_mqtt.hrl | 1 + apps/emqx/src/emqx_channel.erl | 78 +++++++++++++++++++++------- apps/emqx/src/emqx_connection.erl | 9 +++- apps/emqx/src/emqx_frame.erl | 27 +++++++--- apps/emqx/src/emqx_ws_connection.erl | 11 +++- 5 files changed, 100 insertions(+), 26 deletions(-) diff --git a/apps/emqx/include/emqx_mqtt.hrl b/apps/emqx/include/emqx_mqtt.hrl index 09f7495ea..1c3fd770c 100644 --- a/apps/emqx/include/emqx_mqtt.hrl +++ b/apps/emqx/include/emqx_mqtt.hrl @@ -683,6 +683,7 @@ end). -define(FRAME_PARSE_ERROR, frame_parse_error). -define(FRAME_SERIALIZE_ERROR, frame_serialize_error). + -define(THROW_FRAME_ERROR(Reason), erlang:throw({?FRAME_PARSE_ERROR, Reason})). -define(THROW_SERIALIZE_ERROR(Reason), erlang:throw({?FRAME_SERIALIZE_ERROR, Reason})). diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index f7b210a22..07aad9b24 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -37,6 +37,7 @@ get_mqtt_conf/2, get_mqtt_conf/3, set_conn_state/2, + set_conninfo_proto_ver/2, stats/1, caps/1 ]). @@ -219,6 +220,9 @@ info(impl, #channel{session = Session}) -> set_conn_state(ConnState, Channel) -> Channel#channel{conn_state = ConnState}. +set_conninfo_proto_ver({none, #{version := ProtoVer}}, Channel = #channel{conninfo = ConnInfo}) -> + Channel#channel{conninfo = ConnInfo#{proto_ver => ProtoVer}}. + -spec stats(channel()) -> emqx_types:stats(). 
stats(#channel{session = undefined}) -> emqx_pd:get_counters(?CHANNEL_METRICS); @@ -1003,29 +1007,60 @@ not_nacked({deliver, _Topic, Msg}) -> %%-------------------------------------------------------------------- handle_frame_error( - Reason, - Channel = #channel{conn_state = idle} -) -> - shutdown(shutdown_count(frame_error, Reason), Channel); -handle_frame_error( - #{cause := frame_too_large} = R, Channel = #channel{conn_state = connecting} -) -> - shutdown( - shutdown_count(frame_error, R), ?CONNACK_PACKET(?RC_PACKET_TOO_LARGE), Channel - ); -handle_frame_error(Reason, Channel = #channel{conn_state = connecting}) -> - shutdown(shutdown_count(frame_error, Reason), ?CONNACK_PACKET(?RC_MALFORMED_PACKET), Channel); -handle_frame_error( - #{cause := frame_too_large}, Channel = #channel{conn_state = ConnState} + Reason = #{cause := frame_too_large}, + Channel = #channel{conn_state = ConnState, conninfo = ConnInfo} ) when ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> - handle_out(disconnect, {?RC_PACKET_TOO_LARGE, frame_too_large}, Channel); -handle_frame_error(Reason, Channel = #channel{conn_state = ConnState}) when + ShutdownCount = shutdown_count(frame_error, Reason), + case proto_ver(Reason, ConnInfo) of + ?MQTT_PROTO_V5 -> + handle_out(disconnect, {?RC_PACKET_TOO_LARGE, frame_too_large}, Channel); + _ -> + shutdown(ShutdownCount, Channel) + end; +%% Only send CONNACK with reason code `frame_too_large` for MQTT-v5.0 when connecting, +%% otherwise DONOT send any CONNACK or DISCONNECT packet. 
+handle_frame_error( + Reason, + Channel = #channel{conn_state = ConnState, conninfo = ConnInfo} +) when + is_map(Reason) andalso + (ConnState == idle orelse ConnState == connecting) +-> + ShutdownCount = shutdown_count(frame_error, Reason), + ProtoVer = proto_ver(Reason, ConnInfo), + NChannel = Channel#channel{conninfo = ConnInfo#{proto_ver => ProtoVer}}, + case ProtoVer of + ?MQTT_PROTO_V5 -> + shutdown(ShutdownCount, ?CONNACK_PACKET(?RC_PACKET_TOO_LARGE), NChannel); + _ -> + shutdown(ShutdownCount, NChannel) + end; +handle_frame_error( + Reason, + Channel = #channel{conn_state = connecting} +) -> + shutdown( + shutdown_count(frame_error, Reason), + ?CONNACK_PACKET(?RC_MALFORMED_PACKET), + Channel + ); +handle_frame_error( + Reason, + Channel = #channel{conn_state = ConnState} +) when ?IS_CONNECTED_OR_REAUTHENTICATING(ConnState) -> - handle_out(disconnect, {?RC_MALFORMED_PACKET, Reason}, Channel); -handle_frame_error(Reason, Channel = #channel{conn_state = disconnected}) -> + handle_out( + disconnect, + {?RC_MALFORMED_PACKET, Reason}, + Channel + ); +handle_frame_error( + Reason, + Channel = #channel{conn_state = disconnected} +) -> ?SLOG(error, #{msg => "malformed_mqtt_message", reason => Reason}), {ok, Channel}. @@ -2726,6 +2761,13 @@ is_durable_session(#channel{session = Session}) -> false end. +proto_ver(#{proto_ver := ProtoVer}, _ConnInfo) -> + ProtoVer; +proto_ver(_Reason, #{proto_ver := ProtoVer}) -> + ProtoVer; +proto_ver(_, _) -> + ?MQTT_PROTO_V4. 
+ %%-------------------------------------------------------------------- %% For CT tests %%-------------------------------------------------------------------- diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index f378b700e..ecb962f08 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -782,7 +782,8 @@ parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> input_bytes => Data, parsed_packets => Packets }), - {[{frame_error, Reason} | Packets], State}; + NState = enrich_state(Reason, State), + {[{frame_error, Reason} | Packets], NState}; error:Reason:Stacktrace -> ?LOG(error, #{ at_state => emqx_frame:describe_state(ParseState), @@ -1204,6 +1205,12 @@ inc_counter(Key, Inc) -> _ = emqx_pd:inc_counter(Key, Inc), ok. +enrich_state(#{parse_state := NParseState}, State) -> + Serialize = emqx_frame:serialize_opts(NParseState), + State#state{parse_state = NParseState, serialize = Serialize}; +enrich_state(_, State) -> + State. + set_tcp_keepalive({quic, _Listener}) -> ok; set_tcp_keepalive({Type, Id}) -> diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index 398a5f35c..554847d67 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -266,20 +266,33 @@ packet(Header, Variable) -> packet(Header, Variable, Payload) -> #mqtt_packet{header = Header, variable = Variable, payload = Payload}. 
-parse_connect(FrameBin, StrictMode) -> +parse_connect(FrameBin, Options = #{strict_mode := StrictMode}) -> {ProtoName, Rest0} = parse_utf8_string_with_cause(FrameBin, StrictMode, invalid_proto_name), %% No need to parse and check proto_ver if proto_name is invalid, check it first %% And the matching check of `proto_name` and `proto_ver` fields will be done in `emqx_packet:check_proto_ver/2` _ = validate_proto_name(ProtoName), {IsBridge, ProtoVer, Rest2} = parse_connect_proto_ver(Rest0), - Meta = #{proto_name => ProtoName, proto_ver => ProtoVer}, + NOptions = Options#{version => ProtoVer}, try do_parse_connect(ProtoName, IsBridge, ProtoVer, Rest2, StrictMode) catch throw:{?FRAME_PARSE_ERROR, ReasonM} when is_map(ReasonM) -> - ?PARSE_ERR(maps:merge(ReasonM, Meta)); + ?PARSE_ERR( + ReasonM#{ + proto_ver => ProtoVer, + proto_name => ProtoName, + parse_state => ?NONE(NOptions) + } + ); throw:{?FRAME_PARSE_ERROR, Reason} -> - ?PARSE_ERR(Meta#{cause => Reason}) + ?PARSE_ERR( + #{ + cause => Reason, + proto_ver => ProtoVer, + proto_name => ProtoName, + parse_state => ?NONE(NOptions) + } + ) end. do_parse_connect( @@ -358,9 +371,9 @@ do_parse_connect(_ProtoName, _IsBridge, _ProtoVer, Bin, _StrictMode) -> parse_packet( #mqtt_packet_header{type = ?CONNECT}, FrameBin, - #{strict_mode := StrictMode} + Options ) -> - parse_connect(FrameBin, StrictMode); + parse_connect(FrameBin, Options); parse_packet( #mqtt_packet_header{type = ?CONNACK}, <>, @@ -753,6 +766,8 @@ serialize_fun(#{version := Ver, max_size := MaxSize}) -> serialize_opts() -> ?DEFAULT_OPTIONS. +serialize_opts(?NONE(Options)) -> + maps:merge(?DEFAULT_OPTIONS, Options); serialize_opts(#mqtt_packet_connect{proto_ver = ProtoVer, properties = ConnProps}) -> MaxSize = get_property('Maximum-Packet-Size', ConnProps, ?MAX_PACKET_SIZE), #{version => ProtoVer, max_size => MaxSize}. 
diff --git a/apps/emqx/src/emqx_ws_connection.erl b/apps/emqx/src/emqx_ws_connection.erl index 5d04b3304..4765fdace 100644 --- a/apps/emqx/src/emqx_ws_connection.erl +++ b/apps/emqx/src/emqx_ws_connection.erl @@ -436,6 +436,7 @@ websocket_handle({Frame, _}, State) -> %% TODO: should not close the ws connection ?LOG(error, #{msg => "unexpected_frame", frame => Frame}), shutdown(unexpected_ws_frame, State). + websocket_info({call, From, Req}, State) -> handle_call(From, Req, State); websocket_info({cast, rate_limit}, State) -> @@ -725,7 +726,8 @@ parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> input_bytes => Data }), FrameError = {frame_error, Reason}, - {[{incoming, FrameError} | Packets], State}; + NState = enrich_state(Reason, State), + {[{incoming, FrameError} | Packets], NState}; error:Reason:Stacktrace -> ?LOG(error, #{ at_state => emqx_frame:describe_state(ParseState), @@ -1059,6 +1061,13 @@ check_max_connection(Type, Listener) -> {denny, Reason} end end. + +enrich_state(#{parse_state := NParseState}, State) -> + Serialize = emqx_frame:serialize_opts(NParseState), + State#state{parse_state = NParseState, serialize = Serialize}; +enrich_state(_, State) -> + State. 
+ %%-------------------------------------------------------------------- %% For CT tests %%-------------------------------------------------------------------- From 7a251c9ead89c04fda48f22d901f8b3350d50519 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jul 2024 15:28:39 +0800 Subject: [PATCH 15/50] test: handle frame error for CONNECT packets --- apps/emqx/test/emqx_channel_SUITE.erl | 34 ++++++++++------- apps/emqx/test/emqx_frame_SUITE.erl | 54 ++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 23 deletions(-) diff --git a/apps/emqx/test/emqx_channel_SUITE.erl b/apps/emqx/test/emqx_channel_SUITE.erl index d157cc914..83b862892 100644 --- a/apps/emqx/test/emqx_channel_SUITE.erl +++ b/apps/emqx/test/emqx_channel_SUITE.erl @@ -414,24 +414,32 @@ t_handle_in_auth(_) -> emqx_channel:handle_in(?AUTH_PACKET(), Channel). t_handle_in_frame_error(_) -> - IdleChannel = channel(#{conn_state => idle}), - {shutdown, #{shutdown_count := frame_too_large, cause := frame_too_large}, _Chan} = - emqx_channel:handle_in({frame_error, #{cause => frame_too_large}}, IdleChannel), + IdleChannelV5 = channel(#{conn_state => idle}), + %% no CONNACK packet for v4 + ?assertMatch( + {shutdown, #{shutdown_count := frame_too_large, cause := frame_too_large}, _Chan}, + emqx_channel:handle_in( + {frame_error, #{cause => frame_too_large}}, v4(IdleChannelV5) + ) + ), + ConnectingChan = channel(#{conn_state => connecting}), ConnackPacket = ?CONNACK_PACKET(?RC_PACKET_TOO_LARGE), - {shutdown, - #{ - shutdown_count := frame_too_large, - cause := frame_too_large, - limit := 100, - received := 101 - }, - ConnackPacket, - _} = + ?assertMatch( + {shutdown, + #{ + shutdown_count := frame_too_large, + cause := frame_too_large, + limit := 100, + received := 101 + }, + ConnackPacket, _}, emqx_channel:handle_in( {frame_error, #{cause => frame_too_large, received => 101, limit => 100}}, ConnectingChan - ), + ) + ), + DisconnectPacket = ?DISCONNECT_PACKET(?RC_PACKET_TOO_LARGE), ConnectedChan = 
channel(#{conn_state => connected}), ?assertMatch( diff --git a/apps/emqx/test/emqx_frame_SUITE.erl b/apps/emqx/test/emqx_frame_SUITE.erl index 9c8a99547..0c5a36231 100644 --- a/apps/emqx/test/emqx_frame_SUITE.erl +++ b/apps/emqx/test/emqx_frame_SUITE.erl @@ -63,6 +63,7 @@ groups() -> t_parse_malformed_properties, t_malformed_connect_header, t_malformed_connect_data, + t_malformed_connect_data_proto_ver, t_reserved_connect_flag, t_invalid_clientid, t_undefined_password, @@ -167,6 +168,8 @@ t_parse_malformed_utf8_string(_) -> ParseState = emqx_frame:initial_parse_state(#{strict_mode => true}), ?ASSERT_FRAME_THROW(utf8_string_invalid, emqx_frame:parse(MalformedPacket, ParseState)). +%% TODO: parse v3 with 0 length clientid + t_serialize_parse_v3_connect(_) -> Bin = <<16, 37, 0, 6, 77, 81, 73, 115, 100, 112, 3, 2, 0, 60, 0, 23, 109, 111, 115, 113, 112, 117, @@ -324,7 +327,7 @@ t_serialize_parse_bridge_connect(_) -> header = #mqtt_packet_header{type = ?CONNECT}, variable = #mqtt_packet_connect{ clientid = <<"C_00:0C:29:2B:77:52">>, - proto_ver = 16#03, + proto_ver = ?MQTT_PROTO_V3, proto_name = <<"MQIsdp">>, is_bridge = true, will_retain = true, @@ -686,15 +689,36 @@ t_malformed_connect_header(_) -> ). t_malformed_connect_data(_) -> + ProtoNameWithLen = <<0, 6, "MQIsdp">>, + ConnectFlags = <<2#00000000>>, + ClientIdwithLen = <<0, 1, "a">>, + UnexpectedRestBin = <<0, 1, 2>>, ?ASSERT_FRAME_THROW( - #{cause := malformed_connect, unexpected_trailing_bytes := _}, - emqx_frame:parse(<<16, 15, 0, 6, 77, 81, 73, 115, 100, 112, 3, 0, 0, 0, 0, 0, 0>>) + #{cause := malformed_connect, unexpected_trailing_bytes := 3}, + emqx_frame:parse( + <<16, 18, ProtoNameWithLen/binary, ?MQTT_PROTO_V3, ConnectFlags/binary, 0, 0, + ClientIdwithLen/binary, UnexpectedRestBin/binary>> + ) + ). 
+ +t_malformed_connect_data_proto_ver(_) -> + Proto3NameWithLen = <<0, 6, "MQIsdp">>, + ?ASSERT_FRAME_THROW( + #{cause := malformed_connect, header_bytes := <<>>}, + emqx_frame:parse(<<16, 8, Proto3NameWithLen/binary>>) + ), + ProtoNameWithLen = <<0, 4, "MQTT">>, + ?ASSERT_FRAME_THROW( + #{cause := malformed_connect, header_bytes := <<>>}, + emqx_frame:parse(<<16, 6, ProtoNameWithLen/binary>>) ). t_reserved_connect_flag(_) -> ?assertException( throw, - {frame_parse_error, reserved_connect_flag}, + {frame_parse_error, #{ + cause := reserved_connect_flag, proto_ver := ?MQTT_PROTO_V3, proto_name := <<"MQIsdp">> + }}, emqx_frame:parse(<<16, 15, 0, 6, 77, 81, 73, 115, 100, 112, 3, 1, 0, 0, 1, 0, 0>>) ). @@ -726,7 +750,7 @@ t_undefined_password(_) -> }, variable = #mqtt_packet_connect{ proto_name = <<"MQTT">>, - proto_ver = 4, + proto_ver = ?MQTT_PROTO_V4, is_bridge = false, clean_start = true, will_flag = false, @@ -774,7 +798,9 @@ t_invalid_will_retain(_) -> 54, 75, 78, 112, 57, 0, 6, 68, 103, 55, 87, 87, 87>>, ?assertException( throw, - {frame_parse_error, invalid_will_retain}, + {frame_parse_error, #{ + cause := invalid_will_retain, proto_ver := ?MQTT_PROTO_V5, proto_name := <<"MQTT">> + }}, emqx_frame:parse(ConnectBin) ), ok. 
@@ -796,22 +822,30 @@ t_invalid_will_qos(_) -> ), ?assertException( throw, - {frame_parse_error, invalid_will_qos}, + {frame_parse_error, #{ + cause := invalid_will_qos, proto_ver := ?MQTT_PROTO_V5, proto_name := <<"MQTT">> + }}, emqx_frame:parse(ConnectBinFun(Will_F_WillQoS1)) ), ?assertException( throw, - {frame_parse_error, invalid_will_qos}, + {frame_parse_error, #{ + cause := invalid_will_qos, proto_ver := ?MQTT_PROTO_V5, proto_name := <<"MQTT">> + }}, emqx_frame:parse(ConnectBinFun(Will_F_WillQoS2)) ), ?assertException( throw, - {frame_parse_error, invalid_will_qos}, + {frame_parse_error, #{ + cause := invalid_will_qos, proto_ver := ?MQTT_PROTO_V5, proto_name := <<"MQTT">> + }}, emqx_frame:parse(ConnectBinFun(Will_F_WillQoS3)) ), ?assertException( throw, - {frame_parse_error, invalid_will_qos}, + {frame_parse_error, #{ + cause := invalid_will_qos, proto_ver := ?MQTT_PROTO_V5, proto_name := <<"MQTT">> + }}, emqx_frame:parse(ConnectBinFun(Will_T_WillQoS3)) ), ok. From 15b3f4deb04104704faf26e2f9a43a4ba64e8b75 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 1 Aug 2024 15:00:24 +0800 Subject: [PATCH 16/50] fix: rm unused func and exports --- apps/emqx/src/emqx_channel.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index 07aad9b24..6c9c8f8a8 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -37,7 +37,6 @@ get_mqtt_conf/2, get_mqtt_conf/3, set_conn_state/2, - set_conninfo_proto_ver/2, stats/1, caps/1 ]). @@ -220,9 +219,6 @@ info(impl, #channel{session = Session}) -> set_conn_state(ConnState, Channel) -> Channel#channel{conn_state = ConnState}. -set_conninfo_proto_ver({none, #{version := ProtoVer}}, Channel = #channel{conninfo = ConnInfo}) -> - Channel#channel{conninfo = ConnInfo#{proto_ver => ProtoVer}}. - -spec stats(channel()) -> emqx_types:stats(). 
stats(#channel{session = undefined}) -> emqx_pd:get_counters(?CHANNEL_METRICS); From 4915cc0da6d89823972245fbb65dc47f7270b256 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 1 Aug 2024 15:23:35 +0800 Subject: [PATCH 17/50] chore: add changelog entry for 13357 --- changes/ce/fix-13357.en.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/ce/fix-13357.en.md diff --git a/changes/ce/fix-13357.en.md b/changes/ce/fix-13357.en.md new file mode 100644 index 000000000..ea497a847 --- /dev/null +++ b/changes/ce/fix-13357.en.md @@ -0,0 +1,4 @@ +Stop returning `CONNACK` or `DISCONNECT` to clients that sent malformed CONNECT packets. + +- Only send `CONNACK` with reason code `frame_too_large` for MQTT-v5.0 when connecting if the protocol version field in CONNECT can be detected. +- Otherwise **DONOT** send any CONNACK or DISCONNECT packet. From 11aaa7b07d8b7e69bfb6f5c361d6d950caec20b2 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 5 Jul 2024 17:55:48 +0200 Subject: [PATCH 18/50] fix: make MQTT connector error log messages easier to understand Fixes: https://emqx.atlassian.net/browse/EMQX-12555 https://emqx.atlassian.net/browse/EMQX-12657 --- .../src/emqx_bridge_mqtt_connector.erl | 43 +++++++++++-- .../test/emqx_bridge_mqtt_SUITE.erl | 62 +++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 118542356..358856c35 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -514,15 +514,48 @@ connect(Pid, Name) -> {ok, Pid}; {error, Reason} = Error -> IsDryRun = emqx_resource:is_dry_run(Name), - ?SLOG(?LOG_LEVEL(IsDryRun), #{ - msg => "ingress_client_connect_failed", - reason => Reason, - resource_id => Name - }), + log_connect_error_reason(?LOG_LEVEL(IsDryRun), Reason, Name), _ = catch emqtt:stop(Pid), Error end. 
+log_connect_error_reason(Level, {tcp_closed, _} = Reason, Name) -> + ?tp(emqx_bridge_mqtt_connector_tcp_closed, #{}), + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name, + explain => + << + "Your MQTT connection attempt was unsuccessful. " + "It might be at its maximum capacity for handling new connections. " + "To diagnose the issue further, you can check the server logs for " + "any specific messages related to the unavailability or connection limits." + >> + }); +log_connect_error_reason(Level, econnrefused = Reason, Name) -> + ?tp(emqx_bridge_mqtt_connector_econnrefused_error, #{}), + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name, + explain => + << + "This error indicates that your connection attempt to the MQTT server was rejected. " + "In simpler terms, the server you tried to connect to refused your request. " + "There can be multiple reasons for this. " + "For example, the MQTT server you're trying to connect to might be down or not " + "running at all or you might have provided the wrong address " + "or port number for the server." + >> + }); +log_connect_error_reason(Level, Reason, Name) -> + ?SLOG(Level, #{ + msg => "ingress_client_connect_failed", + reason => Reason, + name => Name + }). + handle_disconnect(_Reason) -> ok. diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index a220eb9f7..574851f70 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -27,6 +27,8 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-import(emqx_common_test_helpers, [on_exit/1]). + %% output functions -export([inspect/3]). @@ -399,6 +401,56 @@ t_mqtt_conn_bridge_ingress_shared_subscription(_) -> {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. 
+t_connect_with_more_clients_than_the_broker_accepts(_) -> + PoolSize = 100, + OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), + on_exit(fun() -> + emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) + end), + NewConf = OrgConf#{<<"max_connections">> => 3}, + {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), + BridgeName = atom_to_binary(?FUNCTION_NAME), + BridgeID = create_bridge( + ?SERVER_CONF#{ + <<"name">> => BridgeName, + <<"ingress">> => #{ + <<"pool_size">> => PoolSize, + <<"remote">> => #{ + <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, + <<"qos">> => 1 + }, + <<"local">> => #{ + <<"topic">> => <>, + <<"qos">> => <<"${qos}">>, + <<"payload">> => <<"${clientid}">>, + <<"retain">> => <<"${retain}">> + } + } + } + ), + snabbkaffe:block_until( + fun + (#{msg := emqx_bridge_mqtt_connector_tcp_closed}) -> + true; + (_) -> + false + end, + 5000 + ), + Trace = snabbkaffe:collect_trace(), + ?assert( + lists:any( + fun(K) -> + maps:get(msg, K, not_found) =:= + emqx_bridge_mqtt_connector_tcp_closed + end, + Trace + ) + ), + + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + ok. + t_mqtt_egress_bridge_warns_clean_start(_) -> BridgeName = atom_to_binary(?FUNCTION_NAME), Action = fun() -> @@ -1050,6 +1102,16 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) -> Payload <- [integer_to_binary(I)] ], + Trace = snabbkaffe:collect_trace(50), + ?assert( + lists:any( + fun(K) -> + maps:get(msg, K, not_found) =:= + emqx_bridge_mqtt_connector_econnrefused_error + end, + Trace + ) + ), ok. 
start_publisher(Topic, Interval, CtrlPid) -> From ba2d4f3df3f723be4c42992f6321a72183be30f1 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Fri, 5 Jul 2024 18:00:33 +0200 Subject: [PATCH 19/50] docs: add change log entry --- changes/ce/fix-13425.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ce/fix-13425.en.md diff --git a/changes/ce/fix-13425.en.md b/changes/ce/fix-13425.en.md new file mode 100644 index 000000000..e02e99c0a --- /dev/null +++ b/changes/ce/fix-13425.en.md @@ -0,0 +1 @@ +The MQTT connector error log messages have been improved to provide clearer and more detailed information. From baf2b96cbc12adbf9a66fba7a9b05868e1a8f08f Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 14:27:25 -0300 Subject: [PATCH 20/50] test: refactor test structure --- .../test/emqx_bridge_mqtt_SUITE.erl | 62 ++++++++----------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index 574851f70..438b1f601 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -410,45 +410,37 @@ t_connect_with_more_clients_than_the_broker_accepts(_) -> NewConf = OrgConf#{<<"max_connections">> => 3}, {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), BridgeName = atom_to_binary(?FUNCTION_NAME), - BridgeID = create_bridge( - ?SERVER_CONF#{ - <<"name">> => BridgeName, - <<"ingress">> => #{ - <<"pool_size">> => PoolSize, - <<"remote">> => #{ - <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, - <<"qos">> => 1 - }, - <<"local">> => #{ - <<"topic">> => <>, - <<"qos">> => <<"${qos}">>, - <<"payload">> => <<"${clientid}">>, - <<"retain">> => <<"${retain}">> + ?check_trace( + #{timetrap => 10_000}, + begin + BridgeID = create_bridge( + ?SERVER_CONF#{ + <<"name">> => BridgeName, + <<"ingress">> => #{ + <<"pool_size">> => PoolSize, 
+ <<"remote">> => #{ + <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, + <<"qos">> => 1 + }, + <<"local">> => #{ + <<"topic">> => <>, + <<"qos">> => <<"${qos}">>, + <<"payload">> => <<"${clientid}">>, + <<"retain">> => <<"${retain}">> + } + } } - } - } - ), - snabbkaffe:block_until( - fun - (#{msg := emqx_bridge_mqtt_connector_tcp_closed}) -> - true; - (_) -> - false + ), + ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + ok end, - 5000 - ), - Trace = snabbkaffe:collect_trace(), - ?assert( - lists:any( - fun(K) -> - maps:get(msg, K, not_found) =:= - emqx_bridge_mqtt_connector_tcp_closed - end, - Trace - ) + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), + ok + end ), - {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. t_mqtt_egress_bridge_warns_clean_start(_) -> From 44e7f2e9b2555a61716daac5996ff98916814630 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 14:49:43 -0300 Subject: [PATCH 21/50] refactor: use macros for status to avoid typos --- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl | 8 ++++---- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 358856c35..eb8b4e1ad 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -200,7 +200,7 @@ on_get_channel_status( } = _State ) when is_map_key(ChannelId, Channels) -> %% The channel should be ok as long as the MQTT client is ok - connected. + ?status_connected. on_get_channels(ResId) -> emqx_bridge_v2:get_channels_for_connector(ResId). 
@@ -359,7 +359,7 @@ on_get_status(_ResourceId, State) -> combine_status(Statuses) catch exit:timeout -> - connecting + ?status_connecting end. get_status({_Pool, Worker}) -> @@ -367,7 +367,7 @@ get_status({_Pool, Worker}) -> {ok, Client} -> emqx_bridge_mqtt_ingress:status(Client); {error, _} -> - disconnected + ?status_disconnected end. combine_status(Statuses) -> @@ -379,7 +379,7 @@ combine_status(Statuses) -> [Status | _] -> Status; [] -> - disconnected + ?status_disconnected end. mk_ingress_config( diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl index 1749d4194..35aea67a6 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl @@ -19,6 +19,7 @@ -include_lib("emqx/include/logger.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). %% management APIs -export([ @@ -234,13 +235,13 @@ status(Pid) -> try case proplists:get_value(socket, info(Pid)) of Socket when Socket /= undefined -> - connected; + ?status_connected; undefined -> - connecting + ?status_connecting end catch exit:{noproc, _} -> - disconnected + ?status_disconnected end. 
%% From 52b2d73b2802176c035d0ca1f0a777cb980fd694 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:13:25 -0300 Subject: [PATCH 22/50] test: move new test to newer module and use current apis --- .../test/emqx_bridge_mqtt_SUITE.erl | 42 ------------------- .../emqx_bridge_mqtt_v2_subscriber_SUITE.erl | 25 +++++++++++ 2 files changed, 25 insertions(+), 42 deletions(-) diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index 438b1f601..e1af22c3d 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -401,48 +401,6 @@ t_mqtt_conn_bridge_ingress_shared_subscription(_) -> {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), ok. -t_connect_with_more_clients_than_the_broker_accepts(_) -> - PoolSize = 100, - OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), - on_exit(fun() -> - emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) - end), - NewConf = OrgConf#{<<"max_connections">> => 3}, - {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), - BridgeName = atom_to_binary(?FUNCTION_NAME), - ?check_trace( - #{timetrap => 10_000}, - begin - BridgeID = create_bridge( - ?SERVER_CONF#{ - <<"name">> => BridgeName, - <<"ingress">> => #{ - <<"pool_size">> => PoolSize, - <<"remote">> => #{ - <<"topic">> => <<"$share/ingress/", ?INGRESS_REMOTE_TOPIC, "/#">>, - <<"qos">> => 1 - }, - <<"local">> => #{ - <<"topic">> => <>, - <<"qos">> => <<"${qos}">>, - <<"payload">> => <<"${clientid}">>, - <<"retain">> => <<"${retain}">> - } - } - } - ), - ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), - {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), - ok - end, - fun(Trace) -> - ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), - ok - end - ), - - ok. 
- t_mqtt_egress_bridge_warns_clean_start(_) -> BridgeName = atom_to_binary(?FUNCTION_NAME), Action = fun() -> diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl index b9097b9c3..9030d2ac7 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl @@ -246,3 +246,28 @@ t_receive_via_rule(Config) -> end ), ok. + +t_connect_with_more_clients_than_the_broker_accepts(Config0) -> + OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), + on_exit(fun() -> + emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) + end), + NewConf = OrgConf#{<<"max_connections">> => 3}, + {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), + ConnectorConfig0 = ?config(connector_config, Config0), + ConnectorConfig = ConnectorConfig0#{<<"pool_size">> := 100}, + Config = emqx_utils:merge_opts(Config0, [{connector_config, ConnectorConfig}]), + ?check_trace( + #{timetrap => 10_000}, + begin + {ok, _} = emqx_bridge_v2_testlib:create_bridge_api(Config), + ?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + ok + end, + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)), + ok + end + ), + + ok. 
From 3162fe7a27931d6d5094786d48aea846f0e3d69a Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:23:14 -0300 Subject: [PATCH 23/50] feat: prettify some error explanations --- .../src/emqx_bridge_mqtt_connector.erl | 62 +++++++++++++------ .../emqx_bridge_mqtt_v2_subscriber_SUITE.erl | 31 ++++++++-- 2 files changed, 69 insertions(+), 24 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index eb8b4e1ad..02cf6595f 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -98,7 +98,7 @@ on_start(ResourceId, #{server := Server} = Conf) -> server => Server }}; {error, Reason} -> - {error, Reason} + {error, emqx_maybe:define(explain_error(Reason), Reason)} end. on_add_channel( @@ -356,7 +356,12 @@ on_get_status(_ResourceId, State) -> Workers = [{Pool, Worker} || {Pool, PN} <- Pools, {_Name, Worker} <- ecpool:workers(PN)], try emqx_utils:pmap(fun get_status/1, Workers, ?HEALTH_CHECK_TIMEOUT) of Statuses -> - combine_status(Statuses) + case combine_status(Statuses) of + {Status, Msg} -> + {Status, State, Msg}; + Status -> + Status + end catch exit:timeout -> ?status_connecting @@ -375,7 +380,21 @@ combine_status(Statuses) -> %% Natural order of statuses: [connected, connecting, disconnected] %% * `disconnected` wins over any other status %% * `connecting` wins over `connected` - case lists:reverse(lists:usort(Statuses)) of + ToStatus = fun + ({S, _Reason}) -> S; + (S) when is_atom(S) -> S + end, + CompareFn = fun(S1A, S2A) -> + S1 = ToStatus(S1A), + S2 = ToStatus(S2A), + S1 > S2 + end, + case lists:usort(CompareFn, Statuses) of + [{Status, Reason} | _] -> + case explain_error(Reason) of + undefined -> Status; + Msg -> {Status, Msg} + end; [Status | _] -> Status; [] -> @@ -525,13 +544,7 @@ log_connect_error_reason(Level, {tcp_closed, _} = Reason, Name) -> msg => 
"ingress_client_connect_failed", reason => Reason, name => Name, - explain => - << - "Your MQTT connection attempt was unsuccessful. " - "It might be at its maximum capacity for handling new connections. " - "To diagnose the issue further, you can check the server logs for " - "any specific messages related to the unavailability or connection limits." - >> + explain => explain_error(Reason) }); log_connect_error_reason(Level, econnrefused = Reason, Name) -> ?tp(emqx_bridge_mqtt_connector_econnrefused_error, #{}), @@ -539,15 +552,7 @@ log_connect_error_reason(Level, econnrefused = Reason, Name) -> msg => "ingress_client_connect_failed", reason => Reason, name => Name, - explain => - << - "This error indicates that your connection attempt to the MQTT server was rejected. " - "In simpler terms, the server you tried to connect to refused your request. " - "There can be multiple reasons for this. " - "For example, the MQTT server you're trying to connect to might be down or not " - "running at all or you might have provided the wrong address " - "or port number for the server." - >> + explain => explain_error(Reason) }); log_connect_error_reason(Level, Reason, Name) -> ?SLOG(Level, #{ @@ -556,6 +561,25 @@ log_connect_error_reason(Level, Reason, Name) -> name => Name }). +explain_error(econnrefused) -> + << + "This error indicates that your connection attempt to the MQTT server was rejected. " + "In simpler terms, the server you tried to connect to refused your request. " + "There can be multiple reasons for this. " + "For example, the MQTT server you're trying to connect to might be down or not " + "running at all or you might have provided the wrong address " + "or port number for the server." + >>; +explain_error({tcp_closed, _}) -> + << + "Your MQTT connection attempt was unsuccessful. " + "It might be at its maximum capacity for handling new connections. 
" + "To diagnose the issue further, you can check the server logs for " + "any specific messages related to the unavailability or connection limits." + >>; +explain_error(_Reason) -> + undefined. + handle_disconnect(_Reason) -> ok. diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl index 9030d2ac7..e0598fa1e 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl @@ -131,6 +131,9 @@ hookpoint(Config) -> BridgeId = bridge_id(Config), emqx_bridge_resource:bridge_hookpoint(BridgeId). +simplify_result(Res) -> + emqx_bridge_v2_testlib:simplify_result(Res). + %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ @@ -247,21 +250,39 @@ t_receive_via_rule(Config) -> ), ok. -t_connect_with_more_clients_than_the_broker_accepts(Config0) -> +t_connect_with_more_clients_than_the_broker_accepts(Config) -> + Name = ?config(connector_name, Config), OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default), on_exit(fun() -> emqx_mgmt_listeners_conf:update(tcp, default, OrgConf) end), NewConf = OrgConf#{<<"max_connections">> => 3}, {ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf), - ConnectorConfig0 = ?config(connector_config, Config0), - ConnectorConfig = ConnectorConfig0#{<<"pool_size">> := 100}, - Config = emqx_utils:merge_opts(Config0, [{connector_config, ConnectorConfig}]), ?check_trace( #{timetrap => 10_000}, begin - {ok, _} = emqx_bridge_v2_testlib:create_bridge_api(Config), + ?assertMatch( + {201, #{ + <<"status">> := <<"disconnected">>, + <<"status_reason">> := + <<"Your MQTT connection attempt was unsuccessful", _/binary>> + }}, + simplify_result( + emqx_bridge_v2_testlib:create_connector_api( + Config, + #{<<"pool_size">> => 100} + ) + ) + ), 
?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}), + ?assertMatch( + {200, #{ + <<"status">> := <<"disconnected">>, + <<"status_reason">> := + <<"Your MQTT connection attempt was unsuccessful", _/binary>> + }}, + simplify_result(emqx_bridge_v2_testlib:get_connector_api(mqtt, Name)) + ), ok end, fun(Trace) -> From bba9d085d6a0e03e90fca5ca7e87e964df269a87 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 1 Aug 2024 15:34:58 -0300 Subject: [PATCH 24/50] test: refactor test structure --- .../test/emqx_bridge_api_SUITE.erl | 2 +- .../src/emqx_bridge_mqtt_connector.erl | 1 + .../test/emqx_bridge_mqtt_SUITE.erl | 62 +++++++++---------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 6b160f3b3..fdbbc5376 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -1154,7 +1154,7 @@ t_bridges_probe(Config) -> ?assertMatch( {ok, 400, #{ <<"code">> := <<"TEST_FAILED">>, - <<"message">> := <<"Connection refused">> + <<"message">> := <<"Connection refused", _/binary>> }}, request_json( post, diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl index 02cf6595f..6b9a40123 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_connector.erl @@ -563,6 +563,7 @@ log_connect_error_reason(Level, Reason, Name) -> explain_error(econnrefused) -> << + "Connection refused. " "This error indicates that your connection attempt to the MQTT server was rejected. " "In simpler terms, the server you tried to connect to refused your request. " "There can be multiple reasons for this. 
" diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index e1af22c3d..42cf9d2b8 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -27,8 +27,6 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). --import(emqx_common_test_helpers, [on_exit/1]). - %% output functions -export([inspect/3]). @@ -1027,40 +1025,38 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) -> ct:sleep(1000), %% stop the listener 1883 to make the bridge disconnected - ok = emqx_listeners:stop_listener('tcp:default'), - ct:sleep(1500), - ?assertMatch( - #{<<"status">> := Status} when - Status == <<"connecting">> orelse Status == <<"disconnected">>, - request_bridge(BridgeIDEgress) - ), + ?check_trace( + begin + ok = emqx_listeners:stop_listener('tcp:default'), + ct:sleep(1500), + ?assertMatch( + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) + ), - %% start the listener 1883 to make the bridge reconnected - ok = emqx_listeners:start_listener('tcp:default'), - timer:sleep(1500), - ?assertMatch( - #{<<"status">> := <<"connected">>}, - request_bridge(BridgeIDEgress) - ), + %% start the listener 1883 to make the bridge reconnected + ok = emqx_listeners:start_listener('tcp:default'), + timer:sleep(1500), + ?assertMatch( + #{<<"status">> := <<"connected">>}, + request_bridge(BridgeIDEgress) + ), - N = stop_publisher(Publisher), + N = stop_publisher(Publisher), - %% all those messages should eventually be delivered - [ - assert_mqtt_msg_received(RemoteTopic, Payload) - || I <- lists:seq(1, N), - Payload <- [integer_to_binary(I)] - ], - - Trace = snabbkaffe:collect_trace(50), - ?assert( - lists:any( - fun(K) -> - maps:get(msg, K, not_found) =:= - emqx_bridge_mqtt_connector_econnrefused_error - end, - Trace - ) + %% all those messages 
should eventually be delivered + [ + assert_mqtt_msg_received(RemoteTopic, Payload) + || I <- lists:seq(1, N), + Payload <- [integer_to_binary(I)] + ], + ok + end, + fun(Trace) -> + ?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_econnrefused_error, Trace)), + ok + end ), ok. From 51530588efea05042d7ce5fd72d81fa4a3114da7 Mon Sep 17 00:00:00 2001 From: zmstone Date: Thu, 1 Aug 2024 22:41:42 +0200 Subject: [PATCH 25/50] ci: fix a typo in commented out docker-compose yaml file --- .ci/docker-compose-file/docker-compose-ldap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker-compose-file/docker-compose-ldap.yaml b/.ci/docker-compose-file/docker-compose-ldap.yaml index 6804f6b01..339db8d85 100644 --- a/.ci/docker-compose-file/docker-compose-ldap.yaml +++ b/.ci/docker-compose-file/docker-compose-ldap.yaml @@ -10,7 +10,7 @@ services: nofile: 1024 image: openldap #ports: - # - 389:389 + # - "389:389" volumes: - ./certs/ca.crt:/etc/certs/ca.crt restart: always From 6c2033ecbfceab3840343c1e17b6f81411856a8f Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Fri, 2 Aug 2024 11:03:59 +0800 Subject: [PATCH 26/50] fix: deactivate alarm before create resource --- apps/emqx_connector/src/emqx_connector_resource.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_connector/src/emqx_connector_resource.erl b/apps/emqx_connector/src/emqx_connector_resource.erl index 8cb61793d..cdbba4f95 100644 --- a/apps/emqx_connector/src/emqx_connector_resource.erl +++ b/apps/emqx_connector/src/emqx_connector_resource.erl @@ -125,6 +125,7 @@ create(Type, Name, Conf0, Opts) -> TypeBin = bin(Type), ResourceId = resource_id(Type, Name), Conf = Conf0#{connector_type => TypeBin, connector_name => Name}, + _ = emqx_alarm:ensure_deactivated(ResourceId), {ok, _Data} = emqx_resource:create_local( ResourceId, ?CONNECTOR_RESOURCE_GROUP, @@ -132,7 +133,6 @@ create(Type, Name, Conf0, Opts) -> parse_confs(TypeBin, Name, Conf), parse_opts(Conf, 
Opts) ), - _ = emqx_alarm:ensure_deactivated(ResourceId), ok. update(ConnectorId, {OldConf, Conf}) -> From 74c346f9d1c063e529bbcf419096610ccdbd18ef Mon Sep 17 00:00:00 2001 From: firest Date: Fri, 2 Aug 2024 12:00:36 +0800 Subject: [PATCH 27/50] fix(log): respect payload encoding settings when formatting packets --- apps/emqx/include/logger.hrl | 2 +- apps/emqx/src/emqx_logger_jsonfmt.erl | 2 +- apps/emqx/src/emqx_logger_textfmt.erl | 24 +++++-- apps/emqx/src/emqx_packet.erl | 5 -- .../src/emqx_trace/emqx_trace_formatter.erl | 5 -- apps/emqx/src/emqx_ws_connection.erl | 2 +- apps/emqx/test/emqx_packet_SUITE.erl | 72 ++++++++++++------- 7 files changed, 67 insertions(+), 45 deletions(-) diff --git a/apps/emqx/include/logger.hrl b/apps/emqx/include/logger.hrl index a7455418d..4cda2d8af 100644 --- a/apps/emqx/include/logger.hrl +++ b/apps/emqx/include/logger.hrl @@ -88,7 +88,7 @@ ?_DO_TRACE(Tag, Msg, Meta), ?SLOG( Level, - (emqx_trace_formatter:format_meta_map(Meta))#{msg => Msg, tag => Tag}, + (Meta)#{msg => Msg, tag => Tag}, #{is_trace => false} ) end). diff --git a/apps/emqx/src/emqx_logger_jsonfmt.erl b/apps/emqx/src/emqx_logger_jsonfmt.erl index 1c4682171..54c88018c 100644 --- a/apps/emqx/src/emqx_logger_jsonfmt.erl +++ b/apps/emqx/src/emqx_logger_jsonfmt.erl @@ -105,7 +105,7 @@ format(Msg, Meta, Config) -> maybe_format_msg(undefined, _Meta, _Config) -> #{}; maybe_format_msg({report, Report0} = Msg, #{report_cb := Cb} = Meta, Config) -> - Report = emqx_logger_textfmt:try_encode_payload(Report0, Config), + Report = emqx_logger_textfmt:try_encode_meta(Report0, Config), case is_map(Report) andalso Cb =:= ?DEFAULT_FORMATTER of true -> %% reporting a map without a customised format function diff --git a/apps/emqx/src/emqx_logger_textfmt.erl b/apps/emqx/src/emqx_logger_textfmt.erl index f1df7ab76..c1084142f 100644 --- a/apps/emqx/src/emqx_logger_textfmt.erl +++ b/apps/emqx/src/emqx_logger_textfmt.erl @@ -20,7 +20,7 @@ -export([format/2]). -export([check_config/1]). 
--export([try_format_unicode/1, try_encode_payload/2]). +-export([try_format_unicode/1, try_encode_meta/2]). %% Used in the other log formatters -export([evaluate_lazy_values_if_dbg_level/1, evaluate_lazy_values/1]). @@ -105,7 +105,7 @@ is_list_report_acceptable(_) -> enrich_report(ReportRaw0, Meta, Config) -> %% clientid and peername always in emqx_conn's process metadata. %% topic and username can be put in meta using ?SLOG/3, or put in msg's report by ?SLOG/2 - ReportRaw = try_encode_payload(ReportRaw0, Config), + ReportRaw = try_encode_meta(ReportRaw0, Config), Topic = case maps:get(topic, Meta, undefined) of undefined -> maps:get(topic, ReportRaw, undefined); @@ -172,9 +172,22 @@ enrich_topic({Fmt, Args}, #{topic := Topic}) when is_list(Fmt) -> enrich_topic(Msg, _) -> Msg. -try_encode_payload(#{payload := Payload} = Report, #{payload_encode := Encode}) -> +try_encode_meta(Report, Config) -> + lists:foldl( + fun(Meta, Acc) -> + try_encode_meta(Meta, Acc, Config) + end, + Report, + [payload, packet] + ). + +try_encode_meta(payload, #{payload := Payload} = Report, #{payload_encode := Encode}) -> Report#{payload := encode_payload(Payload, Encode)}; -try_encode_payload(Report, _Config) -> +try_encode_meta(packet, #{packet := Packet} = Report, #{payload_encode := Encode}) when + is_tuple(Packet) +-> + Report#{packet := emqx_packet:format(Packet, Encode)}; +try_encode_meta(_, Report, _Config) -> Report. encode_payload(Payload, text) -> @@ -182,4 +195,5 @@ encode_payload(Payload, text) -> encode_payload(_Payload, hidden) -> "******"; encode_payload(Payload, hex) -> - binary:encode_hex(Payload). + Bin = emqx_utils_conv:bin(Payload), + binary:encode_hex(Bin). diff --git a/apps/emqx/src/emqx_packet.erl b/apps/emqx/src/emqx_packet.erl index 9d6516126..6f5fa0972 100644 --- a/apps/emqx/src/emqx_packet.erl +++ b/apps/emqx/src/emqx_packet.erl @@ -51,7 +51,6 @@ ]). -export([ - format/1, format/2 ]). 
@@ -481,10 +480,6 @@ will_msg(#mqtt_packet_connect{ headers = #{username => Username, properties => Props} }. -%% @doc Format packet --spec format(emqx_types:packet()) -> iolist(). -format(Packet) -> format(Packet, emqx_trace_handler:payload_encode()). - %% @doc Format packet -spec format(emqx_types:packet(), hex | text | hidden) -> iolist(). format(#mqtt_packet{header = Header, variable = Variable, payload = Payload}, PayloadEncode) -> diff --git a/apps/emqx/src/emqx_trace/emqx_trace_formatter.erl b/apps/emqx/src/emqx_trace/emqx_trace_formatter.erl index e540ae82a..728280700 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace_formatter.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace_formatter.erl @@ -17,7 +17,6 @@ -include("emqx_mqtt.hrl"). -export([format/2]). --export([format_meta_map/1]). %% logger_formatter:config/0 is not exported. -type config() :: map(). @@ -43,10 +42,6 @@ format( format(Event, Config) -> emqx_logger_textfmt:format(Event, Config). -format_meta_map(Meta) -> - Encode = emqx_trace_handler:payload_encode(), - format_meta_map(Meta, Encode). 
- format_meta_map(Meta, Encode) -> format_meta_map(Meta, Encode, [ {packet, fun format_packet/2}, diff --git a/apps/emqx/src/emqx_ws_connection.erl b/apps/emqx/src/emqx_ws_connection.erl index 5d04b3304..db4151416 100644 --- a/apps/emqx/src/emqx_ws_connection.erl +++ b/apps/emqx/src/emqx_ws_connection.erl @@ -820,7 +820,7 @@ serialize_and_inc_stats_fun(#state{serialize = Serialize}) -> ?LOG(warning, #{ msg => "packet_discarded", reason => "frame_too_large", - packet => emqx_packet:format(Packet) + packet => Packet }), ok = emqx_metrics:inc('delivery.dropped.too_large'), ok = emqx_metrics:inc('delivery.dropped'), diff --git a/apps/emqx/test/emqx_packet_SUITE.erl b/apps/emqx/test/emqx_packet_SUITE.erl index 4ba6d1d82..04ad53a1e 100644 --- a/apps/emqx/test/emqx_packet_SUITE.erl +++ b/apps/emqx/test/emqx_packet_SUITE.erl @@ -377,42 +377,60 @@ t_will_msg(_) -> t_format(_) -> io:format("~ts", [ - emqx_packet:format(#mqtt_packet{ - header = #mqtt_packet_header{type = ?CONNACK, retain = true, dup = 0}, - variable = undefined - }) - ]), - io:format("~ts", [ - emqx_packet:format(#mqtt_packet{ - header = #mqtt_packet_header{type = ?CONNACK}, variable = 1, payload = <<"payload">> - }) + emqx_packet:format( + #mqtt_packet{ + header = #mqtt_packet_header{type = ?CONNACK, retain = true, dup = 0}, + variable = undefined + }, + text + ) ]), + io:format( + "~ts", + [ + emqx_packet:format( + #mqtt_packet{ + header = #mqtt_packet_header{type = ?CONNACK}, + variable = 1, + payload = <<"payload">> + }, + text + ) + ] + ), io:format("~ts", [ emqx_packet:format( - ?CONNECT_PACKET(#mqtt_packet_connect{ - will_flag = true, - will_retain = true, - will_qos = ?QOS_2, - will_topic = <<"topic">>, - will_payload = <<"payload">> - }) + ?CONNECT_PACKET( + #mqtt_packet_connect{ + will_flag = true, + will_retain = true, + will_qos = ?QOS_2, + will_topic = <<"topic">>, + will_payload = <<"payload">> + } + ), + text ) ]), io:format("~ts", [ - 
emqx_packet:format(?CONNECT_PACKET(#mqtt_packet_connect{password = password})) + emqx_packet:format(?CONNECT_PACKET(#mqtt_packet_connect{password = password}), text) ]), - io:format("~ts", [emqx_packet:format(?CONNACK_PACKET(?CONNACK_SERVER))]), - io:format("~ts", [emqx_packet:format(?PUBLISH_PACKET(?QOS_1, 1))]), - io:format("~ts", [emqx_packet:format(?PUBLISH_PACKET(?QOS_2, <<"topic">>, 10, <<"payload">>))]), - io:format("~ts", [emqx_packet:format(?PUBACK_PACKET(?PUBACK, 98))]), - io:format("~ts", [emqx_packet:format(?PUBREL_PACKET(99))]), + io:format("~ts", [emqx_packet:format(?CONNACK_PACKET(?CONNACK_SERVER), text)]), + io:format("~ts", [emqx_packet:format(?PUBLISH_PACKET(?QOS_1, 1), text)]), io:format("~ts", [ - emqx_packet:format(?SUBSCRIBE_PACKET(15, [{<<"topic">>, ?QOS_0}, {<<"topic1">>, ?QOS_1}])) + emqx_packet:format(?PUBLISH_PACKET(?QOS_2, <<"topic">>, 10, <<"payload">>), text) ]), - io:format("~ts", [emqx_packet:format(?SUBACK_PACKET(40, [?QOS_0, ?QOS_1]))]), - io:format("~ts", [emqx_packet:format(?UNSUBSCRIBE_PACKET(89, [<<"t">>, <<"t2">>]))]), - io:format("~ts", [emqx_packet:format(?UNSUBACK_PACKET(90))]), - io:format("~ts", [emqx_packet:format(?DISCONNECT_PACKET(128))]). + io:format("~ts", [emqx_packet:format(?PUBACK_PACKET(?PUBACK, 98), text)]), + io:format("~ts", [emqx_packet:format(?PUBREL_PACKET(99), text)]), + io:format("~ts", [ + emqx_packet:format( + ?SUBSCRIBE_PACKET(15, [{<<"topic">>, ?QOS_0}, {<<"topic1">>, ?QOS_1}]), text + ) + ]), + io:format("~ts", [emqx_packet:format(?SUBACK_PACKET(40, [?QOS_0, ?QOS_1]), text)]), + io:format("~ts", [emqx_packet:format(?UNSUBSCRIBE_PACKET(89, [<<"t">>, <<"t2">>]), text)]), + io:format("~ts", [emqx_packet:format(?UNSUBACK_PACKET(90), text)]), + io:format("~ts", [emqx_packet:format(?DISCONNECT_PACKET(128), text)]). 
t_parse_empty_publish(_) -> %% 52: 0011(type=PUBLISH) 0100 (QoS=2) From c9c4d1a196bba42dadb79165c015a8b2b205a9b1 Mon Sep 17 00:00:00 2001 From: firest Date: Mon, 5 Aug 2024 15:56:05 +0800 Subject: [PATCH 28/50] fix(api_key): do not crash boot when the bootstrap file is not exists --- apps/emqx_management/src/emqx_mgmt_app.erl | 10 +++------- apps/emqx_management/src/emqx_mgmt_auth.erl | 16 +++++++--------- .../test/emqx_mgmt_api_api_keys_SUITE.erl | 8 ++++---- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_app.erl b/apps/emqx_management/src/emqx_mgmt_app.erl index f8f0b6f5b..e8febb7ec 100644 --- a/apps/emqx_management/src/emqx_mgmt_app.erl +++ b/apps/emqx_management/src/emqx_mgmt_app.erl @@ -29,13 +29,9 @@ start(_Type, _Args) -> ok = mria:wait_for_tables(emqx_mgmt_auth:create_tables()), - case emqx_mgmt_auth:init_bootstrap_file() of - ok -> - emqx_conf:add_handler([api_key], emqx_mgmt_auth), - emqx_mgmt_sup:start_link(); - {error, Reason} -> - {error, Reason} - end. + emqx_mgmt_auth:try_init_bootstrap_file(), + emqx_conf:add_handler([api_key], emqx_mgmt_auth), + emqx_mgmt_sup:start_link(). stop(_State) -> emqx_conf:remove_handler([api_key]), diff --git a/apps/emqx_management/src/emqx_mgmt_auth.erl b/apps/emqx_management/src/emqx_mgmt_auth.erl index d822eb788..e64caad57 100644 --- a/apps/emqx_management/src/emqx_mgmt_auth.erl +++ b/apps/emqx_management/src/emqx_mgmt_auth.erl @@ -32,7 +32,7 @@ update/5, delete/1, list/0, - init_bootstrap_file/0, + try_init_bootstrap_file/0, format/1 ]). @@ -52,6 +52,7 @@ -ifdef(TEST). -export([create/7]). -export([trans/2, force_create_app/1]). +-export([init_bootstrap_file/1]). -endif. -define(APP, emqx_app). @@ -114,11 +115,12 @@ post_config_update([api_key], _Req, NewConf, _OldConf, _AppEnvs) -> end, ok. --spec init_bootstrap_file() -> ok | {error, _}. -init_bootstrap_file() -> +-spec try_init_bootstrap_file() -> ok | {error, _}. 
+try_init_bootstrap_file() -> File = bootstrap_file(), ?SLOG(debug, #{msg => "init_bootstrap_api_keys_from_file", file => File}), - init_bootstrap_file(File). + _ = init_bootstrap_file(File), + ok. create(Name, Enable, ExpiredAt, Desc, Role) -> ApiKey = generate_unique_api_key(Name), @@ -357,10 +359,6 @@ init_bootstrap_file(File) -> init_bootstrap_file(File, Dev, MP); {error, Reason0} -> Reason = emqx_utils:explain_posix(Reason0), - FmtReason = emqx_utils:format( - "load API bootstrap file failed, file:~ts, reason:~ts", - [File, Reason] - ), ?SLOG( error, @@ -371,7 +369,7 @@ init_bootstrap_file(File) -> } ), - {error, FmtReason} + {error, Reason} end. init_bootstrap_file(File, Dev, MP) -> diff --git a/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl index 177216b42..f2856fe63 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl @@ -100,7 +100,7 @@ t_bootstrap_file(_) -> BadBin = <<"test-1:secret-11\ntest-2 secret-12">>, ok = file:write_file(File, BadBin), update_file(File), - ?assertMatch({error, #{reason := "invalid_format"}}, emqx_mgmt_auth:init_bootstrap_file()), + ?assertMatch({error, #{reason := "invalid_format"}}, emqx_mgmt_auth:init_bootstrap_file(File)), ?assertEqual(ok, auth_authorize(TestPath, <<"test-1">>, <<"secret-11">>)), ?assertMatch({error, _}, auth_authorize(TestPath, <<"test-2">>, <<"secret-12">>)), update_file(<<>>), @@ -123,7 +123,7 @@ t_bootstrap_file_override(_) -> ok = file:write_file(File, Bin), update_file(File), - ?assertEqual(ok, emqx_mgmt_auth:init_bootstrap_file()), + ?assertEqual(ok, emqx_mgmt_auth:init_bootstrap_file(File)), MatchFun = fun(ApiKey) -> mnesia:match_object(#?APP{api_key = ApiKey, _ = '_'}) end, ?assertMatch( @@ -156,7 +156,7 @@ t_bootstrap_file_dup_override(_) -> File = "./bootstrap_api_keys.txt", ok = file:write_file(File, Bin), update_file(File), - ?assertEqual(ok, 
emqx_mgmt_auth:init_bootstrap_file()), + ?assertEqual(ok, emqx_mgmt_auth:init_bootstrap_file(File)), SameAppWithDiffName = #?APP{ name = <<"name-1">>, @@ -190,7 +190,7 @@ t_bootstrap_file_dup_override(_) -> %% Similar to loading bootstrap file at node startup %% the duplicated apikey in mnesia will be cleaned up - ?assertEqual(ok, emqx_mgmt_auth:init_bootstrap_file()), + ?assertEqual(ok, emqx_mgmt_auth:init_bootstrap_file(File)), ?assertMatch( {ok, [ #?APP{ From bd87e3ce2b7a59578f6fdb36f53b0d0bb9f13a4d Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Mon, 5 Aug 2024 15:58:43 +0800 Subject: [PATCH 29/50] chore: update esockd to 5.12.0 --- apps/emqx/rebar.config | 2 +- mix.exs | 2 +- rebar.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 7a6ec9810..1d818fff0 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {lc, {git, "https://github.com/emqx/lc.git", {tag, "0.3.2"}}}, {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.3"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.12.0"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.5"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.43.1"}}}, diff --git a/mix.exs b/mix.exs index dc2953683..366186e6b 100644 --- a/mix.exs +++ b/mix.exs @@ -182,7 +182,7 @@ defmodule EMQXUmbrella.MixProject do end def common_dep(:ekka), do: {:ekka, github: "emqx/ekka", tag: "0.19.5", override: true} - def common_dep(:esockd), do: {:esockd, github: "emqx/esockd", tag: "5.11.3", override: true} + def common_dep(:esockd), do: {:esockd, github: "emqx/esockd", tag: "5.12.0", override: true} def common_dep(:gproc), do: {:gproc, github: "emqx/gproc", tag: "0.9.0.1", override: 
true} def common_dep(:hocon), do: {:hocon, github: "emqx/hocon", tag: "0.43.1", override: true} def common_dep(:lc), do: {:lc, github: "emqx/lc", tag: "0.3.2", override: true} diff --git a/rebar.config b/rebar.config index b260561cb..cc7024b79 100644 --- a/rebar.config +++ b/rebar.config @@ -82,7 +82,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.6"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, - {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.3"}}}, + {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.12.0"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-6"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.5"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, From 4cf715113912a859db5e2db233a91d43e8e0c3f5 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Mon, 5 Aug 2024 11:01:57 +0200 Subject: [PATCH 30/50] chore: prepare 5.8.0-alpha.1 --- .github/workflows/build_packages_cron.yaml | 1 + .github/workflows/codeql.yaml | 1 + .github/workflows/green_master.yaml | 1 + apps/emqx/include/emqx_release.hrl | 4 ++-- deploy/charts/emqx-enterprise/Chart.yaml | 4 ++-- deploy/charts/emqx/Chart.yaml | 4 ++-- scripts/rel/cut.sh | 6 ++++++ scripts/rel/sync-remotes.sh | 8 ++++++-- 8 files changed, 21 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_packages_cron.yaml b/.github/workflows/build_packages_cron.yaml index b7d05ad94..27f4698b6 100644 --- a/.github/workflows/build_packages_cron.yaml +++ b/.github/workflows/build_packages_cron.yaml @@ -23,6 +23,7 @@ jobs: profile: - ['emqx', 'master'] - ['emqx', 'release-57'] + - ['emqx', 'release-58'] os: - ubuntu22.04 - amzn2023 diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index f086b97f6..f6077262f 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -24,6 +24,7 @@ jobs: 
branch: - master - release-57 + - release-58 language: - cpp - python diff --git a/.github/workflows/green_master.yaml b/.github/workflows/green_master.yaml index 8479ea5ed..a5317027b 100644 --- a/.github/workflows/green_master.yaml +++ b/.github/workflows/green_master.yaml @@ -24,6 +24,7 @@ jobs: ref: - master - release-57 + - release-58 steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index c8e84ea06..7979d00fd 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.7.1"). +-define(EMQX_RELEASE_CE, "5.8.0-alpha.1"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.7.1"). +-define(EMQX_RELEASE_EE, "5.8.0-alpha.1"). diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index cd795d4f4..3c25b9ebc 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.7.1 +version: 5.8.0-alpha.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.7.1 +appVersion: 5.8.0-alpha.1 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index d31648f2f..49a3f7c84 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
-version: 5.7.1 +version: 5.8.0-alpha.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.7.1 +appVersion: 5.8.0-alpha.1 diff --git a/scripts/rel/cut.sh b/scripts/rel/cut.sh index 1affd48bf..8c8899b91 100755 --- a/scripts/rel/cut.sh +++ b/scripts/rel/cut.sh @@ -135,6 +135,12 @@ rel_branch() { e5.7.*) echo 'release-57' ;; + v5.8.*) + echo 'release-58' + ;; + e5.8.*) + echo 'release-58' + ;; *) logerr "Unsupported version tag $TAG" exit 1 diff --git a/scripts/rel/sync-remotes.sh b/scripts/rel/sync-remotes.sh index 430021a79..c986535ce 100755 --- a/scripts/rel/sync-remotes.sh +++ b/scripts/rel/sync-remotes.sh @@ -5,7 +5,7 @@ set -euo pipefail # ensure dir cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." -BASE_BRANCHES=( 'release-57' 'release-56' 'release-55' 'master' ) +BASE_BRANCHES=( 'release-58' 'release-57' 'release-56' 'release-55' 'master' ) usage() { cat < Date: Tue, 6 Aug 2024 09:32:17 +0800 Subject: [PATCH 31/50] chore(dashboard): bump dashboard version to v1.9.2-beta.1 & e1.7.2-beta.7 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 41d4bd5b6..f390f600d 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ include env.sh # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.9.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.7.1 +export EMQX_DASHBOARD_VERSION ?= v1.9.2-beta.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.7.2-beta.7 export EMQX_RELUP ?= true export EMQX_REL_FORM ?= tgz From dbbd5e1458594c9625a517c9230ab72f5d8da7d0 Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 6 Aug 2024 09:33:20 +0800 Subject: [PATCH 32/50] ci: update emqx docs link in dashboard --- scripts/ui-tests/dashboard_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py 
b/scripts/ui-tests/dashboard_test.py index c349739b6..356432b8e 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -131,10 +131,7 @@ def test_docs_link(driver, dashboard_url): # it's v5.x in the url emqx_version = 'v' + emqx_version - if prefix == 'e': - docs_base_url = "https://docs.emqx.com/en/enterprise" - else: - docs_base_url = "https://docs.emqx.com/en/emqx" + docs_base_url = "https://docs.emqx.com/en/emqx" docs_url = f"{docs_base_url}/{emqx_version}" xpath = f"//div[@id='app']//div[@class='nav-header']//a[@href[starts-with(.,'{docs_url}')]]" From cba3dcbeda65845f698ebca90feb5fd7f9916c77 Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 6 Aug 2024 13:44:16 +0800 Subject: [PATCH 33/50] chore(dashboard): bump dashboard version to v1.10.0-beta.1 & e1.8.0-beta.1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2f4519cfd..69667c952 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ include env.sh # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.9.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.7.1 +export EMQX_DASHBOARD_VERSION ?= v1.10.0-beta.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.8.0-beta.1 export EMQX_RELUP ?= true export EMQX_REL_FORM ?= tgz From 3ee84d60ae57f8e2bfff4a2043bad9dd1b001ca5 Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 6 Aug 2024 18:11:35 +0800 Subject: [PATCH 34/50] chore(dashboard): bump dashboard version to v1.9.2 & e1.7.2 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f390f600d..524544459 100644 --- a/Makefile +++ b/Makefile @@ -10,8 +10,8 @@ include env.sh # Dashboard version # from https://github.com/emqx/emqx-dashboard5 -export EMQX_DASHBOARD_VERSION ?= v1.9.2-beta.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.7.2-beta.7 +export EMQX_DASHBOARD_VERSION ?= v1.9.2 +export EMQX_EE_DASHBOARD_VERSION ?= e1.7.2 export 
EMQX_RELUP ?= true export EMQX_REL_FORM ?= tgz From 822ed71282c80fdc1476217ca382f653e7c3b5a3 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Tue, 6 Aug 2024 13:25:56 +0200 Subject: [PATCH 35/50] chore: release 5.7.2 --- apps/emqx/include/emqx_release.hrl | 4 +- changes/v5.7.2.en.md | 87 ++++++++++++++++++++++++ deploy/charts/emqx-enterprise/Chart.yaml | 4 +- deploy/charts/emqx/Chart.yaml | 4 +- 4 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 changes/v5.7.2.en.md diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index c8e84ea06..4357f7aef 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.7.1"). +-define(EMQX_RELEASE_CE, "5.7.2"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.7.1"). +-define(EMQX_RELEASE_EE, "5.7.2"). diff --git a/changes/v5.7.2.en.md b/changes/v5.7.2.en.md new file mode 100644 index 000000000..fc2d3ae87 --- /dev/null +++ b/changes/v5.7.2.en.md @@ -0,0 +1,87 @@ +## 5.7.2 + +*Release Date: 2024-08-06* + +### Enhancements + +- [#13317](https://github.com/emqx/emqx/pull/13317) Added a new per-authorization source metric type: `ignore`. This metric increments when an authorization source attempts to authorize a request but encounters scenarios where the authorization is not applicable or encounters an error, resulting in an undecidable outcome. + +- [#13336](https://github.com/emqx/emqx/pull/13336) Added functionality to initialize authentication data in the built-in database of an empty EMQX node or cluster using a bootstrap file in CSV or JSON format. This feature introduces new configuration entries, `bootstrap_file` and `bootstrap_type`. + +- [#13348](https://github.com/emqx/emqx/pull/13348) Added a new field `payload_encode` in the log configuration to determine the format of the payload in the log data. 
+ +- [#13436](https://github.com/emqx/emqx/pull/13436) Added the option to add custom request headers to JWKS requests. + +- [#13507](https://github.com/emqx/emqx/pull/13507) Introduced a new built-in function `getenv` in the rule engine and variform expression to facilitate access to environment variables. This function adheres to the following constraints: + + - Prefix `EMQXVAR_` is added before reading from OS environment variables. For example, `getenv('FOO_BAR')` is to read `EMQXVAR_FOO_BAR`. + - These values are immutable once loaded from the OS environment. + +- [#13521](https://github.com/emqx/emqx/pull/13521) Resolved an issue where LDAP query timeouts could cause the underlying connection to become unusable, potentially causing subsequent queries to return outdated results. The fix ensures the system reconnects automatically in case of a timeout. + +- [#13528](https://github.com/emqx/emqx/pull/13528) Applied log throttling for the event of unrecoverable errors in data integrations. + +- [#13548](https://github.com/emqx/emqx/pull/13548) EMQX now can optionally invoke the `on_config_changed/2` callback function when the plugin configuration is updated via the REST API. This callback function is assumed to be exported by the `_app` module. + For example, if the plugin name and version are `my_plugin-1.0.0`, then the callback function is assumed to be `my_plugin_app:on_config_changed/2`. + +- [#13386](https://github.com/emqx/emqx/pull/13386) Added support for initializing a list of banned clients on an empty EMQX node or cluster with a bootstrap file in CSV format. The corresponding config entry to specify the file path is `banned.bootstrap_file`. This file is a CSV file with `,` as its delimiter. The first line of this file must be a header line. 
All valid headers are listed here: + + - as :: required + - who :: required + - by :: optional + - reason :: optional + - at :: optional + - until :: optional + + See the [Configuration Manual](https://docs.emqx.com/en/enterprise/v@EE_VERSION@/hocon/) for details on each field. + + Each row in the rest of this file must contain the same number of columns as the header line, and the column can be omitted then its value is `undefined`. + +### Bug Fixes + +- [#13222](https://github.com/emqx/emqx/pull/13222) Resolved issues with flags checking and error handling associated with the Will message in the `CONNECT` packet. + For detailed specifications, refer to: + + - MQTT-v3.1.1-[MQTT-3.1.2-13], MQTT-v5.0-[MQTT-3.1.2-11] + - MQTT-v3.1.1-[MQTT-3.1.2-14], MQTT-v5.0-[MQTT-3.1.2-12] + - MQTT-v3.1.1-[MQTT-3.1.2-15], MQTT-v5.0-[MQTT-3.1.2-13] + +- [#13307](https://github.com/emqx/emqx/pull/13307) Updated `ekka` library to version 0.19.5. This version of `ekka` utilizes `mria` 0.8.8, enhancing auto-heal functionality. Previously, the auto-heal worked only when all core nodes were reachable. This update allows to apply auto-heal once the majority of core nodes are alive. For details, refer to the [Mria PR](https://github.com/emqx/mria/pull/180). + +- [#13334](https://github.com/emqx/emqx/pull/13334) Implemented strict mode checking for the `PasswordFlag` in the MQTT v3.1.1 CONNECT packet to align with protocol specifications. + + Note: To ensure bug-to-bug compatibility, this check is performed only in strict mode. + +- [#13344](https://github.com/emqx/emqx/pull/13344) Resolved an issue where the `POST /clients/:clientid/subscribe/bulk` API would not function correctly if the node receiving the API request did not maintain the connection to the specified `clientid`. + +- [#13358](https://github.com/emqx/emqx/pull/13358) Fixed an issue when the `reason` in the `authn_complete_event` event was incorrectly displayed. 
+- [#13375](https://github.com/emqx/emqx/pull/13375) The value `infinity` has been added as default value to the listener configuration fields `max_conn_rate`, `messages_rate`, and `bytes_rate`. + +- [#13382](https://github.com/emqx/emqx/pull/13382) Updated the `emqtt` library to version 0.4.14, which resolves an issue preventing `emqtt_pool`s from reusing pools that are in an inconsistent state. + +- [#13389](https://github.com/emqx/emqx/pull/13389) Fixed an issue where the `Derived Key Length` for `pbkdf2` could be set to a negative integer. + +- [#13389](https://github.com/emqx/emqx/pull/13389) Fixed an issue where topics in the authorization rules might be parsed incorrectly. + +- [#13393](https://github.com/emqx/emqx/pull/13393) Fixed an issue where plugin applications failed to restart after a node joined a cluster, resulting in hooks not being properly installed and causing inconsistent states. + +- [#13398](https://github.com/emqx/emqx/pull/13398) Fixed an issue where ACL rules were incorrectly cleared when reloading the built-in database for authorization using the command line. + +- [#13403](https://github.com/emqx/emqx/pull/13403) Addressed a security issue where environment variable configuration overrides were inadvertently logging passwords. This fix ensures that passwords present in environment variables are not logged. + +- [#13408](https://github.com/emqx/emqx/pull/13408) Resolved a `function_clause` crash triggered by authentication attempts with invalid salt or password types. This fix enhances error handling to better manage authentication failures involving incorrect salt or password types. + +- [#13419](https://github.com/emqx/emqx/pull/13419) Resolved an issue where crash log messages from the `/configs` API were displaying garbled hints. This fix ensures that log messages related to API calls are clear and understandable. 
+ +- [#13422](https://github.com/emqx/emqx/pull/13422) Fixed an issue where the option `force_shutdown.max_heap_size` could not be set to 0 to disable this tuning. + +- [#13442](https://github.com/emqx/emqx/pull/13442) Fixed an issue where the health check interval configuration for actions/sources was not being respected. Previously, EMQX ignored the specified health check interval for actions and used the connector's interval instead. The fix ensures that EMQX now correctly uses the health check interval configured for actions/sources, allowing for independent and accurate health monitoring frequencies. + +- [#13503](https://github.com/emqx/emqx/pull/13503) Fixed an issue where connectors did not adhere to the configured health check interval upon initial startup, requiring an update or restart to apply the correct interval. + +- [#13515](https://github.com/emqx/emqx/pull/13515) Fixed an issue where the same client could not subscribe to the same exclusive topic when the node was down for some reason. + +- [#13527](https://github.com/emqx/emqx/pull/13527) Fixed an issue in the Rule Engine where executing a SQL test for the Message Publish event would consistently return no results when a `$bridges/...` source was included in the `FROM` clause. + +- [#13541](https://github.com/emqx/emqx/pull/13541) Fixed an issue where disabling CRL checks for a listener required a listener restart to take effect. +- [#13552](https://github.com/emqx/emqx/pull/13552) Added a startup timeout limit for EMQX plugins with a default timeout of 10 seconds. Before this update, problematic plugins could cause runtime errors during startup, leading to potential issues where the main startup process might hang when EMQX is stopped and restarted. 
diff --git a/deploy/charts/emqx-enterprise/Chart.yaml b/deploy/charts/emqx-enterprise/Chart.yaml index cd795d4f4..5d19cf95a 100644 --- a/deploy/charts/emqx-enterprise/Chart.yaml +++ b/deploy/charts/emqx-enterprise/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.7.1 +version: 5.7.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.7.1 +appVersion: 5.7.2 diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index d31648f2f..2948db061 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.7.1 +version: 5.7.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: 5.7.1 +appVersion: 5.7.2 From b80513e941be82d1a9a3bbfbcf5d30b79a0b1bdd Mon Sep 17 00:00:00 2001 From: Kinplemelon Date: Tue, 6 Aug 2024 09:33:20 +0800 Subject: [PATCH 36/50] ci: update emqx docs link in dashboard --- scripts/ui-tests/dashboard_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/ui-tests/dashboard_test.py b/scripts/ui-tests/dashboard_test.py index c349739b6..356432b8e 100644 --- a/scripts/ui-tests/dashboard_test.py +++ b/scripts/ui-tests/dashboard_test.py @@ -131,10 +131,7 @@ def test_docs_link(driver, dashboard_url): # it's v5.x in the url emqx_version = 'v' + emqx_version - if prefix == 'e': - docs_base_url = "https://docs.emqx.com/en/enterprise" - else: - docs_base_url = "https://docs.emqx.com/en/emqx" + docs_base_url = "https://docs.emqx.com/en/emqx" docs_url = f"{docs_base_url}/{emqx_version}" xpath = f"//div[@id='app']//div[@class='nav-header']//a[@href[starts-with(.,'{docs_url}')]]" From 0aa4cdbaf355e6bce3fed48a975b1fe251c1af5d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:51:14 +0200 Subject: [PATCH 37/50] feat(ds): add generic preconditions implementation --- .../src/emqx_ds_precondition.erl | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 apps/emqx_durable_storage/src/emqx_ds_precondition.erl diff --git a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl new file mode 100644 index 000000000..5f86d6c25 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl @@ -0,0 +1,184 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 20212024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ds_precondition). +-include_lib("emqx_utils/include/emqx_message.hrl"). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). + +-export([verify/3]). +-export([matches/2]). + +-export_type([matcher/0, mismatch/0]). + +-type matcher() :: #message_matcher{}. +-type mismatch() :: emqx_types:message() | not_found. + +-callback lookup_message(_Ctx, matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). + +%% + +-spec verify(module(), _Ctx, [emqx_ds:precondition()]) -> + ok | {precondition_failed, mismatch()} | emqx_ds:error(_). +verify(Mod, Ctx, [_Precondition = {Cond, Msg} | Rest]) -> + case verify_precondition(Mod, Ctx, Cond, Msg) of + ok -> + verify(Mod, Ctx, Rest); + Failed -> + Failed + end; +verify(_Mod, _Ctx, []) -> + ok. + +verify_precondition(Mod, Ctx, if_exists, Matcher) -> + case Mod:lookup_message(Ctx, Matcher) of + Msg = #message{} -> + verify_match(Msg, Matcher); + not_found -> + {precondition_failed, not_found}; + Error = {error, _, _} -> + Error + end; +verify_precondition(Mod, Ctx, unless_exists, Matcher) -> + case Mod:lookup_message(Ctx, Matcher) of + Msg = #message{} -> + verify_nomatch(Msg, Matcher); + not_found -> + ok; + Error = {error, _, _} -> + Error + end. + +verify_match(Msg, Matcher) -> + case matches(Msg, Matcher) of + true -> ok; + false -> {precondition_failed, Msg} + end. + +verify_nomatch(Msg, Matcher) -> + case matches(Msg, Matcher) of + false -> ok; + true -> {precondition_failed, Msg} + end. 
+ +-spec matches(emqx_types:message(), matcher()) -> boolean(). +matches( + Message, + #message_matcher{from = From, topic = Topic, payload = Pat, headers = Headers} +) -> + case Message of + #message{from = From, topic = Topic} when Pat =:= '_' -> + matches_headers(Message, Headers); + #message{from = From, topic = Topic, payload = Pat} -> + matches_headers(Message, Headers); + _ -> + false + end. + +matches_headers(_Message, MatchHeaders) when map_size(MatchHeaders) =:= 0 -> + true; +matches_headers(#message{headers = Headers}, MatchHeaders) -> + maps:intersect(MatchHeaders, Headers) =:= MatchHeaders. + +%% Basic tests + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-compile(export_all). + +conjunction_test() -> + %% Contradictory preconditions, always false. + Preconditions = [ + {if_exists, matcher(<<"c1">>, <<"t/1">>, 0, '_')}, + {unless_exists, matcher(<<"c1">>, <<"t/1">>, 0, '_')} + ], + ?assertEqual( + {precondition_failed, not_found}, + verify(?MODULE, [], Preconditions) + ), + %% Check that the order does not matter. + ?assertEqual( + {precondition_failed, not_found}, + verify(?MODULE, [], lists:reverse(Preconditions)) + ), + ?assertEqual( + {precondition_failed, message(<<"c1">>, <<"t/1">>, 0, <<>>)}, + verify( + ?MODULE, + [message(<<"c1">>, <<"t/1">>, 0, <<>>)], + Preconditions + ) + ). + +matches_test() -> + ?assert( + matches( + message(<<"mtest1">>, <<"t/same">>, 12345, <<"payload">>), + matcher(<<"mtest1">>, <<"t/same">>, 12345, '_') + ) + ). + +matches_headers_test() -> + ?assert( + matches( + message(<<"mtest2">>, <<"t/same">>, 23456, <<"payload">>, #{h1 => 42, h2 => <<>>}), + matcher(<<"mtest2">>, <<"t/same">>, 23456, '_', #{h2 => <<>>}) + ) + ). + +mismatches_headers_test() -> + ?assertNot( + matches( + message(<<"mtest3">>, <<"t/same">>, 23456, <<"payload">>, #{h1 => 42, h2 => <<>>}), + matcher(<<"mtest3">>, <<"t/same">>, 23456, '_', #{h2 => <<>>, h3 => <<"required">>}) + ) + ).
+ +matcher(ClientID, Topic, TS, Payload) -> + matcher(ClientID, Topic, TS, Payload, #{}). + +matcher(ClientID, Topic, TS, Payload, Headers) -> + #message_matcher{ + from = ClientID, + topic = Topic, + timestamp = TS, + payload = Payload, + headers = Headers + }. + +message(ClientID, Topic, TS, Payload) -> + message(ClientID, Topic, TS, Payload, #{}). + +message(ClientID, Topic, TS, Payload, Headers) -> + #message{ + id = <<>>, + qos = 0, + from = ClientID, + topic = Topic, + timestamp = TS, + payload = Payload, + headers = Headers + }. + +lookup_message(Messages, Matcher) -> + case lists:search(fun(M) -> matches(M, Matcher) end, Messages) of + {value, Message} -> + Message; + false -> + not_found + end. + +-endif. From 11951f8f6cb8734378dc2dead96f0a1615f4c0ce Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:53:38 +0200 Subject: [PATCH 38/50] feat(ds): adopt buffer interface to `emqx_ds:operation()` --- .../src/emqx_ds_builtin_local.erl | 26 +++-- .../src/emqx_ds_replication_layer.erl | 31 ++++-- .../src/emqx_ds_buffer.erl | 103 ++++++++---------- .../test/emqx_ds_test_helpers.erl | 2 +- 4 files changed, 89 insertions(+), 73 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index a7cc795b6..29fc98083 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -43,7 +43,7 @@ %% `emqx_ds_buffer': init_buffer/3, flush_buffer/4, - shard_of_message/4 + shard_of_operation/4 ]). %% Internal exports: @@ -55,6 +55,7 @@ -export_type([db_opts/0, shard/0, iterator/0, delete_iterator/0]). -include_lib("emqx_utils/include/emqx_message.hrl"). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). 
%%================================================================================ %% Type declarations @@ -255,14 +256,23 @@ assign_message_timestamps(Latest, [], Acc) -> assign_timestamp(TimestampUs, Message) -> {TimestampUs, Message}. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> shard(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> +-spec shard_of_operation(emqx_ds:db(), emqx_ds:operation(), clientid | topic, _Options) -> shard(). +shard_of_operation(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key); +shard_of_operation(DB, {_, #message_matcher{from = From, topic = Topic}}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key). + +shard_of_key(DB, Key) -> N = emqx_ds_builtin_local_meta:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, + Hash = erlang:phash2(Key, N), integer_to_binary(Hash). -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 669abdbf1..f54981884 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -29,7 +29,7 @@ current_timestamp/2, - shard_of_message/4, + shard_of_operation/4, flush_buffer/4, init_buffer/3 ]). @@ -392,15 +392,30 @@ flush_buffer(DB, Shard, Messages, State) -> end, {State, Result}. 
--spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic, _Options) -> +-spec shard_of_operation( + emqx_ds:db(), + emqx_ds:operation() | emqx_ds:precondition(), + clientid | topic, + _Options +) -> emqx_ds_replication_layer:shard_id(). -shard_of_message(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> +shard_of_operation(DB, #message{from = From, topic = Topic}, SerializeBy, _Options) -> + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key); +shard_of_operation(DB, {_OpName, Matcher}, SerializeBy, _Options) -> + #message_matcher{from = From, topic = Topic} = Matcher, + case SerializeBy of + clientid -> Key = From; + topic -> Key = Topic + end, + shard_of_key(DB, Key). + +shard_of_key(DB, Key) -> N = emqx_ds_replication_shard_allocator:n_shards(DB), - Hash = - case SerializeBy of - clientid -> erlang:phash2(From, N); - topic -> erlang:phash2(Topic, N) - end, + Hash = erlang:phash2(Key, N), integer_to_binary(Hash). %%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl index dec9eea80..cf83c8f2e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_buffer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_buffer.erl @@ -21,7 +21,7 @@ -behaviour(gen_server). %% API: --export([start_link/4, store_batch/3, shard_of_message/3]). +-export([start_link/4, store_batch/3, shard_of_operation/3]). -export([ls/0]). %% behavior callbacks: @@ -46,19 +46,18 @@ -define(cbm(DB), {?MODULE, DB}). -record(enqueue_req, { - messages :: [emqx_types:message()], + operations :: [emqx_ds:operation()], sync :: boolean(), - atomic :: boolean(), - n_messages :: non_neg_integer(), + n_operations :: non_neg_integer(), payload_bytes :: non_neg_integer() }). -callback init_buffer(emqx_ds:db(), _Shard, _Options) -> {ok, _State}. 
--callback flush_buffer(emqx_ds:db(), _Shard, [emqx_types:message()], State) -> +-callback flush_buffer(emqx_ds:db(), _Shard, [emqx_ds:operation()], State) -> {State, ok | {error, recoverable | unrecoverable, _}}. --callback shard_of_message(emqx_ds:db(), emqx_types:message(), topic | clientid, _Options) -> +-callback shard_of_operation(emqx_ds:db(), emqx_ds:operation(), topic | clientid, _Options) -> _Shard. %%================================================================================ @@ -77,39 +76,33 @@ start_link(CallbackModule, CallbackOptions, DB, Shard) -> ?via(DB, Shard), ?MODULE, [CallbackModule, CallbackOptions, DB, Shard], [] ). --spec store_batch(emqx_ds:db(), [emqx_types:message()], emqx_ds:message_store_opts()) -> +-spec store_batch(emqx_ds:db(), [emqx_ds:operation()], emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). -store_batch(DB, Messages, Opts) -> +store_batch(DB, Operations, Opts) -> Sync = maps:get(sync, Opts, true), - Atomic = maps:get(atomic, Opts, false), %% Usually we expect all messages in the batch to go into the %% single shard, so this function is optimized for the happy case. - case shards_of_batch(DB, Messages) of - [{Shard, {NMsgs, NBytes}}] -> + case shards_of_batch(DB, Operations) of + [{Shard, {NOps, NBytes}}] -> %% Happy case: enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ - messages = Messages, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes } ); - [_, _ | _] when Atomic -> - %% It's impossible to commit a batch to multiple shards - %% atomically - {error, unrecoverable, atomic_commit_to_multiple_shards}; _Shards -> %% Use a slower implementation for the unlikely case: - repackage_messages(DB, Messages, Sync) + repackage_messages(DB, Operations, Sync) end. --spec shard_of_message(emqx_ds:db(), emqx_types:message(), clientid | topic) -> _Shard. 
-shard_of_message(DB, Message, ShardBy) -> +-spec shard_of_operation(emqx_ds:db(), emqx_ds:operation(), clientid | topic) -> _Shard. +shard_of_operation(DB, Operation, ShardBy) -> {CBM, Options} = persistent_term:get(?cbm(DB)), - CBM:shard_of_message(DB, Message, ShardBy, Options). + CBM:shard_of_operation(DB, Operation, ShardBy, Options). %%================================================================================ %% behavior callbacks @@ -129,7 +122,7 @@ shard_of_message(DB, Message, ShardBy) -> n = 0 :: non_neg_integer(), n_bytes = 0 :: non_neg_integer(), tref :: undefined | reference(), - queue :: queue:queue(emqx_types:message()), + queue :: queue:queue(emqx_ds:operation()), pending_replies = [] :: [gen_server:from()] }). @@ -168,31 +161,29 @@ format_status(Status) -> handle_call( #enqueue_req{ - messages = Msgs, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes }, From, S0 = #s{pending_replies = Replies0} ) -> S = S0#s{pending_replies = [From | Replies0]}, - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; + {noreply, enqueue(Sync, Operations, NOps, NBytes, S)}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. handle_cast( #enqueue_req{ - messages = Msgs, + operations = Operations, sync = Sync, - atomic = Atomic, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = NBytes }, S ) -> - {noreply, enqueue(Sync, Atomic, Msgs, NMsgs, NBytes, S)}; + {noreply, enqueue(Sync, Operations, NOps, NBytes, S)}; handle_cast(_Cast, S) -> {noreply, S}. @@ -215,11 +206,10 @@ terminate(_Reason, #s{db = DB}) -> enqueue( Sync, - Atomic, - Msgs, + Ops, BatchSize, BatchBytes, - S0 = #s{n = NMsgs0, n_bytes = NBytes0, queue = Q0} + S0 = #s{n = NOps0, n_bytes = NBytes0, queue = Q0} ) -> %% At this point we don't split the batches, even when they aren't %% atomic. 
It wouldn't win us anything in terms of memory, and @@ -227,18 +217,18 @@ enqueue( %% granularity should be fine enough. NMax = application:get_env(emqx_durable_storage, egress_batch_size, 1000), NBytesMax = application:get_env(emqx_durable_storage, egress_batch_bytes, infinity), - NMsgs = NMsgs0 + BatchSize, + NMsgs = NOps0 + BatchSize, NBytes = NBytes0 + BatchBytes, - case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NMsgs0 > 0) of + case (NMsgs >= NMax orelse NBytes >= NBytesMax) andalso (NOps0 > 0) of true -> %% Adding this batch would cause buffer to overflow. Flush %% it now, and retry: S1 = flush(S0), - enqueue(Sync, Atomic, Msgs, BatchSize, BatchBytes, S1); + enqueue(Sync, Ops, BatchSize, BatchBytes, S1); false -> %% The buffer is empty, we enqueue the atomic batch in its %% entirety: - Q1 = lists:foldl(fun queue:in/2, Q0, Msgs), + Q1 = lists:foldl(fun queue:in/2, Q0, Ops), S1 = S0#s{n = NMsgs, n_bytes = NBytes, queue = Q1}, case NMsgs >= NMax orelse NBytes >= NBytesMax of true -> @@ -336,18 +326,18 @@ do_flush( } end. --spec shards_of_batch(emqx_ds:db(), [emqx_types:message()]) -> +-spec shards_of_batch(emqx_ds:db(), [emqx_ds:operation()]) -> [{_ShardId, {NMessages, NBytes}}] when NMessages :: non_neg_integer(), NBytes :: non_neg_integer(). -shards_of_batch(DB, Messages) -> +shards_of_batch(DB, Batch) -> maps:to_list( lists:foldl( - fun(Message, Acc) -> + fun(Operation, Acc) -> %% TODO: sharding strategy must be part of the DS DB schema: - Shard = shard_of_message(DB, Message, clientid), - Size = payload_size(Message), + Shard = shard_of_operation(DB, Operation, clientid), + Size = payload_size(Operation), maps:update_with( Shard, fun({N, S}) -> @@ -358,36 +348,35 @@ shards_of_batch(DB, Messages) -> ) end, #{}, - Messages + Batch ) ). 
-repackage_messages(DB, Messages, Sync) -> +repackage_messages(DB, Batch, Sync) -> Batches = lists:foldl( - fun(Message, Acc) -> - Shard = shard_of_message(DB, Message, clientid), - Size = payload_size(Message), + fun(Operation, Acc) -> + Shard = shard_of_operation(DB, Operation, clientid), + Size = payload_size(Operation), maps:update_with( Shard, fun({N, S, Msgs}) -> - {N + 1, S + Size, [Message | Msgs]} + {N + 1, S + Size, [Operation | Msgs]} end, - {1, Size, [Message]}, + {1, Size, [Operation]}, Acc ) end, #{}, - Messages + Batch ), maps:fold( - fun(Shard, {NMsgs, ByteSize, RevMessages}, ErrAcc) -> + fun(Shard, {NOps, ByteSize, RevOperations}, ErrAcc) -> Err = enqueue_call_or_cast( ?via(DB, Shard), #enqueue_req{ - messages = lists:reverse(RevMessages), + operations = lists:reverse(RevOperations), sync = Sync, - atomic = false, - n_messages = NMsgs, + n_operations = NOps, payload_bytes = ByteSize } ), @@ -427,4 +416,6 @@ cancel_timer(S = #s{tref = TRef}) -> %% @doc Return approximate size of the MQTT message (it doesn't take %% all things into account, for example headers and extras) payload_size(#message{payload = P, topic = T}) -> - size(P) + size(T). + size(P) + size(T); +payload_size({_OpName, _}) -> + 0. diff --git a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl index 08c08e0c5..cbf38bb62 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_test_helpers.erl @@ -377,7 +377,7 @@ nodes_of_clientid(DB, ClientId, Nodes = [N0 | _]) -> shard_of_clientid(DB, Node, ClientId) -> ?ON( Node, - emqx_ds_buffer:shard_of_message(DB, #message{from = ClientId}, clientid) + emqx_ds_buffer:shard_of_operation(DB, #message{from = ClientId}, clientid) ). 
%% Consume eagerly: From 5356d678cc33b74f64f15a233e0c345436f693bc Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:55:38 +0200 Subject: [PATCH 39/50] feat(dsraft): support atomic batches + preconditions --- .../src/emqx_ds_replication_layer.erl | 145 ++++++++++++++---- .../src/emqx_ds_replication_layer.hrl | 3 +- apps/emqx_durable_storage/src/emqx_ds.erl | 5 +- .../src/emqx_ds_storage_layer.erl | 65 +++++--- .../src/emqx_ds_storage_reference.erl | 54 +++++-- 5 files changed, 207 insertions(+), 65 deletions(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index f54981884..3430a3bda 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -83,6 +83,7 @@ ra_state/0 ]). +-include_lib("emqx_durable_storage/include/emqx_ds.hrl"). -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). -include("emqx_ds_replication_layer.hrl"). @@ -135,11 +136,12 @@ ?enc := emqx_ds_storage_layer:delete_iterator() }. -%% TODO: this type is obsolete and is kept only for compatibility with -%% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6) +%% Write batch. +%% Instances of this type currently form the majority of the Raft log. -type batch() :: #{ ?tag := ?BATCH, - ?batch_messages := [emqx_types:message()] + ?batch_operations := [emqx_ds:operation()], + ?batch_preconditions => [emqx_ds:precondition()] }. -type generation_rank() :: {shard_id(), term()}. @@ -240,16 +242,45 @@ drop_db(DB) -> _ = emqx_ds_proto_v4:drop_db(list_nodes(), DB), emqx_ds_replication_layer_meta:drop_db(DB). --spec store_batch(emqx_ds:db(), [emqx_types:message(), ...], emqx_ds:message_store_opts()) -> +-spec store_batch(emqx_ds:db(), emqx_ds:batch(), emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result().
-store_batch(DB, Messages, Opts) -> +store_batch(DB, Batch = #dsbatch{preconditions = [_ | _]}, Opts) -> + %% NOTE: Atomic batch is implied, will not check with DB config. + store_batch_atomic(DB, Batch, Opts); +store_batch(DB, Batch, Opts) -> + case emqx_ds_replication_layer_meta:db_config(DB) of + #{atomic_batches := true} -> + store_batch_atomic(DB, Batch, Opts); + #{} -> + store_batch_buffered(DB, Batch, Opts) + end. + +store_batch_buffered(DB, #dsbatch{operations = Operations}, Opts) -> + store_batch_buffered(DB, Operations, Opts); +store_batch_buffered(DB, Batch, Opts) -> try - emqx_ds_buffer:store_batch(DB, Messages, Opts) + emqx_ds_buffer:store_batch(DB, Batch, Opts) catch error:{Reason, _Call} when Reason == timeout; Reason == noproc -> {error, recoverable, Reason} end. +store_batch_atomic(DB, Batch, _Opts) -> + Shards = shards_of_batch(DB, Batch), + case Shards of + [Shard] -> + case ra_store_batch(DB, Shard, Batch) of + {timeout, ServerId} -> + {error, recoverable, {timeout, ServerId}}; + Result -> + Result + end; + [] -> + ok; + [_ | _] -> + {error, unrecoverable, nonatomic_batch_spans_multiple_storages} + end. + -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> [{emqx_ds:stream_rank(), stream()}]. get_streams(DB, TopicFilter, StartTime) -> @@ -418,6 +449,23 @@ shard_of_key(DB, Key) -> Hash = erlang:phash2(Key, N), integer_to_binary(Hash). +shards_of_batch(DB, #dsbatch{operations = Operations, preconditions = Preconditions}) -> + shards_of_batch(DB, Preconditions, shards_of_batch(DB, Operations, [])); +shards_of_batch(DB, Operations) -> + shards_of_batch(DB, Operations, []). + +shards_of_batch(DB, [Operation | Rest], Acc) -> + case shard_of_operation(DB, Operation, clientid, #{}) of + Shard when Shard =:= hd(Acc) -> + shards_of_batch(DB, Rest, Acc); + Shard when Acc =:= [] -> + shards_of_batch(DB, Rest, [Shard]); + ShardAnother -> + [ShardAnother | Acc] + end; +shards_of_batch(_DB, [], Acc) -> + Acc. 
+ %%================================================================================ %% Internal exports (RPC targets) %%================================================================================ @@ -639,13 +687,22 @@ list_nodes() -> end ). --spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), [emqx_types:message()]) -> - ok | {timeout, _} | {error, recoverable | unrecoverable, _Err}. -ra_store_batch(DB, Shard, Messages) -> - Command = #{ - ?tag => ?BATCH, - ?batch_messages => Messages - }, +-spec ra_store_batch(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:batch()) -> + ok | {timeout, _} | emqx_ds:error(_). +ra_store_batch(DB, Shard, Batch) -> + case Batch of + #dsbatch{operations = Operations, preconditions = Preconditions} -> + Command = #{ + ?tag => ?BATCH, + ?batch_operations => Operations, + ?batch_preconditions => Preconditions + }; + Operations -> + Command = #{ + ?tag => ?BATCH, + ?batch_operations => Operations + } + end, Servers = emqx_ds_replication_layer_shard:servers(DB, Shard, leader_preferred), case emqx_ds_replication_layer_shard:process_command(Servers, Command, ?RA_TIMEOUT) of {ok, Result, _Leader} -> @@ -782,6 +839,7 @@ ra_drop_shard(DB, Shard) -> -define(pd_ra_idx_need_release, '$emqx_ds_raft_idx_need_release'). -define(pd_ra_bytes_need_release, '$emqx_ds_raft_bytes_need_release'). +-define(pd_ra_force_monotonic, '$emqx_ds_raft_force_monotonic'). -spec init(_Args :: map()) -> ra_state(). init(#{db := DB, shard := Shard}) -> @@ -791,18 +849,30 @@ init(#{db := DB, shard := Shard}) -> {ra_state(), _Reply, _Effects}. 
apply( RaftMeta, - #{ + Command = #{ ?tag := ?BATCH, - ?batch_messages := MessagesIn + ?batch_operations := OperationsIn }, #{db_shard := DBShard = {DB, Shard}, latest := Latest0} = State0 ) -> - ?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => MessagesIn, latest => Latest0}), - {Stats, Latest, Messages} = assign_timestamps(Latest0, MessagesIn), - Result = emqx_ds_storage_layer:store_batch(DBShard, Messages, #{durable => false}), - State = State0#{latest := Latest}, - set_ts(DBShard, Latest), - Effects = try_release_log(Stats, RaftMeta, State), + ?tp(ds_ra_apply_batch, #{db => DB, shard => Shard, batch => OperationsIn, latest => Latest0}), + Preconditions = maps:get(?batch_preconditions, Command, []), + {Stats, Latest, Operations} = assign_timestamps(DB, Latest0, OperationsIn), + %% FIXME + case emqx_ds_precondition:verify(emqx_ds_storage_layer, DBShard, Preconditions) of + ok -> + Result = emqx_ds_storage_layer:store_batch(DBShard, Operations, #{durable => false}), + State = State0#{latest := Latest}, + set_ts(DBShard, Latest), + Effects = try_release_log(Stats, RaftMeta, State); + PreconditionFailed = {precondition_failed, _} -> + Result = {error, unrecoverable, PreconditionFailed}, + State = State0, + Effects = []; + Result -> + State = State0, + Effects = [] + end, Effects =/= [] andalso ?tp(ds_ra_effects, #{effects => Effects, meta => RaftMeta}), {State, Result, Effects}; apply( @@ -877,6 +947,21 @@ apply( Effects = handle_custom_event(DBShard, Latest, CustomEvent), {State#{latest => Latest}, ok, Effects}. +assign_timestamps(DB, Latest, Messages) -> + ForceMonotonic = force_monotonic_timestamps(DB), + assign_timestamps(ForceMonotonic, Latest, Messages, [], 0, 0). + +force_monotonic_timestamps(DB) -> + case erlang:get(?pd_ra_force_monotonic) of + undefined -> + DBConfig = emqx_ds_replication_layer_meta:db_config(DB), + Flag = maps:get(force_monotonic_timestamps, DBConfig), + erlang:put(?pd_ra_force_monotonic, Flag); + Flag -> + ok + end, + Flag. 
+ try_release_log({_N, BatchSize}, RaftMeta = #{index := CurrentIdx}, State) -> %% NOTE %% Because cursor release means storage flush (see @@ -939,10 +1024,7 @@ tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) -> ?tp(emqx_ds_replication_layer_tick, #{db => DB, shard => Shard, timestamp => Timestamp}), handle_custom_event(DBShard, Timestamp, tick). -assign_timestamps(Latest, Messages) -> - assign_timestamps(Latest, Messages, [], 0, 0). - -assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) -> +assign_timestamps(true, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> case emqx_message:timestamp(Message0, microsecond) of TimestampUs when TimestampUs > Latest0 -> Latest = TimestampUs, @@ -951,8 +1033,17 @@ assign_timestamps(Latest0, [Message0 | Rest], Acc, N, Sz) -> Latest = Latest0 + 1, Message = assign_timestamp(Latest, Message0) end, - assign_timestamps(Latest, Rest, [Message | Acc], N + 1, Sz + approx_message_size(Message0)); -assign_timestamps(Latest, [], Acc, N, Size) -> + MSize = approx_message_size(Message0), + assign_timestamps(true, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); +assign_timestamps(false, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> + Timestamp = emqx_message:timestamp(Message0), + Latest = max(Latest0, Timestamp), + Message = assign_timestamp(Timestamp, Message0), + MSize = approx_message_size(Message0), + assign_timestamps(false, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); +assign_timestamps(ForceMonotonic, Latest, [Operation | Rest], Acc, N, Sz) -> + assign_timestamps(ForceMonotonic, Latest, Rest, [Operation | Acc], N + 1, Sz); +assign_timestamps(_ForceMonotonic, Latest, [], Acc, N, Size) -> {{N, Size}, Latest, lists:reverse(Acc)}. 
assign_timestamp(TimestampUs, Message) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl index f33090c46..c87e9bcba 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.hrl @@ -19,7 +19,8 @@ -define(enc, 3). %% ?BATCH --define(batch_messages, 2). +-define(batch_operations, 2). +-define(batch_preconditions, 4). -define(timestamp, 3). %% add_generation / update_config diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 69de92325..0e4336a26 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -56,6 +56,7 @@ topic/0, batch/0, operation/0, + deletion/0, precondition/0, stream/0, delete_stream/0, @@ -110,7 +111,9 @@ message() %% Delete a message. %% Does nothing if the message does not exist. - | {delete, message_matcher('_')}. + | deletion(). + +-type deletion() :: {delete, message_matcher('_')}. %% Precondition. %% Fails whole batch if the storage already has the matching message (`if_exists'), diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index d6250254d..ac2edc254 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -37,6 +37,9 @@ next/4, delete_next/5, + %% Preconditions + lookup_message/2, + %% Generations update_config/3, add_generation/2, @@ -74,6 +77,7 @@ batch_store_opts/0 ]). +-include("emqx_ds.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}). @@ -115,6 +119,11 @@ -type gen_id() :: 0..16#ffff. +-type batch() :: [ + {emqx_ds:time(), emqx_types:message()} + | emqx_ds:deletion() +]. + %% Options affecting how batches should be stored. 
%% See also: `emqx_ds:message_store_opts()'. -type batch_store_opts() :: @@ -294,6 +303,10 @@ | {ok, end_of_stream} | emqx_ds:error(_). +%% Lookup a single message, for preconditions to work. +-callback lookup_message(shard_id(), generation_data(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). + -callback handle_event(shard_id(), generation_data(), emqx_ds:time(), CustomEvent | tick) -> [CustomEvent]. @@ -317,14 +330,10 @@ drop_shard(Shard) -> %% @doc This is a convenicence wrapper that combines `prepare' and %% `commit' operations. --spec store_batch( - shard_id(), - [{emqx_ds:time(), emqx_types:message()}], - batch_store_opts() -) -> +-spec store_batch(shard_id(), batch(), batch_store_opts()) -> emqx_ds:store_batch_result(). -store_batch(Shard, Messages, Options) -> - case prepare_batch(Shard, Messages, #{}) of +store_batch(Shard, Batch, Options) -> + case prepare_batch(Shard, Batch, #{}) of {ok, CookedBatch} -> commit_batch(Shard, CookedBatch, Options); ignore -> @@ -342,23 +351,21 @@ store_batch(Shard, Messages, Options) -> %% %% The underlying storage layout MAY use timestamp as a unique message %% ID. --spec prepare_batch( - shard_id(), - [{emqx_ds:time(), emqx_types:message()}], - batch_prepare_opts() -) -> {ok, cooked_batch()} | ignore | emqx_ds:error(_). -prepare_batch(Shard, Messages = [{Time, _} | _], Options) -> +-spec prepare_batch(shard_id(), batch(), batch_prepare_opts()) -> + {ok, cooked_batch()} | ignore | emqx_ds:error(_). +prepare_batch(Shard, Batch, Options) -> %% NOTE %% We assume that batches do not span generations. Callers should enforce this. 
?tp(emqx_ds_storage_layer_prepare_batch, #{ - shard => Shard, messages => Messages, options => Options + shard => Shard, batch => Batch, options => Options }), %% FIXME: always store messages in the current generation - case generation_at(Shard, Time) of + Time = batch_starts_at(Batch), + case is_integer(Time) andalso generation_at(Shard, Time) of {GenId, #{module := Mod, data := GenData}} -> T0 = erlang:monotonic_time(microsecond), Result = - case Mod:prepare_batch(Shard, GenData, Messages, Options) of + case Mod:prepare_batch(Shard, GenData, Batch, Options) of {ok, CookedBatch} -> {ok, #{?tag => ?COOKED_BATCH, ?generation => GenId, ?enc => CookedBatch}}; Error = {error, _, _} -> @@ -368,11 +375,21 @@ prepare_batch(Shard, Messages = [{Time, _} | _], Options) -> %% TODO store->prepare emqx_ds_builtin_metrics:observe_store_batch_time(Shard, T1 - T0), Result; + false -> + %% No write operations in this batch. + ignore; not_found -> + %% Generation is likely already GCed. ignore - end; -prepare_batch(_Shard, [], _Options) -> - ignore. + end. + +-spec batch_starts_at(batch()) -> emqx_ds:time() | undefined. +batch_starts_at([{Time, _Message} | _]) when is_integer(Time) -> + Time; +batch_starts_at([{delete, #message_matcher{timestamp = Time}} | _]) -> + Time; +batch_starts_at([]) -> + undefined. %% @doc Commit cooked batch to the storage. %% @@ -559,6 +576,16 @@ update_config(ShardId, Since, Options) -> add_generation(ShardId, Since) -> gen_server:call(?REF(ShardId), #call_add_generation{since = Since}, infinity). +-spec lookup_message(shard_id(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). +lookup_message(ShardId, Matcher = #message_matcher{timestamp = Time}) -> + case generation_at(ShardId, Time) of + {_GenId, #{module := Mod, data := GenData}} -> + Mod:lookup_message(ShardId, GenData, Matcher); + not_found -> + not_found + end. 
+ -spec list_generations_with_lifetimes(shard_id()) -> #{ gen_id() => #{ diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl index cfd6f30ac..869602fcd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl @@ -21,6 +21,8 @@ %% used for testing. -module(emqx_ds_storage_reference). +-include("emqx_ds.hrl"). + -behaviour(emqx_ds_storage_layer). %% API: @@ -39,7 +41,8 @@ make_delete_iterator/5, update_iterator/4, next/6, - delete_next/7 + delete_next/7, + lookup_message/3 ]). %% internal exports: @@ -49,6 +52,8 @@ -include_lib("emqx_utils/include/emqx_message.hrl"). +-define(DB_KEY(TIMESTAMP), <>). + %%================================================================================ %% Type declarations %%================================================================================ @@ -102,23 +107,22 @@ drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{cf = CFHandle}) -> ok = rocksdb:drop_column_family(DBHandle, CFHandle), ok. -prepare_batch(_ShardId, _Data, Messages, _Options) -> - {ok, Messages}. +prepare_batch(_ShardId, _Data, Batch, _Options) -> + {ok, Batch}. -commit_batch(_ShardId, #s{db = DB, cf = CF}, Messages, Options) -> - {ok, Batch} = rocksdb:batch(), - lists:foreach( - fun({TS, Msg}) -> - Key = <>, - Val = term_to_binary(Msg), - rocksdb:batch_put(Batch, CF, Key, Val) - end, - Messages - ), - Res = rocksdb:write_batch(DB, Batch, write_batch_opts(Options)), - rocksdb:release_batch(Batch), +commit_batch(_ShardId, S = #s{db = DB}, Batch, Options) -> + {ok, BatchHandle} = rocksdb:batch(), + lists:foreach(fun(Op) -> process_batch_operation(S, Op, BatchHandle) end, Batch), + Res = rocksdb:write_batch(DB, BatchHandle, write_batch_opts(Options)), + rocksdb:release_batch(BatchHandle), Res. 
+process_batch_operation(S, {TS, Msg = #message{}}, BatchHandle) -> + Val = encode_message(Msg), + rocksdb:batch_put(BatchHandle, S#s.cf, ?DB_KEY(TS), Val); +process_batch_operation(S, {delete, #message_matcher{timestamp = TS}}, BatchHandle) -> + rocksdb:batch_delete(BatchHandle, S#s.cf, ?DB_KEY(TS)). + get_streams(_Shard, _Data, _TopicFilter, _StartTime) -> [#stream{}]. @@ -205,6 +209,16 @@ delete_next(_Shard, #s{db = DB, cf = CF}, It0, Selector, BatchSize, _Now, IsCurr {ok, It, NumDeleted, NumIterated} end. +lookup_message(_ShardId, #s{db = DB, cf = CF}, #message_matcher{timestamp = TS}) -> + case rocksdb:get(DB, CF, ?DB_KEY(TS), _ReadOpts = []) of + {ok, Val} -> + decode_message(Val); + not_found -> + not_found; + {error, Reason} -> + {error, unrecoverable, Reason} + end. + %%================================================================================ %% Internal functions %%================================================================================ @@ -214,7 +228,7 @@ do_next(_, _, _, _, 0, Key, Acc) -> do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) -> case rocksdb:iterator_move(IT, Action) of {ok, Key = <>, Blob} -> - Msg = #message{topic = Topic} = binary_to_term(Blob), + Msg = #message{topic = Topic} = decode_message(Blob), TopicWords = emqx_topic:words(Topic), case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of true -> @@ -234,7 +248,7 @@ do_delete_next( ) -> case rocksdb:iterator_move(IT, Action) of {ok, Key, Blob} -> - Msg = #message{topic = Topic, timestamp = TS} = binary_to_term(Blob), + Msg = #message{topic = Topic, timestamp = TS} = decode_message(Blob), TopicWords = emqx_topic:words(Topic), case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of true -> @@ -285,6 +299,12 @@ do_delete_next( {Key0, {AccDel, AccIter}} end. +encode_message(Msg) -> + term_to_binary(Msg). + +decode_message(Val) -> + binary_to_term(Val). 
+ %% @doc Generate a column family ID for the MQTT messages -spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()]. data_cf(GenId) -> From 68990f1538dc0b450b6e63fb8a412498fe21e935 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:56:29 +0200 Subject: [PATCH 40/50] feat(ds): support operations + preconditions in skipstream-lts --- .../src/emqx_ds_storage_skipstream_lts.erl | 104 ++++++++++++++---- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl index cb87b8a6f..b466983b7 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_skipstream_lts.erl @@ -33,7 +33,8 @@ make_delete_iterator/5, update_iterator/4, next/6, - delete_next/7 + delete_next/7, + lookup_message/3 ]). %% internal exports: @@ -43,6 +44,7 @@ -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). +-include("emqx_ds.hrl"). -include("emqx_ds_metrics.hrl"). -ifdef(TEST). @@ -56,11 +58,12 @@ %%================================================================================ %% TLOG entry -%% keys: --define(cooked_payloads, 6). +%% Keys: +-define(cooked_msg_ops, 6). -define(cooked_lts_ops, 7). %% Payload: --define(cooked_payload(TIMESTAMP, STATIC, VARYING, VALUE), +-define(cooked_delete, 100). +-define(cooked_msg_op(TIMESTAMP, STATIC, VARYING, VALUE), {TIMESTAMP, STATIC, VARYING, VALUE} ). @@ -176,25 +179,39 @@ drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{data_cf = DataCF, trie_cf = TrieCF, ok = rocksdb:drop_column_family(DBHandle, TrieCF), ok. 
-prepare_batch(_ShardId, S = #s{trie = Trie}, Messages, _Options) -> +prepare_batch( + _ShardId, + S = #s{trie = Trie}, + Operations, + _Options +) -> _ = erase(?lts_persist_ops), - Payloads = [ - begin - Tokens = words(Topic), - {Static, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), - ?cooked_payload(Timestamp, Static, Varying, serialize(S, Varying, Msg)) - end - || {Timestamp, Msg = #message{topic = Topic}} <- Messages - ], + OperationsCooked = emqx_utils:flattermap( + fun + ({Timestamp, Msg = #message{topic = Topic}}) -> + Tokens = words(Topic), + {Static, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), + ?cooked_msg_op(Timestamp, Static, Varying, serialize(S, Varying, Msg)); + ({delete, #message_matcher{topic = Topic, timestamp = Timestamp}}) -> + case emqx_ds_lts:lookup_topic_key(Trie, words(Topic)) of + {ok, {Static, Varying}} -> + ?cooked_msg_op(Timestamp, Static, Varying, ?cooked_delete); + undefined -> + %% Topic is unknown, nothing to delete. + [] + end + end, + Operations + ), {ok, #{ - ?cooked_payloads => Payloads, + ?cooked_msg_ops => OperationsCooked, ?cooked_lts_ops => pop_lts_persist_ops() }}. 
commit_batch( _ShardId, #s{db = DB, trie_cf = TrieCF, data_cf = DataCF, trie = Trie, hash_bytes = HashBytes}, - #{?cooked_lts_ops := LtsOps, ?cooked_payloads := Payloads}, + #{?cooked_lts_ops := LtsOps, ?cooked_msg_ops := Operations}, Options ) -> {ok, Batch} = rocksdb:batch(), @@ -210,12 +227,17 @@ commit_batch( _ = emqx_ds_lts:trie_update(Trie, LtsOps), %% Commit payloads: lists:foreach( - fun(?cooked_payload(Timestamp, Static, Varying, ValBlob)) -> - MasterKey = mk_key(Static, 0, <<>>, Timestamp), - ok = rocksdb:batch_put(Batch, DataCF, MasterKey, ValBlob), - mk_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp) + fun + (?cooked_msg_op(Timestamp, Static, Varying, ValBlob = <<_/bytes>>)) -> + MasterKey = mk_key(Static, 0, <<>>, Timestamp), + ok = rocksdb:batch_put(Batch, DataCF, MasterKey, ValBlob), + mk_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp); + (?cooked_msg_op(Timestamp, Static, Varying, ?cooked_delete)) -> + MasterKey = mk_key(Static, 0, <<>>, Timestamp), + ok = rocksdb:batch_delete(Batch, DataCF, MasterKey), + delete_index(Batch, DataCF, HashBytes, Static, Varying, Timestamp) end, - Payloads + Operations ), Result = rocksdb:write_batch(DB, Batch, [ {disable_wal, not maps:get(durable, Options, true)} @@ -285,6 +307,28 @@ delete_next(Shard, S, It0, Selector, BatchSize, Now, IsCurrent) -> Ret end. +lookup_message( + Shard, + S = #s{db = DB, data_cf = CF, trie = Trie}, + #message_matcher{topic = Topic, timestamp = Timestamp} +) -> + case emqx_ds_lts:lookup_topic_key(Trie, words(Topic)) of + {ok, {StaticIdx, _Varying}} -> + DSKey = mk_key(StaticIdx, 0, <<>>, Timestamp), + case rocksdb:get(DB, CF, DSKey, _ReadOpts = []) of + {ok, Val} -> + {ok, TopicStructure} = emqx_ds_lts:reverse_lookup(Trie, StaticIdx), + Msg = deserialize(S, Val), + enrich(Shard, S, TopicStructure, DSKey, Msg); + not_found -> + not_found; + {error, Reason} -> + {error, unrecoverable, Reason} + end; + undefined -> + not_found + end. 
+ %%================================================================================ %% Internal exports %%================================================================================ @@ -330,12 +374,18 @@ serialize(#s{serialization_schema = SSchema, with_guid = WithGuid}, Varying, Msg }, emqx_ds_msg_serializer:serialize(SSchema, Msg). +enrich(#ctx{shard = Shard, s = S, topic_structure = TopicStructure}, DSKey, Msg0) -> + enrich(Shard, S, TopicStructure, DSKey, Msg0). + enrich( - #ctx{shard = Shard, topic_structure = Structure, s = #s{with_guid = WithGuid}}, + Shard, + #s{with_guid = WithGuid}, + TopicStructure, DSKey, Msg0 ) -> - Topic = emqx_topic:join(emqx_ds_lts:decompress_topic(Structure, words(Msg0#message.topic))), + Tokens = words(Msg0#message.topic), + Topic = emqx_topic:join(emqx_ds_lts:decompress_topic(TopicStructure, Tokens)), Msg0#message{ topic = Topic, id = @@ -584,6 +634,16 @@ mk_index(Batch, CF, HashBytes, Static, Timestamp, N, [TopicLevel | Varying]) -> mk_index(_Batch, _CF, _HashBytes, _Static, _Timestamp, _N, []) -> ok. +delete_index(Batch, CF, HashBytes, Static, Varying, Timestamp) -> + delete_index(Batch, CF, HashBytes, Static, Timestamp, 1, Varying). + +delete_index(Batch, CF, HashBytes, Static, Timestamp, N, [TopicLevel | Varying]) -> + Key = mk_key(Static, N, hash(HashBytes, TopicLevel), Timestamp), + ok = rocksdb:batch_delete(Batch, CF, Key), + delete_index(Batch, CF, HashBytes, Static, Timestamp, N + 1, Varying); +delete_index(_Batch, _CF, _HashBytes, _Static, _Timestamp, _N, []) -> + ok. 
+ %%%%%%%% Keys %%%%%%%%%% get_key_range(StaticIdx, WildcardIdx, Hash) -> From 1559aac48606c2ed33623b5210b290c3d32b049a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 30 Jul 2024 16:57:21 +0200 Subject: [PATCH 41/50] test(dsbackend): add shared tests for atomic batches + preconditions --- .../test/emqx_ds_backends_SUITE.erl | 164 ++++++++++++++++-- 1 file changed, 150 insertions(+), 14 deletions(-) diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index 1fc1594fc..e70d2b682 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -19,6 +19,7 @@ -compile(nowarn_export_all). -include("../../emqx/include/emqx.hrl"). +-include("../../emqx_durable_storage/include/emqx_ds.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("stdlib/include/assert.hrl"). -include("../../emqx/include/asserts.hrl"). @@ -145,7 +146,7 @@ t_06_smoke_add_generation(Config) -> ?assertMatch(ok, emqx_ds:add_generation(DB)), [ {Gen1, #{created_at := Created1, since := Since1, until := Until1}}, - {Gen2, #{created_at := Created2, since := Since2, until := undefined}} + {_Gen2, #{created_at := Created2, since := Since2, until := undefined}} ] = maps:to_list(emqx_ds:list_generations_with_lifetimes(DB)), %% Check units of the return values (+/- 10s from test begin time): ?give_or_take(BeginTime, 10_000, Created1), @@ -234,8 +235,8 @@ t_09_atomic_store_batch(Config) -> DB = ?FUNCTION_NAME, ?check_trace( begin - application:set_env(emqx_durable_storage, egress_batch_size, 1), - ?assertMatch(ok, emqx_ds:open_db(DB, opts(Config))), + DBOpts = (opts(Config))#{atomic_batches => true}, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), Msgs = [ message(<<"1">>, <<"1">>, 0), message(<<"2">>, <<"2">>, 1), @@ -243,13 +244,8 @@ t_09_atomic_store_batch(Config) -> ], ?assertEqual( ok, - emqx_ds:store_batch(DB, Msgs, #{ - atomic => true, - sync => true - 
}) - ), - {ok, Flush} = ?block_until(#{?snk_kind := emqx_ds_buffer_flush}), - ?assertMatch(#{batch := [_, _, _]}, Flush) + emqx_ds:store_batch(DB, Msgs, #{sync => true}) + ) end, [] ), @@ -289,6 +285,124 @@ t_10_non_atomic_store_batch(Config) -> ), ok. +t_11_batch_preconditions(Config) -> + DB = ?FUNCTION_NAME, + ?check_trace( + begin + DBOpts = (opts(Config))#{ + atomic_batches => true, + force_monotonic_timestamps => false + }, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), + + %% Conditional delete + TS = 42, + Batch1 = #dsbatch{ + preconditions = [{if_exists, matcher(<<"c1">>, <<"t/a">>, '_', TS)}], + operations = [{delete, matcher(<<"c1">>, <<"t/a">>, '_', TS)}] + }, + %% Conditional insert + M1 = message(<<"c1">>, <<"t/a">>, <<"M1">>, TS), + Batch2 = #dsbatch{ + preconditions = [{unless_exists, matcher(<<"c1">>, <<"t/a">>, '_', TS)}], + operations = [M1] + }, + + %% No such message yet, precondition fails: + ?assertEqual( + {error, unrecoverable, {precondition_failed, not_found}}, + emqx_ds:store_batch(DB, Batch1) + ), + %% No such message yet, `unless` precondition holds: + ?assertEqual( + ok, + emqx_ds:store_batch(DB, Batch2) + ), + %% Now there's such message, `unless` precondition now fails: + ?assertEqual( + {error, unrecoverable, {precondition_failed, M1}}, + emqx_ds:store_batch(DB, Batch2) + ), + %% On the other hand, `if` precondition now holds: + ?assertEqual( + ok, + emqx_ds:store_batch(DB, Batch1) + ) + end, + fun(_Trace) -> + %% Wait at least until current epoch ends. + ct:sleep(1000), + %% There's no messages in the DB. + ?assertEqual( + [], + emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)) + ) + end + ). 
+ +t_12_batch_precondition_conflicts(Config) -> + DB = ?FUNCTION_NAME, + NBatches = 50, + NMessages = 10, + ?check_trace( + begin + DBOpts = (opts(Config))#{ + atomic_batches => true, + force_monotonic_timestamps => false + }, + ?assertMatch(ok, emqx_ds:open_db(DB, DBOpts)), + + ConflictBatches = [ + #dsbatch{ + %% If the slot is free... + preconditions = [{if_exists, matcher(<<"c1">>, <<"t/slot">>, _Free = <<>>, 0)}], + %% Take it and write NMessages extra messages, so that batches take longer to + %% process and have higher chances to conflict with each other. + operations = + [ + message(<<"c1">>, <<"t/slot">>, integer_to_binary(I), _TS = 0) + | [ + message(<<"c1">>, {"t/owner/~p/~p", [I, J]}, <<>>, I * 100 + J) + || J <- lists:seq(1, NMessages) + ] + ] + } + || I <- lists:seq(1, NBatches) + ], + + %% Run those batches concurrently. + ok = emqx_ds:store_batch(DB, [message(<<"c1">>, <<"t/slot">>, <<>>, 0)]), + Results = emqx_utils:pmap( + fun(B) -> emqx_ds:store_batch(DB, B) end, + ConflictBatches, + infinity + ), + + %% Only one should have succeeded. + ?assertEqual([ok], [Ok || Ok = ok <- Results]), + + %% While other failed with an identical `precondition_failed`. + Failures = lists:usort([PreconditionFailed || {error, _, PreconditionFailed} <- Results]), + ?assertMatch( + [{precondition_failed, #message{topic = <<"t/slot">>, payload = <<_/bytes>>}}], + Failures + ), + + %% Wait at least until current epoch ends. + ct:sleep(1000), + %% Storage should contain single batch's messages. + [{precondition_failed, #message{payload = IOwner}}] = Failures, + WinnerBatch = lists:nth(binary_to_integer(IOwner), ConflictBatches), + BatchMessages = lists:sort(WinnerBatch#dsbatch.operations), + DBMessages = emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)), + ?assertEqual( + BatchMessages, + DBMessages + ) + end, + [] + ). 
+ t_smoke_delete_next(Config) -> DB = ?FUNCTION_NAME, ?check_trace( @@ -534,12 +648,25 @@ message(ClientId, Topic, Payload, PublishedAt) -> message(Topic, Payload, PublishedAt) -> #message{ - topic = Topic, - payload = Payload, + topic = try_format(Topic), + payload = try_format(Payload), timestamp = PublishedAt, id = emqx_guid:gen() }. +matcher(ClientID, Topic, Payload, Timestamp) -> + #message_matcher{ + from = ClientID, + topic = try_format(Topic), + timestamp = Timestamp, + payload = Payload + }. + +try_format({Fmt, Args}) -> + emqx_utils:format(Fmt, Args); +try_format(String) -> + String. + delete(DB, It, Selector, BatchSize) -> delete(DB, It, Selector, BatchSize, 0). @@ -562,9 +689,18 @@ all() -> groups() -> TCs = emqx_common_test_helpers:all(?MODULE), + %% TODO: Remove once builtin-local supports preconditions + atomic batches. + BuiltinLocalTCs = + TCs -- + [ + t_09_atomic_store_batch, + t_11_batch_preconditions, + t_12_batch_precondition_conflicts + ], + BuiltinRaftTCs = TCs, [ - {builtin_local, TCs}, - {builtin_raft, TCs} + {builtin_local, BuiltinLocalTCs}, + {builtin_raft, BuiltinRaftTCs} ]. 
init_per_group(builtin_local, Config) -> From 109ffe7a70fdee8d780a999c9db14c115ac3af25 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:41:28 +0200 Subject: [PATCH 42/50] fix(dsbackend): unify timestamp resolution in operations / preconditions --- .../src/emqx_ds_builtin_local.erl | 18 ++++++++++++------ .../src/emqx_ds_replication_layer.erl | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index 29fc98083..e55f4807e 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -231,9 +231,9 @@ flush_buffer(DB, Shard, Messages, S0 = #bs{options = Options}) -> make_batch(_ForceMonotonic = true, Latest, Messages) -> assign_monotonic_timestamps(Latest, Messages, []); make_batch(false, Latest, Messages) -> - assign_message_timestamps(Latest, Messages, []). + assign_operation_timestamps(Latest, Messages, []). -assign_monotonic_timestamps(Latest0, [Message | Rest], Acc0) -> +assign_monotonic_timestamps(Latest0, [Message = #message{} | Rest], Acc0) -> case emqx_message:timestamp(Message, microsecond) of TimestampUs when TimestampUs > Latest0 -> Latest = TimestampUs; @@ -242,15 +242,21 @@ assign_monotonic_timestamps(Latest0, [Message | Rest], Acc0) -> end, Acc = [assign_timestamp(Latest, Message) | Acc0], assign_monotonic_timestamps(Latest, Rest, Acc); +assign_monotonic_timestamps(Latest, [Operation | Rest], Acc0) -> + Acc = [Operation | Acc0], + assign_monotonic_timestamps(Latest, Rest, Acc); assign_monotonic_timestamps(Latest, [], Acc) -> {Latest, lists:reverse(Acc)}. 
-assign_message_timestamps(Latest0, [Message | Rest], Acc0) -> - TimestampUs = emqx_message:timestamp(Message, microsecond), +assign_operation_timestamps(Latest0, [Message = #message{} | Rest], Acc0) -> + TimestampUs = emqx_message:timestamp(Message), Latest = max(TimestampUs, Latest0), Acc = [assign_timestamp(TimestampUs, Message) | Acc0], - assign_message_timestamps(Latest, Rest, Acc); -assign_message_timestamps(Latest, [], Acc) -> + assign_operation_timestamps(Latest, Rest, Acc); +assign_operation_timestamps(Latest, [Operation | Rest], Acc0) -> + Acc = [Operation | Acc0], + assign_operation_timestamps(Latest, Rest, Acc); +assign_operation_timestamps(Latest, [], Acc) -> {Latest, lists:reverse(Acc)}. assign_timestamp(TimestampUs, Message) -> diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 3430a3bda..6acbf94ca 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -1036,9 +1036,9 @@ assign_timestamps(true, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> MSize = approx_message_size(Message0), assign_timestamps(true, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); assign_timestamps(false, Latest0, [Message0 = #message{} | Rest], Acc, N, Sz) -> - Timestamp = emqx_message:timestamp(Message0), - Latest = max(Latest0, Timestamp), - Message = assign_timestamp(Timestamp, Message0), + TimestampUs = emqx_message:timestamp(Message0), + Latest = max(Latest0, TimestampUs), + Message = assign_timestamp(TimestampUs, Message0), MSize = approx_message_size(Message0), assign_timestamps(false, Latest, Rest, [Message | Acc], N + 1, Sz + MSize); assign_timestamps(ForceMonotonic, Latest, [Operation | Rest], Acc, N, Sz) -> From 9f96e0957ef6468184120d110b26f3f05b80e78e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:43:39 +0200 Subject: [PATCH 43/50] test(ds): verify deletions 
work predictably --- .../test/emqx_ds_storage_layout_SUITE.erl | 112 ++++++++---------- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl index e0531dad0..e0fea7875 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_layout_SUITE.erl @@ -18,11 +18,14 @@ -compile(export_all). -compile(nowarn_export_all). +-include("emqx_ds.hrl"). -include("../../emqx/include/emqx.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("stdlib/include/assert.hrl"). +-define(assertSameSet(A, B), ?assertEqual(lists:sort(A), lists:sort(B))). + -define(FUTURE, (1 bsl 64 - 1)). -define(SHARD, shard(?FUNCTION_NAME)). @@ -66,6 +69,30 @@ t_store(_Config) -> }, ?assertMatch(ok, emqx_ds:store_batch(?FUNCTION_NAME, [Msg])). +%% Smoke test of applying batch operations +t_operations(db_config, _Config) -> + #{force_monotonic_timestamps => false}. + +t_operations(_Config) -> + Batch1 = [ + make_message(100, <<"t/1">>, <<"M1">>), + make_message(200, <<"t/2">>, <<"M2">>), + make_message(300, <<"t/3">>, <<"M3">>) + ], + Batch2 = [ + make_deletion(200, <<"t/2">>, <<"M2">>), + make_deletion(300, <<"t/3">>, '_'), + make_deletion(400, <<"t/4">>, '_') + ], + ?assertEqual(ok, emqx_ds:store_batch(?FUNCTION_NAME, Batch1)), + ?assertEqual(ok, emqx_ds:store_batch(?FUNCTION_NAME, Batch2)), + ?assertMatch( + [ + #message{timestamp = 100, topic = <<"t/1">>, payload = <<"M1">>} + ], + dump_messages(?SHARD, <<"t/#">>, 0) + ). + %% Smoke test for iteration through a concrete topic t_iterate(_Config) -> %% Prepare data: @@ -124,8 +151,6 @@ t_delete(_Config) -> ?assertNot(is_map_key(TopicToDelete, MessagesByTopic), #{msgs => MessagesByTopic}), ?assertEqual(20, length(Messages)). 
--define(assertSameSet(A, B), ?assertEqual(lists:sort(A), lists:sort(B))). - %% Smoke test that verifies that concrete topics are mapped to %% individual streams, unless there's too many of them. t_get_streams(Config) -> @@ -417,79 +442,26 @@ dump_stream(Shard, Stream, TopicFilter, StartTime) -> %% || Topic <- Topics, PublishedAt <- Timestamps %% ]. -%% t_iterate_multigen(_Config) -> -%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), -%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), -%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), -%% Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], -%% Timestamps = lists:seq(1, 100), -%% _ = [ -%% store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) -%% || Topic <- Topics, PublishedAt <- Timestamps -%% ], -%% ?assertEqual( -%% lists:sort([ -%% {Topic, PublishedAt} -%% || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps -%% ]), -%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)]) -%% ), -%% ?assertEqual( -%% lists:sort([ -%% {Topic, PublishedAt} -%% || Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100) -%% ]), -%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 60)]) -%% ). 
- -%% t_iterate_multigen_preserve_restore(_Config) -> -%% ReplayID = atom_to_binary(?FUNCTION_NAME), -%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), -%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), -%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), -%% Topics = ["foo/bar", "foo/bar/baz", "a/bar"], -%% Timestamps = lists:seq(1, 100), -%% TopicFilter = "foo/#", -%% TopicsMatching = ["foo/bar", "foo/bar/baz"], -%% _ = [ -%% store(?SHARD, TS, Topic, term_to_binary({Topic, TS})) -%% || Topic <- Topics, TS <- Timestamps -%% ], -%% It0 = iterator(?SHARD, TopicFilter, 0), -%% {It1, Res10} = iterate(It0, 10), -%% % preserve mid-generation -%% ok = emqx_ds_storage_layer:preserve_iterator(It1, ReplayID), -%% {ok, It2} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), -%% {It3, Res100} = iterate(It2, 88), -%% % preserve on the generation boundary -%% ok = emqx_ds_storage_layer:preserve_iterator(It3, ReplayID), -%% {ok, It4} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), -%% {It5, Res200} = iterate(It4, 1000), -%% ?assertEqual({end_of_stream, []}, iterate(It5, 1)), -%% ?assertEqual( -%% lists:sort([{Topic, TS} || Topic <- TopicsMatching, TS <- Timestamps]), -%% lists:sort([binary_to_term(Payload) || Payload <- Res10 ++ Res100 ++ Res200]) -%% ), -%% ?assertEqual( -%% ok, -%% emqx_ds_storage_layer:discard_iterator(?SHARD, ReplayID) -%% ), -%% ?assertEqual( -%% {error, not_found}, -%% emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID) -%% ). - make_message(PublishedAt, Topic, Payload) when is_list(Topic) -> make_message(PublishedAt, list_to_binary(Topic), Payload); make_message(PublishedAt, Topic, Payload) when is_binary(Topic) -> ID = emqx_guid:gen(), #message{ id = ID, + from = <>, topic = Topic, timestamp = PublishedAt, payload = Payload }. 
+make_deletion(Timestamp, Topic, Payload) -> + {delete, #message_matcher{ + from = <>, + topic = Topic, + timestamp = Timestamp, + payload = Payload + }}. + make_topic(Tokens = [_ | _]) -> emqx_topic:join([bin(T) || T <- Tokens]). @@ -535,13 +507,23 @@ end_per_suite(Config) -> ok. init_per_testcase(TC, Config) -> - ok = emqx_ds:open_db(TC, ?DB_CONFIG(Config)), + ok = emqx_ds:open_db(TC, db_config(TC, Config)), Config. end_per_testcase(TC, _Config) -> emqx_ds:drop_db(TC), ok. +db_config(TC, Config) -> + ConfigBase = ?DB_CONFIG(Config), + SpecificConfig = + try + ?MODULE:TC(?FUNCTION_NAME, Config) + catch + error:undef -> #{} + end, + maps:merge(ConfigBase, SpecificConfig). + shard(TC) -> {TC, <<"0">>}. From 5502af18b7ea385010f28badba80603e967d577a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:44:57 +0200 Subject: [PATCH 44/50] feat(ds): support deletions + precondition-related API in bitfield-lts --- .../src/emqx_ds_storage_bitfield_lts.erl | 67 +++++++++++++------ .../src/emqx_ds_storage_layer.erl | 1 + 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index fb831318e..28a4d54c2 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -37,6 +37,7 @@ update_iterator/4, next/6, delete_next/7, + lookup_message/3, handle_event/4 ]). @@ -46,6 +47,7 @@ -export_type([options/0]). +-include("emqx_ds.hrl"). -include("emqx_ds_metrics.hrl"). -include_lib("emqx_utils/include/emqx_message.hrl"). -include_lib("snabbkaffe/include/trace.hrl"). @@ -68,10 +70,13 @@ -define(start_time, 3). -define(storage_key, 4). -define(last_seen_key, 5). --define(cooked_payloads, 6). +-define(cooked_msg_ops, 6). -define(cooked_lts_ops, 7). -define(cooked_ts, 8). +%% atoms: +-define(delete, 100). 
+ -type options() :: #{ bits_per_wildcard_level => pos_integer(), @@ -110,7 +115,7 @@ -type cooked_batch() :: #{ - ?cooked_payloads := [{binary(), binary()}], + ?cooked_msg_ops := [{binary(), binary() | ?delete}], ?cooked_lts_ops := [{binary(), binary()}], ?cooked_ts := integer() }. @@ -271,24 +276,28 @@ drop(_Shard, DBHandle, GenId, CFRefs, #s{trie = Trie, gvars = GVars}) -> -spec prepare_batch( emqx_ds_storage_layer:shard_id(), s(), - [{emqx_ds:time(), emqx_types:message()}, ...], + emqx_ds_storage_layer:batch(), emqx_ds_storage_layer:batch_store_opts() ) -> {ok, cooked_batch()}. -prepare_batch(_ShardId, S, Messages, _Options) -> +prepare_batch(_ShardId, S, Batch, _Options) -> _ = erase(?lts_persist_ops), - {Payloads, MaxTs} = + {Operations, MaxTs} = lists:mapfoldl( - fun({Timestamp, Msg}, Acc) -> - {Key, _} = make_key(S, Timestamp, Msg), - Payload = {Key, message_to_value_v1(Msg)}, - {Payload, max(Acc, Timestamp)} + fun + ({Timestamp, Msg = #message{topic = Topic}}, Acc) -> + {Key, _} = make_key(S, Timestamp, Topic), + Op = {Key, message_to_value_v1(Msg)}, + {Op, max(Acc, Timestamp)}; + ({delete, #message_matcher{topic = Topic, timestamp = Timestamp}}, Acc) -> + {Key, _} = make_key(S, Timestamp, Topic), + {_Op = {Key, ?delete}, Acc} end, 0, - Messages + Batch ), {ok, #{ - ?cooked_payloads => Payloads, + ?cooked_msg_ops => Operations, ?cooked_lts_ops => pop_lts_persist_ops(), ?cooked_ts => MaxTs }}. 
@@ -302,7 +311,7 @@ prepare_batch(_ShardId, S, Messages, _Options) -> commit_batch( _ShardId, _Data, - #{?cooked_payloads := [], ?cooked_lts_ops := LTS}, + #{?cooked_msg_ops := [], ?cooked_lts_ops := LTS}, _Options ) -> %% Assert: @@ -311,7 +320,7 @@ commit_batch( commit_batch( _ShardId, #s{db = DB, data = DataCF, trie = Trie, trie_cf = TrieCF, gvars = Gvars}, - #{?cooked_lts_ops := LtsOps, ?cooked_payloads := Payloads, ?cooked_ts := MaxTs}, + #{?cooked_lts_ops := LtsOps, ?cooked_msg_ops := Operations, ?cooked_ts := MaxTs}, Options ) -> {ok, Batch} = rocksdb:batch(), @@ -326,10 +335,13 @@ commit_batch( _ = emqx_ds_lts:trie_update(Trie, LtsOps), %% Commit payloads: lists:foreach( - fun({Key, Val}) -> - ok = rocksdb:batch_put(Batch, DataCF, Key, term_to_binary(Val)) + fun + ({Key, Val}) when is_tuple(Val) -> + ok = rocksdb:batch_put(Batch, DataCF, Key, term_to_binary(Val)); + ({Key, ?delete}) -> + ok = rocksdb:batch_delete(Batch, DataCF, Key) end, - Payloads + Operations ), Result = rocksdb:write_batch(DB, Batch, write_batch_opts(Options)), rocksdb:release_batch(Batch), @@ -556,6 +568,23 @@ delete_next_until( rocksdb:iterator_close(ITHandle) end. +-spec lookup_message(emqx_ds_storage_layer:shard_id(), s(), emqx_ds_precondition:matcher()) -> + emqx_types:message() | not_found | emqx_ds:error(_). +lookup_message( + _ShardId, + S = #s{db = DB, data = CF}, + #message_matcher{topic = Topic, timestamp = Timestamp} +) -> + {Key, _} = make_key(S, Timestamp, Topic), + case rocksdb:get(DB, CF, Key, _ReadOpts = []) of + {ok, Blob} -> + deserialize(Blob); + not_found -> + not_found; + Error -> + {error, unrecoverable, {rocksdb, Error}} + end. 
+ handle_event(_ShardId, State = #s{gvars = Gvars}, Time, tick) -> %% If the last message was published more than one epoch ago, and %% the shard remains idle, we need to advance safety cutoff @@ -811,9 +840,9 @@ format_key(KeyMapper, Key) -> Vec = [integer_to_list(I, 16) || I <- emqx_ds_bitmask_keymapper:key_to_vector(KeyMapper, Key)], lists:flatten(io_lib:format("~.16B (~s)", [Key, string:join(Vec, ",")])). --spec make_key(s(), emqx_ds:time(), emqx_types:message()) -> {binary(), [binary()]}. -make_key(#s{keymappers = KeyMappers, trie = Trie}, Timestamp, #message{topic = TopicBin}) -> - Tokens = emqx_topic:words(TopicBin), +-spec make_key(s(), emqx_ds:time(), emqx_types:topic()) -> {binary(), [binary()]}. +make_key(#s{keymappers = KeyMappers, trie = Trie}, Timestamp, Topic) -> + Tokens = emqx_topic:words(Topic), {TopicIndex, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens), VaryingHashes = [hash_topic_level(I) || I <- Varying], KeyMapper = array:get(length(Varying), KeyMappers), diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index ac2edc254..3afdad01a 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -64,6 +64,7 @@ -export_type([ gen_id/0, generation/0, + batch/0, cf_refs/0, stream/0, delete_stream/0, From fc0434afc8d879f9f1508180d6a48a6a95c2faa2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:46:12 +0200 Subject: [PATCH 45/50] chore(dslocal): refine few typespecs --- apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl index e55f4807e..f002c26de 100644 --- a/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl +++ 
b/apps/emqx_ds_builtin_local/src/emqx_ds_builtin_local.erl @@ -304,7 +304,7 @@ get_streams(DB, TopicFilter, StartTime) -> -spec make_iterator( emqx_ds:db(), emqx_ds:ds_specific_stream(), emqx_ds:topic_filter(), emqx_ds:time() ) -> - emqx_ds:make_iterator_result(emqx_ds:ds_specific_iterator()). + emqx_ds:make_iterator_result(iterator()). make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> ShardId = {DB, Shard}, case @@ -318,7 +318,7 @@ make_iterator(DB, ?stream(Shard, InnerStream), TopicFilter, StartTime) -> Error end. --spec update_iterator(emqx_ds:db(), emqx_ds:ds_specific_iterator(), emqx_ds:message_key()) -> +-spec update_iterator(emqx_ds:db(), iterator(), emqx_ds:message_key()) -> emqx_ds:make_iterator_result(iterator()). update_iterator(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, Key) -> case emqx_ds_storage_layer:update_iterator({DB, Shard}, StorageIter0, Key) of @@ -396,7 +396,7 @@ do_next(DB, Iter0 = #{?tag := ?IT, ?shard := Shard, ?enc := StorageIter0}, N) -> end. -spec do_delete_next(emqx_ds:db(), delete_iterator(), emqx_ds:delete_selector(), pos_integer()) -> - emqx_ds:delete_next_result(emqx_ds:delete_iterator()). + emqx_ds:delete_next_result(delete_iterator()). 
do_delete_next( DB, Iter = #{?tag := ?DELETE_IT, ?shard := Shard, ?enc := StorageIter0}, Selector, N ) -> From d8aa39a31071ad1d049ea9af17ec57ac6c7966cb Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 18:46:54 +0200 Subject: [PATCH 46/50] fix(dsraft): use local application environment --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 6acbf94ca..007615674 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -675,7 +675,7 @@ list_nodes() -> -define(SHARD_RPC(DB, SHARD, NODE, BODY), case emqx_ds_replication_layer_shard:servers( - DB, SHARD, application:get_env(emqx_durable_storage, reads, leader_preferred) + DB, SHARD, application:get_env(emqx_ds_builtin_raft, reads, leader_preferred) ) of [{_, NODE} | _] -> From b0594271b2eb292dd9cf8cf318bbd8c331548efa Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 31 Jul 2024 19:16:11 +0200 Subject: [PATCH 47/50] chore(dsraft): fix a typespec --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 007615674..8be3417cd 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -101,7 +101,10 @@ n_shards => pos_integer(), n_sites => pos_integer(), replication_factor => pos_integer(), - replication_options => _TODO :: #{} + replication_options => _TODO :: #{}, + %% Inherited from `emqx_ds:generic_db_opts()`. + force_monotonic_timestamps => boolean(), + atomic_batches => boolean() }. 
%% This enapsulates the stream entity from the replication level. From 7b85faf12a46137a922f60555377c4e793ba0579 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 5 Aug 2024 10:44:33 +0200 Subject: [PATCH 48/50] chore(dsraft): fix few spelling errors Co-Authored-By: Thales Macedo Garitezi --- apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl | 4 ++-- apps/emqx_durable_storage/src/emqx_ds_precondition.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl index 8be3417cd..11c809dbd 100644 --- a/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_ds_builtin_raft/src/emqx_ds_replication_layer.erl @@ -140,7 +140,7 @@ }. %% Write batch. -%% Instances of this type currently form the mojority of the Raft log. +%% Instances of this type currently form the majority of the Raft log. -type batch() :: #{ ?tag := ?BATCH, ?batch_operations := [emqx_ds:operation()], @@ -281,7 +281,7 @@ store_batch_atomic(DB, Batch, _Opts) -> [] -> ok; [_ | _] -> - {error, unrecoverable, nonatomic_batch_spans_multiple_storages} + {error, unrecoverable, atomic_batch_spans_multiple_shards} end. -spec get_streams(emqx_ds:db(), emqx_ds:topic_filter(), emqx_ds:time()) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl index 5f86d6c25..3002fcd08 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_precondition.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_precondition.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 20212024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. 
From 5dd8fefded702c00e0c544d27fce1e73465bc71a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 5 Aug 2024 16:34:17 +0200 Subject: [PATCH 49/50] test(ds): avoid side effects in check phase --- apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl index e70d2b682..95ebfe99c 100644 --- a/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl +++ b/apps/emqx_ds_backends/test/emqx_ds_backends_SUITE.erl @@ -327,9 +327,8 @@ t_11_batch_preconditions(Config) -> ?assertEqual( ok, emqx_ds:store_batch(DB, Batch1) - ) - end, - fun(_Trace) -> + ), + %% Wait at least until current epoch ends. ct:sleep(1000), %% There's no messages in the DB. @@ -337,7 +336,8 @@ t_11_batch_preconditions(Config) -> [], emqx_ds_test_helpers:consume(DB, emqx_topic:words(<<"t/#">>)) ) - end + end, + [] ). t_12_batch_precondition_conflicts(Config) -> From 9ca3985bbdc23afbba308cf9b2d39e87c131c611 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 6 Aug 2024 10:24:21 -0300 Subject: [PATCH 50/50] test: attempt to reduce test flakiness --- .../test/emqx_bridge_kafka_impl_consumer_SUITE.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl index 74d3a5f54..d7408f0e5 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl @@ -1918,13 +1918,14 @@ t_node_joins_existing_cluster(Config) -> _Attempts2 = 50, [] =/= erpc:call(N2, emqx_router, lookup_routes, [MQTTTopic]) ), + NumMsgs = 50 * NPartitions, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{ ?snk_kind := kafka_consumer_handle_message, ?snk_span := {complete, _} }), - NPartitions, + NumMsgs, 
20_000 ), lists:foreach( @@ -1933,7 +1934,7 @@ t_node_joins_existing_cluster(Config) -> Val = <<"v", (integer_to_binary(N))/binary>>, publish(Config, KafkaTopic, [#{key => Key, value => Val}]) end, - lists:seq(1, 10 * NPartitions) + lists:seq(1, NumMsgs) ), {ok, _} = snabbkaffe:receive_events(SRef1),