From b2699c687b44d92b93c08090868e2d4ae7ff3156 Mon Sep 17 00:00:00 2001 From: firest Date: Wed, 27 Sep 2023 15:48:11 +0800 Subject: [PATCH] fix(sso): support for SSL update && ensure update is atomic 1. support update SSL key and cert files 2. increase connection timeout 3. ensure the update is atomicity, everything will be consistent --- .../src/emqx_dashboard_sso_manager.erl | 118 +++++++++++++----- .../test/emqx_dashboard_sso_ldap_SUITE.erl | 32 +++++ apps/emqx_ldap/src/emqx_ldap.erl | 2 +- 3 files changed, 123 insertions(+), 29 deletions(-) diff --git a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl index 9b37c37f4..1d080ac5c 100644 --- a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl +++ b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl @@ -43,7 +43,7 @@ -define(MOD_TAB, emqx_dashboard_sso). -define(MOD_KEY_PATH, [dashboard, sso]). --define(CALL_TIMEOUT, timer:seconds(10)). +-define(CALL_TIMEOUT, timer:seconds(15)). -define(MOD_KEY_PATH(Sub), [dashboard, sso, Sub]). -define(RESOURCE_GROUP, <<"emqx_dashboard_sso">>). -define(DEFAULT_RESOURCE_OPTS, #{ @@ -98,7 +98,7 @@ create_resource(ResourceId, Module, Config) -> Config, ?DEFAULT_RESOURCE_OPTS ), - start_resource_if_enabled(ResourceId, Result, Config, fun clean_when_start_failed/1). + start_resource_if_enabled(ResourceId, Result, Config). update_resource(ResourceId, Module, Config) -> Result = emqx_resource:recreate_local( @@ -133,8 +133,8 @@ init([]) -> start_backend_services(), {ok, #{}}. -handle_call({update_config, Req, NewConf}, _From, State) -> - Result = on_config_update(Req, NewConf), +handle_call({update_config, Req, NewConf, OldConf}, _From, State) -> + Result = on_config_update(Req, NewConf, OldConf), {reply, Result, State}; handle_call(_Request, _From, State) -> Reply = ok, @@ -175,7 +175,7 @@ start_backend_services() -> ?SLOG(error, #{ msg => "start_sso_backend_failed", backend => Backend, - reason => Reason + reason => emqx_utils:redact(Reason) }) end end, @@ -189,17 +189,18 @@ update_config(Backend, UpdateReq) -> ?SLOG(info, #{ msg => "update_sso_successfully", backend => Backend, - result => Result + result => emqx_utils:redact(Result) }), Result; {error, Reason} -> + SafeReason = emqx_utils:redact(Reason), ?SLOG(error, #{ msg => "update_sso_failed", backend => Backend, - reason => Reason + reason => SafeReason }), {error, - case Reason of + case SafeReason of {_Stage, _Mod, Reason2} -> Reason2; _ -> @@ -208,14 +209,14 @@ update_config(Backend, UpdateReq) -> end. pre_config_update(_, {update, _Backend, Config}, _OldConf) -> - {ok, Config}; + maybe_write_certs(Config); pre_config_update(_, {delete, _Backend}, undefined) -> throw(not_exists); pre_config_update(_, {delete, _Backend}, _OldConf) -> {ok, null}. -post_config_update(_, UpdateReq, NewConf, _OldConf, _AppEnvs) -> - call({update_config, UpdateReq, NewConf}). +post_config_update(_, UpdateReq, NewConf, OldConf, _AppEnvs) -> + call({update_config, UpdateReq, NewConf, OldConf}). propagated_post_config_update( ?MOD_KEY_PATH(BackendBin) = Path, _UpdateReq, undefined, OldConf, AppEnvs @@ -236,7 +237,7 @@ propagated_post_config_update( Error end. -on_config_update({update, Backend, _RawConfig}, Config) -> +on_config_update({update, Backend, _RawConfig}, Config, OldConfig) -> Provider = provider(Backend), case lookup(Backend) of undefined -> @@ -247,14 +248,27 @@ on_config_update({update, Backend, _RawConfig}, Config) -> end ); Data -> + %% the steps for updating/recreating a resource are: + %% 1. destroy the old resource + %% 2. create a new resource + %% to keep consistency we need to follow those steps too, + %% however a failed update will not change the config, but will lose the resource + %% hence for consistency and atomicity, we should rollback when the update fails + ets:delete(?MOD_TAB, Backend), + UpdateState = fun(State) -> + ets:insert(?MOD_TAB, Data#?MOD_TAB{state = State}) + end, on_backend_updated( emqx_dashboard_sso:update(Provider, Config, Data#?MOD_TAB.state), - fun(State) -> - ets:insert(?MOD_TAB, Data#?MOD_TAB{state = State}) - end + UpdateState, + rollback( + Backend, + OldConfig, + UpdateState + ) ) end; -on_config_update({delete, Backend}, _NewConf) -> +on_config_update({delete, Backend}, _NewConf, _OldConf) -> case lookup(Backend) of undefined -> {error, not_exists}; @@ -276,12 +290,7 @@ lookup(Backend) -> undefined end. -start_resource_if_enabled(ResourceId, Result, Config) -> - start_resource_if_enabled(ResourceId, Result, Config, undefined). - -start_resource_if_enabled( - ResourceId, {ok, _} = Result, #{enable := true}, CleanWhenStartFailed -) -> +start_resource_if_enabled(ResourceId, {ok, _} = Result, #{enable := true}) -> case emqx_resource:start(ResourceId) of ok -> Result; @@ -292,11 +301,10 @@ start_resource_if_enabled( resource_id => ResourceId, reason => SafeReason }), - erlang:is_function(CleanWhenStartFailed) andalso - CleanWhenStartFailed(ResourceId), + clean_when_start_failed(ResourceId), {error, emqx_dashboard_sso:format(["Start backend failed, Reason: ", SafeReason])} end; -start_resource_if_enabled(_ResourceId, Result, _Config, _) -> +start_resource_if_enabled(_ResourceId, Result, _Config) -> Result. %% ensure the backend creation is atomic, clean the corresponding resource when necessary, @@ -309,14 +317,18 @@ clean_when_start_failed(ResourceId) -> _ = emqx_resource:remove_local(ResourceId), ok. +on_backend_updated(Result, OkFun) -> + on_backend_updated(Result, OkFun, undefined). + %% this first level `ok` is for emqx_config_handler, and the second level is for the caller -on_backend_updated({ok, State} = Ok, Fun) -> +on_backend_updated({ok, State} = Ok, Fun, _ErrFun) -> Fun(State), {ok, Ok}; -on_backend_updated(ok, Fun) -> +on_backend_updated(ok, Fun, _ErrFun) -> Fun(), {ok, ok}; -on_backend_updated(Error, _) -> +on_backend_updated(Error, _, ErrFun) -> + erlang:is_function(ErrFun) andalso ErrFun(Error), Error. bin(A) when is_atom(A) -> atom_to_binary(A, utf8); @@ -331,3 +343,53 @@ add_handler() -> remove_handler() -> ok = emqx_conf:remove_handler(?MOD_KEY_PATH('?')). + +maybe_write_certs(#{<<"backend">> := Backend} = Conf) -> + case + emqx_tls_lib:ensure_ssl_files( + ssl_file_path(Backend), maps:get(<<"ssl">>, Conf, undefined) + ) + of + {ok, SSL} -> + {ok, new_ssl_source(Conf, SSL)}; + {error, Reason} -> + ?SLOG(error, Reason#{msg => "bad_ssl_config"}), + throw({bad_ssl_config, Reason}) + end. + +ssl_file_path(Backend) -> + filename:join(["sso", Backend]). + +new_ssl_source(Source, undefined) -> + Source; +new_ssl_source(Source, SSL) -> + Source#{<<"ssl">> => SSL}. + +rollback(Backend, OldConf, OnSucc) -> + fun(_) -> + try_recreate(Backend, OldConf, OnSucc) + end. + +try_recreate(_Backend, undefined, _OnSucc) -> + ok; +try_recreate(_Backend, #{enable := false}, _OnSucc) -> + ok; +try_recreate(Backend, Config, OnSucc) -> + Provider = provider(Backend), + ?SLOG(info, #{ + msg => "backend_rollback", + backend => Backend, + reason => "update_sso_failed", + config => emqx_utils:redact(Config) + }), + on_backend_updated( + emqx_dashboard_sso:create(Provider, Config), + OnSucc, + fun(Error) -> + ?SLOG(error, #{ + msg => "backend_rollback_failed", + backend => Backend, + reason => emqx_utils:redact(Error) + }) + end + ). diff --git a/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl b/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl index d85ffff10..1ed21d09e 100644 --- a/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl +++ b/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl @@ -27,6 +27,7 @@ all() -> t_create_atomicly, t_create, t_update, + t_update_atomicly, t_get, t_login_with_bad, t_first_login, @@ -111,6 +112,37 @@ t_update(_) -> ?assertNotEqual(undefined, emqx_dashboard_sso_manager:lookup_state(ldap)), ok. +%% update fails can rollback able +t_update_atomicly(_) -> + CurrRes = emqx_resource_manager:list_group(?RESOURCE_GROUP), + + Path = uri(["sso", "ldap"]), + ?assertMatch( + {ok, 400, _}, + request( + put, + Path, + ldap_config(#{ + <<"username">> => <<"invalid">>, + <<"enable">> => true, + <<"request_timeout">> => <<"1s">> + }) + ) + ), + + ?assertMatch(#{backend := ldap}, emqx:get_config(?MOD_KEY_PATH, undefined)), + ?assertMatch([_], ets:tab2list(?MOD_TAB)), + ?retry( + _Interval = 5, + _NAttempts = 1000, + begin + Res = emqx_resource_manager:list_group(?RESOURCE_GROUP), + ?assertMatch([_], Res), + ?assertNotMatch(CurrRes, Res) + end + ), + ok. + t_get(_) -> Path = uri(["sso", "ldap"]), {ok, 200, Result} = request(get, Path), diff --git a/apps/emqx_ldap/src/emqx_ldap.erl b/apps/emqx_ldap/src/emqx_ldap.erl index 7b856521c..e7ed01113 100644 --- a/apps/emqx_ldap/src/emqx_ldap.erl +++ b/apps/emqx_ldap/src/emqx_ldap.erl @@ -74,7 +74,7 @@ fields(config) -> {request_timeout, ?HOCON(emqx_schema:timeout_duration_ms(), #{ desc => ?DESC(request_timeout), - default => <<"5s">> + default => <<"10s">> })}, {ssl, ?HOCON(?R_REF(?MODULE, ssl), #{