From 08ad09a68f04ac1678815634e5e7dff2a38f6a4a Mon Sep 17 00:00:00 2001 From: firest Date: Wed, 27 Sep 2023 20:53:10 +0800 Subject: [PATCH] fix(sso): refactor backen update logic 1. valid config always can update successfully 2. the `running` endpoint only return successfully created backend 3. enhancement of the `/sso` endpoint, and will check is the resource online --- .../src/emqx_dashboard_sso.erl | 4 +- .../src/emqx_dashboard_sso_api.erl | 36 ++- .../src/emqx_dashboard_sso_manager.erl | 255 ++++++++++-------- .../test/emqx_dashboard_sso_ldap_SUITE.erl | 77 +++--- rel/i18n/emqx_dashboard_sso_api.hocon | 12 + 5 files changed, 212 insertions(+), 172 deletions(-) diff --git a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso.erl b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso.erl index a7742fc36..2214be06f 100644 --- a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso.erl +++ b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso.erl @@ -26,7 +26,9 @@ backend => atom(), atom() => term() }. --type state() :: #{atom() => term()}. + +%% Note: if a backend has a resource, it must be stored in the state and named resource_id +-type state() :: #{resource_id => binary(), atom() => term()}. -type raw_config() :: #{binary() => term()}. -type config() :: parsed_config() | raw_config(). -type hocon_ref() :: ?R_REF(Module :: atom(), Name :: atom() | binary()). diff --git a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_api.erl b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_api.erl index 6b8d7ef42..b7859948e 100644 --- a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_api.erl +++ b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_api.erl @@ -133,24 +133,28 @@ schema("/sso/:backend") -> }. fields(backend_status) -> - emqx_dashboard_sso_schema:common_backend_schema(emqx_dashboard_sso:types()). + emqx_dashboard_sso_schema:common_backend_schema(emqx_dashboard_sso:types()) ++ + [ + {running, + mk( + boolean(), #{ + desc => ?DESC(running) + } + )}, + {last_error, + mk( + binary(), #{ + desc => ?DESC(last_error) + } + )} + ]. %%-------------------------------------------------------------------- %% API %%-------------------------------------------------------------------- running(get, _Request) -> - SSO = emqx:get_config(?MOD_KEY_PATH, #{}), - {200, - lists:filtermap( - fun - (#{backend := Backend, enable := true}) -> - {true, Backend}; - (_) -> - false - end, - maps:values(SSO) - )}. + {200, emqx_dashboard_sso_manager:running()}. login(post, #{bindings := #{backend := Backend}, body := Body} = Request) -> case emqx_dashboard_sso_manager:lookup_state(Backend) of @@ -185,8 +189,12 @@ sso(get, _Request) -> SSO = emqx:get_config(?MOD_KEY_PATH, #{}), {200, lists:map( - fun(Backend) -> - maps:with([backend, enable], Backend) + fun(#{backend := Backend, enable := Enable}) -> + Status = emqx_dashboard_sso_manager:get_backend_status(Backend, Enable), + Status#{ + backend => Backend, + enable => Enable + } end, maps:values(SSO) )}. diff --git a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl index 1d080ac5c..c1b7ce5b8 100644 --- a/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl +++ b/apps/emqx_dashboard_sso/src/emqx_dashboard_sso_manager.erl @@ -24,11 +24,11 @@ -export([ running/0, + get_backend_status/2, lookup_state/1, make_resource_id/1, create_resource/3, - update_resource/3, - call/1 + update_resource/3 ]). -export([ @@ -39,20 +39,21 @@ propagated_post_config_update/5 ]). --import(emqx_dashboard_sso, [provider/1]). +-import(emqx_dashboard_sso, [provider/1, format/1]). -define(MOD_TAB, emqx_dashboard_sso). -define(MOD_KEY_PATH, [dashboard, sso]). --define(CALL_TIMEOUT, timer:seconds(15)). -define(MOD_KEY_PATH(Sub), [dashboard, sso, Sub]). -define(RESOURCE_GROUP, <<"emqx_dashboard_sso">>). +-define(START_ERROR_KEY, start_error). -define(DEFAULT_RESOURCE_OPTS, #{ start_after_created => false }). -record(?MOD_TAB, { backend :: atom(), - state :: map() + state :: map(), + last_error = <<>> :: term() }). %%------------------------------------------------------------------------------ @@ -62,17 +63,36 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). running() -> - maps:fold( + lists:filtermap( fun - (Type, #{enable := true}, Acc) -> - [Type | Acc]; - (_Type, _Cfg, Acc) -> - Acc + (#?MOD_TAB{backend = Backend, last_error = <<>>}) -> + {true, Backend}; + (_) -> + false end, - [], - emqx:get_config(?MOD_KEY_PATH) + ets:tab2list(?MOD_TAB) ). +get_backend_status(Backend, false) -> + #{ + backend => Backend, + enable => false, + running => false, + last_error => <<>> + }; +get_backend_status(Backend, _) -> + case lookup(Backend) of + undefined -> + #{ + backend => Backend, + enable => true, + running => false, + last_error => <<"resource not found">> + }; + Data -> + maps:merge(#{backend => Backend, enable => true}, do_get_backend_status(Data)) + end. + update(Backend, Config) -> update_config(Backend, {?FUNCTION_NAME, Backend, Config}). delete(Backend) -> @@ -106,14 +126,6 @@ update_resource(ResourceId, Module, Config) -> ), start_resource_if_enabled(ResourceId, Result, Config). -call(Req) -> - try - gen_server:call(?MODULE, Req, ?CALL_TIMEOUT) - catch - exit:{timeout, _} -> - {error, <<"Update backend failed: timeout">>} - end. - %%------------------------------------------------------------------------------ %% gen_server callbacks %%------------------------------------------------------------------------------ @@ -133,9 +145,6 @@ init([]) -> start_backend_services(), {ok, #{}}. -handle_call({update_config, Req, NewConf, OldConf}, _From, State) -> - Result = on_config_update(Req, NewConf, OldConf), - {reply, Result, State}; handle_call(_Request, _From, State) -> Reply = ok, {reply, Reply, State}. @@ -177,35 +186,47 @@ start_backend_services() -> backend => Backend, reason => emqx_utils:redact(Reason) }) - end + end, + record_start_error(Backend, false) end, maps:to_list(Backends) ). update_config(Backend, UpdateReq) -> + OkFun = fun(Result) -> + ?SLOG(info, #{ + msg => "update_sso_successfully", + backend => Backend, + result => emqx_utils:redact(Result) + }), + Result + end, + + ErrFun = fun({error, Reason} = Error) -> + SafeReason = emqx_utils:redact(Reason), + ?SLOG(error, #{ + msg => "update_sso_failed", + backend => Backend, + reason => SafeReason + }), + Error + end, + + %% we always make sure the valid configuration will update successfully, + %% ignore the runtime error during its update case emqx_conf:update(?MOD_KEY_PATH(Backend), UpdateReq, #{override_to => cluster}) of {ok, UpdateResult} -> #{post_config_update := #{?MODULE := Result}} = UpdateResult, - ?SLOG(info, #{ - msg => "update_sso_successfully", - backend => Backend, - result => emqx_utils:redact(Result) - }), - Result; - {error, Reason} -> - SafeReason = emqx_utils:redact(Reason), - ?SLOG(error, #{ - msg => "update_sso_failed", - backend => Backend, - reason => SafeReason - }), - {error, - case SafeReason of - {_Stage, _Mod, Reason2} -> - Reason2; - _ -> - Reason - end} + case Result of + ok -> + OkFun(Result); + {ok, _} -> + OkFun(Result); + {error, _} = Error -> + ErrFun(Error) + end; + {error, _} = Error -> + ErrFun(Error) end. pre_config_update(_, {update, _Backend, Config}, _OldConf) -> @@ -215,8 +236,8 @@ pre_config_update(_, {delete, _Backend}, undefined) -> pre_config_update(_, {delete, _Backend}, _OldConf) -> {ok, null}. -post_config_update(_, UpdateReq, NewConf, OldConf, _AppEnvs) -> - call({update_config, UpdateReq, NewConf, OldConf}). +post_config_update(_, UpdateReq, NewConf, _OldConf, _AppEnvs) -> + {ok, on_config_update(UpdateReq, NewConf)}. propagated_post_config_update( ?MOD_KEY_PATH(BackendBin) = Path, _UpdateReq, undefined, OldConf, AppEnvs @@ -237,44 +258,37 @@ propagated_post_config_update( Error end. -on_config_update({update, Backend, _RawConfig}, Config, OldConfig) -> +on_config_update({update, Backend, _RawConfig}, Config) -> Provider = provider(Backend), case lookup(Backend) of undefined -> on_backend_updated( + Backend, emqx_dashboard_sso:create(Provider, Config), - fun(State) -> - ets:insert(?MOD_TAB, #?MOD_TAB{backend = Backend, state = State}) + fun(State, LastError) -> + ets:insert( + ?MOD_TAB, + #?MOD_TAB{backend = Backend, state = State, last_error = LastError} + ) end ); Data -> - %% the steps for updating/recreating a resource are: - %% 1. destroy the old resource - %% 2. create a new resource - %% to keep consistency we need to follow those steps too, - %% however a failed update will not change the config, but will lose the resource - %% hence for consistency and atomicity, we should rollback when the update fails - ets:delete(?MOD_TAB, Backend), - UpdateState = fun(State) -> - ets:insert(?MOD_TAB, Data#?MOD_TAB{state = State}) - end, on_backend_updated( + Backend, emqx_dashboard_sso:update(Provider, Config, Data#?MOD_TAB.state), - UpdateState, - rollback( - Backend, - OldConfig, - UpdateState - ) + fun(State, LastError) -> + ets:insert(?MOD_TAB, Data#?MOD_TAB{state = State, last_error = LastError}) + end ) end; -on_config_update({delete, Backend}, _NewConf, _OldConf) -> +on_config_update({delete, Backend}, _NewConf) -> case lookup(Backend) of undefined -> {error, not_exists}; Data -> Provider = provider(Backend), on_backend_updated( + Backend, emqx_dashboard_sso:destroy(Provider, Data#?MOD_TAB.state), fun() -> ets:delete(?MOD_TAB, Backend) @@ -290,45 +304,39 @@ lookup(Backend) -> undefined end. +%% to avoid resource leakage the resource start will never affect the update result, +%% so the resource_id will always be recorded start_resource_if_enabled(ResourceId, {ok, _} = Result, #{enable := true}) -> + clear_start_error(), case emqx_resource:start(ResourceId) of ok -> - Result; + ok; {error, Reason} -> SafeReason = emqx_utils:redact(Reason), + mark_start_error(SafeReason), ?SLOG(error, #{ msg => "start_backend_failed", resource_id => ResourceId, reason => SafeReason }), - clean_when_start_failed(ResourceId), - {error, emqx_dashboard_sso:format(["Start backend failed, Reason: ", SafeReason])} - end; + ok + end, + Result; start_resource_if_enabled(_ResourceId, Result, _Config) -> Result. -%% ensure the backend creation is atomic, clean the corresponding resource when necessary, -%% when creating a backend fails, nothing will be inserted into the SSO table, -%% thus the resources created by backend will leakage. -%% Although we can treat start failure as successful, -%% and insert the resource data into the SSO table, -%% it may be strange for users: it succeeds, but can't be used. -clean_when_start_failed(ResourceId) -> - _ = emqx_resource:remove_local(ResourceId), - ok. - -on_backend_updated(Result, OkFun) -> - on_backend_updated(Result, OkFun, undefined). - -%% this first level `ok` is for emqx_config_handler, and the second level is for the caller -on_backend_updated({ok, State} = Ok, Fun, _ErrFun) -> - Fun(State), - {ok, Ok}; -on_backend_updated(ok, Fun, _ErrFun) -> +on_backend_updated(Backend, {ok, State} = Ok, Fun) -> + Fun(State, <<>>), + record_start_error(Backend, true), + Ok; +on_backend_updated(_Backend, {ok, State, LastError} = Ok, Fun) -> + Fun(State, LastError), + Ok; +on_backend_updated(_Backend, ok, Fun) -> Fun(), - {ok, ok}; -on_backend_updated(Error, _, ErrFun) -> - erlang:is_function(ErrFun) andalso ErrFun(Error), + ok; +on_backend_updated(Backend, {error, Reason} = Error, _) -> + update_last_error(Backend, Reason), Error. bin(A) when is_atom(A) -> atom_to_binary(A, utf8); @@ -365,31 +373,44 @@ new_ssl_source(Source, undefined) -> new_ssl_source(Source, SSL) -> Source#{<<"ssl">> => SSL}. -rollback(Backend, OldConf, OnSucc) -> - fun(_) -> - try_recreate(Backend, OldConf, OnSucc) +clear_start_error() -> + mark_start_error(<<>>). + +mark_start_error(Reason) -> + erlang:put(?START_ERROR_KEY, Reason). + +record_start_error(Backend, Force) -> + case erlang:get(?START_ERROR_KEY) of + <<>> when Force -> + update_last_error(Backend, <<>>); + <<>> -> + ok; + Reason -> + update_last_error(Backend, Reason) end. -try_recreate(_Backend, undefined, _OnSucc) -> - ok; -try_recreate(_Backend, #{enable := false}, _OnSucc) -> - ok; -try_recreate(Backend, Config, OnSucc) -> - Provider = provider(Backend), - ?SLOG(info, #{ - msg => "backend_rollback", - backend => Backend, - reason => "update_sso_failed", - config => emqx_utils:redact(Config) - }), - on_backend_updated( - emqx_dashboard_sso:create(Provider, Config), - OnSucc, - fun(Error) -> - ?SLOG(error, #{ - msg => "backend_rollback_failed", - backend => Backend, - reason => emqx_utils:redact(Error) - }) - end - ). +update_last_error(Backend, Reason) -> + ets:update_element(?MOD_TAB, Backend, {#?MOD_TAB.last_error, Reason}). + +do_get_backend_status(#?MOD_TAB{state = #{resource_id := ResourceId}}) -> + case emqx_resource_manager:lookup(ResourceId) of + {ok, _Group, #{status := connected}} -> + #{running => true, last_error => <<>>}; + {ok, _Group, #{status := Status}} -> + #{ + running => false, + last_error => format([<<"Resource not valid, status: ">>, Status]) + }; + {error, not_found} -> + #{ + running => false, + last_error => <<"Resource not found">> + } + end; +do_get_backend_status(#?MOD_TAB{last_error = <<>>}) -> + #{running => true, last_error => <<>>}; +do_get_backend_status(#?MOD_TAB{last_error = LastError}) -> + #{ + running => false, + last_error => format([LastError]) + }. diff --git a/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl b/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl index 1ed21d09e..7882e17f2 100644 --- a/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl +++ b/apps/emqx_dashboard_sso/test/emqx_dashboard_sso_ldap_SUITE.erl @@ -24,14 +24,14 @@ all() -> [ - t_create_atomicly, + t_bad_create, t_create, t_update, - t_update_atomicly, t_get, t_login_with_bad, t_first_login, t_next_login, + t_bad_update, t_delete ]. @@ -61,10 +61,10 @@ end_per_testcase(Case, _) -> end, ok. -t_create_atomicly(_) -> +t_bad_create(_) -> Path = uri(["sso", "ldap"]), ?assertMatch( - {ok, 400, _}, + {ok, 200, _}, request( put, Path, @@ -75,12 +75,18 @@ t_create_atomicly(_) -> }) ) ), - ?assertEqual(undefined, emqx:get_config(?MOD_KEY_PATH, undefined)), - ?assertEqual([], ets:tab2list(?MOD_TAB)), + ?assertMatch(#{backend := ldap}, emqx:get_config(?MOD_KEY_PATH, undefined)), + check_running([]), + ?assertMatch( + [#{backend := <<"ldap">>, enable := true, running := false, last_error := _}], get_sso() + ), + + emqx_dashboard_sso_manager:delete(ldap), + ?retry( - _Interval = 1000, - _NAttempts = 5, - ?assertEqual([], emqx_resource_manager:list_group(?RESOURCE_GROUP)) + _Interval = 500, + _NAttempts = 10, + ?assertMatch([], emqx_resource_manager:list_group(?RESOURCE_GROUP)) ), ok. @@ -112,37 +118,6 @@ t_update(_) -> ?assertNotEqual(undefined, emqx_dashboard_sso_manager:lookup_state(ldap)), ok. -%% update fails can rollback able -t_update_atomicly(_) -> - CurrRes = emqx_resource_manager:list_group(?RESOURCE_GROUP), - - Path = uri(["sso", "ldap"]), - ?assertMatch( - {ok, 400, _}, - request( - put, - Path, - ldap_config(#{ - <<"username">> => <<"invalid">>, - <<"enable">> => true, - <<"request_timeout">> => <<"1s">> - }) - ) - ), - - ?assertMatch(#{backend := ldap}, emqx:get_config(?MOD_KEY_PATH, undefined)), - ?assertMatch([_], ets:tab2list(?MOD_TAB)), - ?retry( - _Interval = 5, - _NAttempts = 1000, - begin - Res = emqx_resource_manager:list_group(?RESOURCE_GROUP), - ?assertMatch([_], Res), - ?assertNotMatch(CurrRes, Res) - end - ), - ok. - t_get(_) -> Path = uri(["sso", "ldap"]), {ok, 200, Result} = request(get, Path), @@ -190,6 +165,28 @@ t_next_login(_) -> ?assertMatch(#{license := _, token := _}, decode_json(Result)), ok. +t_bad_update(_) -> + Path = uri(["sso", "ldap"]), + ?assertMatch( + {ok, 200, _}, + request( + put, + Path, + ldap_config(#{ + <<"username">> => <<"invalid">>, + <<"enable">> => true, + <<"request_timeout">> => <<"1s">> + }) + ) + ), + ?assertMatch(#{backend := ldap}, emqx:get_config(?MOD_KEY_PATH, undefined)), + check_running([]), + ?assertMatch( + [#{backend := <<"ldap">>, enable := true, running := false, last_error := _}], get_sso() + ), + + ok. + t_delete(_) -> Path = uri(["sso", "ldap"]), ?assertMatch({ok, 204, _}, request(delete, Path)), diff --git a/rel/i18n/emqx_dashboard_sso_api.hocon b/rel/i18n/emqx_dashboard_sso_api.hocon index a7ce1f97e..b65142366 100644 --- a/rel/i18n/emqx_dashboard_sso_api.hocon +++ b/rel/i18n/emqx_dashboard_sso_api.hocon @@ -51,4 +51,16 @@ backend_name.desc: backend_name.label: """Backend Name""" +running.desc: +"""Is the backend running""" + +running.label: +"""Running""" + +last_error.desc: +"""Last error of this backend""" + +last_error.label: +"""Last Error""" + }