fix(sso): Handle backend update timeout and fix create errors

1. correctly handle the timeout when call update on a backend
2. fix that config update always returns success
3. do not ignore start failures and ensure start is atomic
This commit is contained in:
firest 2023-09-26 16:18:25 +08:00
parent 8cc626d33f
commit 403714d44e
4 changed files with 132 additions and 31 deletions

View File

@ -16,7 +16,7 @@
login/3
]).
-export([types/0, modules/0, provider/1, backends/0]).
-export([types/0, modules/0, provider/1, backends/0, format/1]).
%%------------------------------------------------------------------------------
%% Callbacks
@ -83,3 +83,23 @@ backends() ->
ldap => emqx_dashboard_sso_ldap,
saml => emqx_dashboard_sso_saml
}.
format(Args) ->
lists:foldl(fun combine/2, <<>>, Args).
combine(Arg, Bin) when is_binary(Arg) ->
<<Bin/binary, Arg/binary>>;
combine(Arg, Bin) when is_list(Arg) ->
case io_lib:printable_unicode_list(Arg) of
true ->
ArgBin = unicode:characters_to_binary(Arg),
<<Bin/binary, ArgBin/binary>>;
_ ->
generic_combine(Arg, Bin)
end;
combine(Arg, Bin) ->
generic_combine(Arg, Bin).
generic_combine(Arg, Bin) ->
Str = io_lib:format("~0p", [Arg]),
erlang:iolist_to_binary([Bin, Str]).

View File

@ -267,8 +267,10 @@ handle_backend_update_result({error, already_exists}, _) ->
{400, #{code => ?BAD_REQUEST, message => <<"Backend already exists">>}};
handle_backend_update_result({error, failed_to_load_metadata}, _) ->
{400, #{code => ?BAD_REQUEST, message => <<"Failed to load metadata">>}};
handle_backend_update_result({error, Reason}, _) when is_binary(Reason) ->
{400, #{code => ?BAD_REQUEST, message => Reason}};
handle_backend_update_result({error, Reason}, _) ->
{400, #{code => ?BAD_REQUEST, message => Reason}}.
{400, #{code => ?BAD_REQUEST, message => emqx_dashboard_sso:format(["Reason: ", Reason])}}.
to_json(Data) ->
emqx_utils_maps:jsonable_map(

View File

@ -41,14 +41,16 @@
-import(emqx_dashboard_sso, [provider/1]).
-define(MOD_TAB, emqx_dashboard_sso).
-define(MOD_KEY_PATH, [dashboard, sso]).
-define(CALL_TIMEOUT, timer:seconds(10)).
-define(MOD_KEY_PATH(Sub), [dashboard, sso, Sub]).
-define(RESOURCE_GROUP, <<"emqx_dashboard_sso">>).
-define(DEFAULT_RESOURCE_OPTS, #{
start_after_created => false
}).
-record(dashboard_sso, {
-record(?MOD_TAB, {
backend :: atom(),
state :: map()
}).
@ -77,9 +79,9 @@ delete(Backend) ->
update_config(Backend, {?FUNCTION_NAME, Backend}).
lookup_state(Backend) ->
case ets:lookup(dashboard_sso, Backend) of
case ets:lookup(?MOD_TAB, Backend) of
[Data] ->
Data#dashboard_sso.state;
Data#?MOD_TAB.state;
[] ->
undefined
end.
@ -96,7 +98,7 @@ create_resource(ResourceId, Module, Config) ->
Config,
?DEFAULT_RESOURCE_OPTS
),
start_resource_if_enabled(ResourceId, Result, Config).
start_resource_if_enabled(ResourceId, Result, Config, fun clean_when_start_failed/1).
update_resource(ResourceId, Module, Config) ->
Result = emqx_resource:recreate_local(
@ -105,7 +107,12 @@ update_resource(ResourceId, Module, Config) ->
start_resource_if_enabled(ResourceId, Result, Config).
call(Req) ->
gen_server:call(?MODULE, Req).
try
gen_server:call(?MODULE, Req, ?CALL_TIMEOUT)
catch
exit:{timeout, _} ->
{error, <<"Update backend failed: timeout">>}
end.
%%------------------------------------------------------------------------------
%% gen_server callbacks
@ -114,12 +121,12 @@ init([]) ->
process_flag(trap_exit, true),
add_handler(),
emqx_utils_ets:new(
dashboard_sso,
?MOD_TAB,
[
set,
ordered_set,
public,
named_table,
{keypos, #dashboard_sso.backend},
{keypos, #?MOD_TAB.backend},
{read_concurrency, true}
]
),
@ -160,13 +167,13 @@ start_backend_services() ->
case emqx_dashboard_sso:create(Provider, Config) of
{ok, State} ->
?SLOG(info, #{
msg => "Start SSO backend successfully",
msg => "start_sso_backend_successfully",
backend => Backend
}),
ets:insert(dashboard_sso, #dashboard_sso{backend = Backend, state = State});
ets:insert(?MOD_TAB, #?MOD_TAB{backend = Backend, state = State});
{error, Reason} ->
?SLOG(error, #{
msg => "Start SSO backend failed",
msg => "start_sso_backend_failed",
backend => Backend,
reason => Reason
})
@ -180,18 +187,24 @@ update_config(Backend, UpdateReq) ->
{ok, UpdateResult} ->
#{post_config_update := #{?MODULE := Result}} = UpdateResult,
?SLOG(info, #{
msg => "Update SSO configuration successfully",
msg => "update_sso_successfully",
backend => Backend,
result => Result
}),
Result;
{error, Reason} = Error ->
{error, Reason} ->
?SLOG(error, #{
msg => "Update SSO configuration failed",
msg => "update_sso_failed",
backend => Backend,
reason => Reason
}),
Error
{error,
case Reason of
{_Stage, _Mod, Reason2} ->
Reason2;
_ ->
Reason
end}
end.
pre_config_update(_, {update, _Backend, Config}, _OldConf) ->
@ -202,8 +215,7 @@ pre_config_update(_, {delete, _Backend}, _OldConf) ->
{ok, null}.
post_config_update(_, UpdateReq, NewConf, _OldConf, _AppEnvs) ->
Result = call({update_config, UpdateReq, NewConf}),
{ok, Result}.
call({update_config, UpdateReq, NewConf}).
propagated_post_config_update(
?MOD_KEY_PATH(BackendBin) = Path, _UpdateReq, undefined, OldConf, AppEnvs
@ -231,14 +243,14 @@ on_config_update({update, Backend, _RawConfig}, Config) ->
on_backend_updated(
emqx_dashboard_sso:create(Provider, Config),
fun(State) ->
ets:insert(dashboard_sso, #dashboard_sso{backend = Backend, state = State})
ets:insert(?MOD_TAB, #?MOD_TAB{backend = Backend, state = State})
end
);
Data ->
on_backend_updated(
emqx_dashboard_sso:update(Provider, Config, Data#dashboard_sso.state),
emqx_dashboard_sso:update(Provider, Config, Data#?MOD_TAB.state),
fun(State) ->
ets:insert(dashboard_sso, Data#dashboard_sso{state = State})
ets:insert(?MOD_TAB, Data#?MOD_TAB{state = State})
end
)
end;
@ -249,33 +261,61 @@ on_config_update({delete, Backend}, _NewConf) ->
Data ->
Provider = provider(Backend),
on_backend_updated(
emqx_dashboard_sso:destroy(Provider, Data#dashboard_sso.state),
emqx_dashboard_sso:destroy(Provider, Data#?MOD_TAB.state),
fun() ->
ets:delete(dashboard_sso, Backend)
ets:delete(?MOD_TAB, Backend)
end
)
end.
lookup(Backend) ->
case ets:lookup(dashboard_sso, Backend) of
case ets:lookup(?MOD_TAB, Backend) of
[Data] ->
Data;
[] ->
undefined
end.
start_resource_if_enabled(ResourceId, {ok, _} = Result, #{enable := true}) ->
_ = emqx_resource:start(ResourceId),
Result;
start_resource_if_enabled(_ResourceId, Result, _Config) ->
start_resource_if_enabled(ResourceId, Result, Config) ->
start_resource_if_enabled(ResourceId, Result, Config, undefined).
start_resource_if_enabled(
ResourceId, {ok, _} = Result, #{enable := true}, CleanWhenStartFailed
) ->
case emqx_resource:start(ResourceId) of
ok ->
Result;
{error, Reason} ->
SafeReason = emqx_utils:redact(Reason),
?SLOG(error, #{
msg => "start_backend_failed",
resource_id => ResourceId,
reason => SafeReason
}),
erlang:is_function(CleanWhenStartFailed) andalso
CleanWhenStartFailed(ResourceId),
{error, emqx_dashboard_sso:format(["Start backend failed, Reason: ", SafeReason])}
end;
start_resource_if_enabled(_ResourceId, Result, _Config, _) ->
Result.
%% ensure the backend creation is atomic, clean the corresponding resource when necessary,
%% when creating a backend fails, nothing will be inserted into the SSO table,
%% thus the resources created by backend will leakage.
%% Although we can treat start failure as successful,
%% and insert the resource data into the SSO table,
%% it may be strange for users: it succeeds, but can't be used.
clean_when_start_failed(ResourceId) ->
_ = emqx_resource:remove_local(ResourceId),
ok.
%% this first level `ok` is for emqx_config_handler, and the second level is for the caller
on_backend_updated({ok, State} = Ok, Fun) ->
Fun(State),
Ok;
{ok, Ok};
on_backend_updated(ok, Fun) ->
Fun(),
ok;
{ok, ok};
on_backend_updated(Error, _) ->
Error.

View File

@ -8,16 +8,23 @@
-compile(export_all).
-include_lib("emqx_dashboard/include/emqx_dashboard.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include_lib("eunit/include/eunit.hrl").
-define(LDAP_HOST, "ldap").
-define(LDAP_DEFAULT_PORT, 389).
-define(LDAP_USER, <<"mqttuser0001">>).
-define(LDAP_USER_PASSWORD, <<"mqttuser0001">>).
-define(MOD_TAB, emqx_dashboard_sso).
-define(MOD_KEY_PATH, [dashboard, sso, ldap]).
-define(RESOURCE_GROUP, <<"emqx_dashboard_sso">>).
-import(emqx_mgmt_api_test_util, [request/2, request/3, uri/1, request_api/3]).
all() ->
[
t_create_atomicly,
t_create,
t_update,
t_get,
@ -53,11 +60,43 @@ end_per_testcase(Case, _) ->
end,
ok.
t_create_atomicly(_) ->
Path = uri(["sso", "ldap"]),
?assertMatch(
{ok, 400, _},
request(
put,
Path,
ldap_config(#{
<<"username">> => <<"invalid">>,
<<"enable">> => true,
<<"request_timeout">> => <<"1s">>
})
)
),
?assertEqual(undefined, emqx:get_config(?MOD_KEY_PATH, undefined)),
?assertEqual([], ets:tab2list(?MOD_TAB)),
?retry(
_Interval = 1000,
_NAttempts = 5,
?assertEqual([], emqx_resource_manager:list_group(?RESOURCE_GROUP))
),
ok.
t_create(_) ->
check_running([]),
Path = uri(["sso", "ldap"]),
{ok, 200, Result} = request(put, Path, ldap_config()),
check_running([]),
?assertMatch(#{backend := ldap}, emqx:get_config(?MOD_KEY_PATH, undefined)),
?assertMatch([_], ets:tab2list(?MOD_TAB)),
?retry(
_Interval = 500,
_NAttempts = 10,
?assertMatch([_], emqx_resource_manager:list_group(?RESOURCE_GROUP))
),
?assertMatch(#{backend := <<"ldap">>, enable := false}, decode_json(Result)),
?assertMatch([#{backend := <<"ldap">>, enable := false}], get_sso()),
?assertNotEqual(undefined, emqx_dashboard_sso_manager:lookup_state(ldap)),