Merge pull request #9911 from terry-xiaoyu/add_retriers_for_rules-v4.4
Add retriers for rules v4.4
This commit is contained in:
commit
a1911f3ee0
|
@ -3,16 +3,19 @@
|
|||
{VSN,
|
||||
[{<<"4\\.4\\.1[3-4]">>,
|
||||
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine,brutal_purge,soft_purge,[]}
|
||||
]},
|
||||
{"4.4.12",
|
||||
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine_jwt_worker,brutal_purge,soft_purge,[]},
|
||||
{update,emqx_rule_engine_jwt_sup,supervisor},
|
||||
{load_module,emqx_rule_engine_jwt,brutal_purge,soft_purge,[]}]},
|
||||
{"4.4.11",
|
||||
[{load_module,emqx_rule_engine_jwt_worker,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{update,emqx_rule_engine_jwt_sup,supervisor},
|
||||
{load_module,emqx_rule_engine_jwt,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
|
@ -228,16 +231,19 @@
|
|||
{<<".*">>,[]}],
|
||||
[{<<"4\\.4\\.1[3-4]">>,
|
||||
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine,brutal_purge,soft_purge,[]}
|
||||
]},
|
||||
{"4.4.12",
|
||||
[{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_engine_jwt_worker,brutal_purge,soft_purge,[]},
|
||||
{update,emqx_rule_engine_jwt_sup,supervisor},
|
||||
{load_module,emqx_rule_engine_jwt,brutal_purge,soft_purge,[]}]},
|
||||
{"4.4.11",
|
||||
[{load_module,emqx_rule_engine_jwt_worker,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_monitor,brutal_purge,soft_purge,[]},
|
||||
{update,emqx_rule_engine_jwt_sup,supervisor},
|
||||
{load_module,emqx_rule_engine_jwt,brutal_purge,soft_purge,[]},
|
||||
{load_module,emqx_rule_registry,brutal_purge,soft_purge,[]},
|
||||
|
|
|
@ -529,19 +529,27 @@ refresh_resource(#resource{id = ResId, type = Type, config = Config}) ->
|
|||
refresh_rules_when_boot() ->
|
||||
lists:foreach(fun
|
||||
(#rule{enabled = true} = Rule) ->
|
||||
try refresh_rule(Rule)
|
||||
catch _:_ ->
|
||||
%% We set the enable = false when rule init failed to avoid bad rules running
|
||||
%% without actions created properly.
|
||||
%% The init failure might be caused by a disconnected resource, in this case the
|
||||
%% actions can not be created, so the rules won't work.
|
||||
%% After the user fixed the problem he can enable it manually,
|
||||
%% doing so will also recreate the actions.
|
||||
emqx_rule_registry:add_rule(Rule#rule{enabled = false, state = refresh_failed_at_bootup})
|
||||
end;
|
||||
(_) -> ok
|
||||
ensure_rule_retrier(Rule);
|
||||
(#rule{enabled = false, state = refresh_failed_at_bootup} = Rule) ->
|
||||
%% the rule was previously disabled by emqx so we need to retry it
|
||||
ensure_rule_retrier(Rule);
|
||||
(#rule{enabled = false, id = RuleId}) ->
|
||||
?LOG(warning, "rule ~s was disabled by the user, won't re-enable it", [RuleId])
|
||||
end, emqx_rule_registry:get_rules()).
|
||||
|
||||
ensure_rule_retrier(#rule{id = RuleId} = Rule) ->
|
||||
try refresh_rule(Rule)
|
||||
catch _:_ ->
|
||||
%% We set the enable = false when rule init failed to avoid bad rules running
|
||||
%% without actions created properly.
|
||||
%% The init failure might be caused by a disconnected resource, in this case the
|
||||
%% actions can not be created, so the rules won't work.
|
||||
%% After the user fixed the problem he can enable it manually,
|
||||
%% doing so will also recreate the actions.
|
||||
emqx_rule_registry:add_rule(Rule#rule{enabled = false, state = refresh_failed_at_bootup}),
|
||||
emqx_rule_monitor:ensure_rule_retrier(RuleId)
|
||||
end.
|
||||
|
||||
refresh_rule(#rule{id = RuleId, for = Topics, actions = Actions}) ->
|
||||
ok = emqx_rule_metrics:create_rule_metrics(RuleId),
|
||||
lists:foreach(fun emqx_rule_events:load/1, Topics),
|
||||
|
|
|
@ -33,16 +33,21 @@
|
|||
, stop/0
|
||||
, async_refresh_resources_rules/0
|
||||
, ensure_resource_retrier/1
|
||||
, ensure_rule_retrier/1
|
||||
, retry_loop/2
|
||||
, retry_loop/3
|
||||
]).
|
||||
|
||||
%% fot test
|
||||
-export([ put_retry_interval/1
|
||||
, get_retry_interval/0
|
||||
, erase_retry_interval/0
|
||||
-export([ put_resource_retry_interval/1
|
||||
, put_rule_retry_interval/1
|
||||
, get_resource_retry_interval/0
|
||||
, get_rule_retry_interval/0
|
||||
, erase_resource_retry_interval/0
|
||||
, erase_rule_retry_interval/0
|
||||
]).
|
||||
|
||||
-define(T_RETRY, 60000).
|
||||
-define(T_RESOURCE_RETRY, 15000).
|
||||
-define(T_RULE_RETRY, 20000).
|
||||
|
||||
start_link() ->
|
||||
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
|
||||
|
@ -54,23 +59,33 @@ init([]) ->
|
|||
_ = erlang:process_flag(trap_exit, true),
|
||||
{ok, #{retryers => #{}}}.
|
||||
|
||||
put_retry_interval(I) when is_integer(I) andalso I >= 10 ->
|
||||
put_resource_retry_interval(I) when is_integer(I) andalso I >= 10 ->
|
||||
_ = persistent_term:put({?MODULE, resource_restart_interval}, I),
|
||||
ok.
|
||||
|
||||
erase_retry_interval() ->
|
||||
_ = persistent_term:erase({?MODULE, resource_restart_interval}),
|
||||
put_rule_retry_interval(I) when is_integer(I) andalso I >= 10 ->
|
||||
_ = persistent_term:put({?MODULE, rule_restart_interval}, I),
|
||||
ok.
|
||||
|
||||
get_retry_interval() ->
|
||||
persistent_term:get({?MODULE, resource_restart_interval}, ?T_RETRY).
|
||||
erase_resource_retry_interval() ->
|
||||
_ = persistent_term:erase({?MODULE, resource_restart_interval}),
|
||||
ok.
|
||||
erase_rule_retry_interval() ->
|
||||
_ = persistent_term:erase({?MODULE, rule_restart_interval}),
|
||||
ok.
|
||||
|
||||
get_resource_retry_interval() ->
|
||||
persistent_term:get({?MODULE, resource_restart_interval}, ?T_RESOURCE_RETRY).
|
||||
get_rule_retry_interval() ->
|
||||
persistent_term:get({?MODULE, rule_restart_interval}, ?T_RULE_RETRY).
|
||||
|
||||
async_refresh_resources_rules() ->
|
||||
gen_server:cast(?MODULE, async_refresh).
|
||||
|
||||
ensure_resource_retrier(ResId) ->
|
||||
Interval = get_retry_interval(),
|
||||
gen_server:cast(?MODULE, {create_restart_handler, resource, ResId, Interval}).
|
||||
gen_server:cast(?MODULE, {create_restart_handler, resource, ResId}).
|
||||
|
||||
ensure_rule_retrier(RuleId) ->
|
||||
gen_server:cast(?MODULE, {create_restart_handler, rule, RuleId}).
|
||||
|
||||
handle_call(_Msg, _From, State) ->
|
||||
{reply, ok, State}.
|
||||
|
@ -82,12 +97,12 @@ handle_cast(async_refresh, State) ->
|
|||
Pid = spawn_link(fun do_async_refresh/0),
|
||||
{noreply, State#{boot_refresh_pid => Pid}};
|
||||
|
||||
handle_cast({create_restart_handler, Tag, Obj, Interval}, State) ->
|
||||
handle_cast({create_restart_handler, Tag, Obj}, State) ->
|
||||
Objects = maps:get(Tag, State, #{}),
|
||||
NewState = case maps:find(Obj, Objects) of
|
||||
error ->
|
||||
update_object(Tag, Obj,
|
||||
create_restart_handler(Tag, Obj, Interval), State);
|
||||
create_restart_handler(Tag, Obj), State);
|
||||
{ok, _Pid} ->
|
||||
State
|
||||
end,
|
||||
|
@ -130,13 +145,17 @@ update_object(Tag, Obj, Retryer, State) ->
|
|||
retryers => Retryers#{Retryer => {Tag, Obj}}
|
||||
}.
|
||||
|
||||
create_restart_handler(Tag, Obj, Interval) ->
|
||||
?LOG(info, "starting_a_retry_loop for ~p ~p, with delay interval: ~p", [Tag, Obj, Interval]),
|
||||
create_restart_handler(Tag, Obj) ->
|
||||
?LOG(warning, "starting_a_retry_loop for ~p ~p", [Tag, Obj]),
|
||||
%% spawn a dedicated process to handle the restarting asynchronously
|
||||
spawn_link(?MODULE, retry_loop, [Tag, Obj, Interval]).
|
||||
spawn_link(?MODULE, retry_loop, [Tag, Obj]).
|
||||
|
||||
retry_loop(resource, ResId, Interval) ->
|
||||
timer:sleep(Interval),
|
||||
%% retry_loop/3 is to avoid crashes during relup
|
||||
retry_loop(Tag, ResId, _Interval) ->
|
||||
retry_loop(Tag, ResId).
|
||||
|
||||
retry_loop(resource, ResId) ->
|
||||
timer:sleep(get_resource_retry_interval()),
|
||||
case emqx_rule_registry:find_resource(ResId) of
|
||||
{ok, #resource{type = Type, config = Config}} ->
|
||||
try
|
||||
|
@ -154,10 +173,30 @@ retry_loop(resource, ResId, Interval) ->
|
|||
end,
|
||||
?LOG_SENSITIVE(warning, "init_resource_retry_failed ~p, ~0p", [ResId, LogContext]),
|
||||
%% keep looping
|
||||
?MODULE:retry_loop(resource, ResId, Interval)
|
||||
?MODULE:retry_loop(resource, ResId)
|
||||
end;
|
||||
not_found ->
|
||||
ok
|
||||
end;
|
||||
|
||||
retry_loop(rule, RuleId) ->
|
||||
timer:sleep(get_rule_retry_interval()),
|
||||
case emqx_rule_registry:get_rule(RuleId) of
|
||||
{ok, #rule{enabled = false, state = refresh_failed_at_bootup} = Rule} ->
|
||||
try
|
||||
emqx_rule_engine:refresh_rule(Rule),
|
||||
emqx_rule_registry:add_rule(Rule#rule{enabled = true, state = normal}),
|
||||
?LOG(warning, "rule ~s has been refreshed and re-enabled", [RuleId])
|
||||
catch
|
||||
Err:Reason:ST ->
|
||||
?LOG(warning, "init_rule failed: ~p, ~0p",
|
||||
[{Err, Reason}, ST]),
|
||||
?MODULE:retry_loop(rule, RuleId)
|
||||
end;
|
||||
{ok, #rule{enabled = false, state = State}} when State =/= refresh_failed_at_bootup ->
|
||||
?LOG(warning, "rule ~s was disabled by the user, won't re-enable it", [RuleId]);
|
||||
_ ->
|
||||
ok
|
||||
end.
|
||||
|
||||
do_async_refresh() ->
|
||||
|
@ -171,6 +210,6 @@ refresh_and_enable_rules_of_resource(ResId) ->
|
|||
fun (#rule{id = Id, enabled = false, state = refresh_failed_at_bootup} = Rule) ->
|
||||
emqx_rule_engine:refresh_rule(Rule),
|
||||
emqx_rule_registry:add_rule(Rule#rule{enabled = true, state = normal}),
|
||||
?LOG(info, "rule ~s is refreshed and re-enabled", [Id]);
|
||||
?LOG(warning, "rule ~s is refreshed and re-enabled", [Id]);
|
||||
(_) -> ok
|
||||
end, emqx_rule_registry:find_rules_depends_on_resource(ResId)).
|
||||
|
|
|
@ -48,7 +48,7 @@ end_per_suite(_Config) ->
|
|||
ok.
|
||||
|
||||
init_per_testcase(t_restart_resource, Config) ->
|
||||
emqx_rule_monitor:put_retry_interval(100),
|
||||
emqx_rule_monitor:put_resource_retry_interval(100),
|
||||
Opts = [public, named_table, set, {read_concurrency, true}],
|
||||
_ = ets:new(?RES_PARAMS_TAB, [{keypos, #resource_params.id}|Opts]),
|
||||
ets:new(t_restart_resource, [named_table, public]),
|
||||
|
@ -95,7 +95,7 @@ common_init_per_testcase() ->
|
|||
|
||||
common_end_per_testcases() ->
|
||||
ok = emqx_alarm:stop(),
|
||||
emqx_rule_monitor:erase_retry_interval(),
|
||||
emqx_rule_monitor:erase_resource_retry_interval(),
|
||||
emqx_rule_monitor:stop().
|
||||
|
||||
t_restart_resource(_) ->
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
## Enhancements
|
||||
|
||||
- Support HAProxy protocol for dashboard API [9803](https://github.com/emqx/emqx/pull/9803).
|
||||
|
||||
- Added Ubuntu 22.04 package release [#9831](https://github.com/emqx/emqx/pull/9831).
|
||||
|
||||
- Improve the integration of the `banned` and the `delayed` feature [#9790](https://github.com/emqx/emqx/pull/9790).
|
||||
|
@ -25,3 +27,7 @@
|
|||
- Avoid crash logs in CoAP gateway when receiving liveness checking packets from Load Balancer [#9869](https://github.com/emqx/emqx/pull/9869).
|
||||
|
||||
- Fix the exclusive topics aren't removed when the session has already been cleaned [#9868](https://github.com/emqx/emqx/pull/9868).
|
||||
|
||||
- fix the emqx reports `{case_clause,{error,closed}}` error log message when websocket connections interrupted [emqx/cowboy#8](https://github.com/emqx/cowboy/pull/8).
|
||||
|
||||
- fix sometimes the rules cannot be enabled automatically after emqx is restarted [#9911](https://github.com/emqx/emqx/pull/9911).
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
## 增强
|
||||
|
||||
- 现在 dashboard 支持 `HAProxy` 协议了 [9803](https://github.com/emqx/emqx/pull/9803)。
|
||||
|
||||
- 发布 Ubuntu 22.04 安装包 [#9831](https://github.com/emqx/emqx/pull/9831)。
|
||||
|
||||
- 增强 `封禁` 和 `延迟消息` 这两个功能的集成性 [#9790](https://github.com/emqx/emqx/pull/9790)。
|
||||
|
@ -24,3 +26,7 @@
|
|||
- 修复 CoAP 网关在收到负载均衡的心跳检查报文时产生的崩溃日志 [#9869](https://github.com/emqx/emqx/pull/9869)。
|
||||
|
||||
- 修复会话关闭后,其持有的排他订阅主题没有被释放的问题 [#9868](https://github.com/emqx/emqx/pull/9868)。
|
||||
|
||||
- 修复 Websocket 连接中断时日志报 `{case_clause,{error,closed}}` 错误的问题 [emqx/cowboy#8](https://github.com/emqx/cowboy/pull/8)。
|
||||
|
||||
- 修复某些情况下,重启 emqx 后规则无法自动启用的问题 [#9911](https://github.com/emqx/emqx/pull/9911)。
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
# v4.4.15
|
||||
|
||||
## Enhancements
|
||||
|
||||
- Support HAProxy protocol for dashboard API [9803](https://github.com/emqx/emqx/pull/9803).
|
|
@ -1,5 +0,0 @@
|
|||
# v4.4.15
|
||||
|
||||
## 增强
|
||||
|
||||
- 现在 dashboard 增加了对 `HAProxy` 协议的支持 [9803](https://github.com/emqx/emqx/pull/9803)。
|
|
@ -46,7 +46,7 @@
|
|||
, {eredis_cluster, {git, "https://github.com/emqx/eredis_cluster", {tag, "0.7.4"}}}
|
||||
, {gproc, {git, "https://github.com/uwiger/gproc", {tag, "0.9.0"}}}
|
||||
, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}}
|
||||
, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}}
|
||||
, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.1"}}}
|
||||
, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.8.8"}}}
|
||||
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.8.1.11"}}}
|
||||
, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.0.1"}}}
|
||||
|
|
Loading…
Reference in New Issue