feat: don't reset tnx_id when cluster_fix
This commit is contained in:
parent
298211d101
commit
f490a0cba2
|
@ -34,7 +34,8 @@
|
||||||
on_mria_stop/1,
|
on_mria_stop/1,
|
||||||
force_leave_clean/1,
|
force_leave_clean/1,
|
||||||
wait_for_cluster_rpc/0,
|
wait_for_cluster_rpc/0,
|
||||||
maybe_init_tnx_id/2
|
maybe_init_tnx_id/2,
|
||||||
|
update_mfa/3
|
||||||
]).
|
]).
|
||||||
-export([
|
-export([
|
||||||
commit/2,
|
commit/2,
|
||||||
|
@ -42,6 +43,7 @@
|
||||||
get_cluster_tnx_id/0,
|
get_cluster_tnx_id/0,
|
||||||
get_node_tnx_id/1,
|
get_node_tnx_id/1,
|
||||||
init_mfa/2,
|
init_mfa/2,
|
||||||
|
update_mfa_in_trans/3,
|
||||||
latest_tnx_id/0,
|
latest_tnx_id/0,
|
||||||
make_initiate_call_req/3,
|
make_initiate_call_req/3,
|
||||||
read_next_mfa/1,
|
read_next_mfa/1,
|
||||||
|
@ -546,6 +548,36 @@ init_mfa(Node, MFA) ->
|
||||||
{retry, Meta}
|
{retry, Meta}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
update_mfa_in_trans(Node, MFA, NodeTnxId) ->
|
||||||
|
mnesia:write_lock_table(?CLUSTER_MFA),
|
||||||
|
case get_node_tnx_id(Node) of
|
||||||
|
NodeTnxId ->
|
||||||
|
TnxId = NodeTnxId + 1,
|
||||||
|
MFARec = #cluster_rpc_mfa{
|
||||||
|
tnx_id = TnxId,
|
||||||
|
mfa = MFA,
|
||||||
|
initiator = Node,
|
||||||
|
created_at = erlang:localtime()
|
||||||
|
},
|
||||||
|
ok = mnesia:write(?CLUSTER_MFA, MFARec, write),
|
||||||
|
lists:foreach(
|
||||||
|
fun(N) ->
|
||||||
|
ok = emqx_cluster_rpc:commit(N, NodeTnxId)
|
||||||
|
end,
|
||||||
|
mria:running_nodes()
|
||||||
|
);
|
||||||
|
NewTnxId ->
|
||||||
|
Fmt = "someone_has_already_updated,tnx_id(~w) is not the latest(~w)",
|
||||||
|
Reason = emqx_utils:format(Fmt, [NodeTnxId, NewTnxId]),
|
||||||
|
mnesia:abort({error, Reason})
|
||||||
|
end.
|
||||||
|
|
||||||
|
update_mfa(Node, MFA, LatestId) ->
|
||||||
|
case transaction(fun ?MODULE:update_mfa_in_trans/3, [Node, MFA, LatestId]) of
|
||||||
|
{atomic, ok} -> ok;
|
||||||
|
{aborted, Error} -> Error
|
||||||
|
end.
|
||||||
|
|
||||||
transaction(Func, Args) ->
|
transaction(Func, Args) ->
|
||||||
mria:transaction(?CLUSTER_RPC_SHARD, Func, Args).
|
mria:transaction(?CLUSTER_RPC_SHARD, Func, Args).
|
||||||
|
|
||||||
|
@ -574,13 +606,21 @@ trans_status() ->
|
||||||
[],
|
[],
|
||||||
?CLUSTER_COMMIT
|
?CLUSTER_COMMIT
|
||||||
),
|
),
|
||||||
|
Nodes = mria:running_nodes(),
|
||||||
|
IndexNodes = lists:zip(Nodes, lists:seq(1, length(Nodes))),
|
||||||
lists:sort(
|
lists:sort(
|
||||||
fun(#{node := NA, tnx_id := IdA}, #{node := NB, tnx_id := IdB}) ->
|
fun(#{node := NA, tnx_id := IdA}, #{node := NB, tnx_id := IdB}) ->
|
||||||
{IdA, NA} > {IdB, NB}
|
{IdA, index_nodes(NA, IndexNodes)} > {IdB, index_nodes(NB, IndexNodes)}
|
||||||
end,
|
end,
|
||||||
List
|
List
|
||||||
).
|
).
|
||||||
|
|
||||||
|
index_nodes(Node, IndexNodes) ->
|
||||||
|
case lists:keyfind(Node, 1, IndexNodes) of
|
||||||
|
false -> 0;
|
||||||
|
{_, Index} -> Index
|
||||||
|
end.
|
||||||
|
|
||||||
trans_query(TnxId) ->
|
trans_query(TnxId) ->
|
||||||
case mnesia:read(?CLUSTER_MFA, TnxId) of
|
case mnesia:read(?CLUSTER_MFA, TnxId) of
|
||||||
[] ->
|
[] ->
|
||||||
|
|
|
@ -25,7 +25,8 @@
|
||||||
admins/1,
|
admins/1,
|
||||||
conf/1,
|
conf/1,
|
||||||
audit/3,
|
audit/3,
|
||||||
unload/0
|
unload/0,
|
||||||
|
mark_fix_log/1
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-export([keys/0, get_config/0, get_config/1, load_config/2]).
|
-export([keys/0, get_config/0, get_config/1, load_config/2]).
|
||||||
|
@ -104,15 +105,8 @@ admins(["fix"]) ->
|
||||||
StoppedNodes =/= [] andalso
|
StoppedNodes =/= [] andalso
|
||||||
emqx_ctl:warning("Found stopped nodes: ~p~n", [StoppedNodes]);
|
emqx_ctl:warning("Found stopped nodes: ~p~n", [StoppedNodes]);
|
||||||
Role ->
|
Role ->
|
||||||
Core =
|
Leader = emqx_cluster_rpc:find_leader(),
|
||||||
case find_highest_node(Status) of
|
emqx_ctl:print("Run fix command on ~p(core) node, but current is ~p~n", [Leader, Role])
|
||||||
{same_tnx_id, _TnxId} ->
|
|
||||||
{ok, Node} = mria_status:upstream_node(?CLUSTER_RPC_SHARD),
|
|
||||||
Node;
|
|
||||||
{ok, Node} ->
|
|
||||||
Node
|
|
||||||
end,
|
|
||||||
emqx_ctl:print("Run fix command on ~p(core) node, but current is ~p~n", [Core, Role])
|
|
||||||
end;
|
end;
|
||||||
admins(["fast_forward"]) ->
|
admins(["fast_forward"]) ->
|
||||||
status(),
|
status(),
|
||||||
|
@ -135,7 +129,7 @@ admins(["fast_forward", Node0, ToTnxId]) ->
|
||||||
admins(_) ->
|
admins(_) ->
|
||||||
emqx_ctl:usage(usage_sync()).
|
emqx_ctl:usage(usage_sync()).
|
||||||
|
|
||||||
fix_lagging_with_raw(Node, Keys) ->
|
fix_lagging_with_raw(ToTnxId, Node, Keys) ->
|
||||||
Confs = lists:foldl(
|
Confs = lists:foldl(
|
||||||
fun(Key, Acc) ->
|
fun(Key, Acc) ->
|
||||||
KeyRaw = atom_to_binary(Key),
|
KeyRaw = atom_to_binary(Key),
|
||||||
|
@ -144,12 +138,25 @@ fix_lagging_with_raw(Node, Keys) ->
|
||||||
#{},
|
#{},
|
||||||
Keys
|
Keys
|
||||||
),
|
),
|
||||||
ok = emqx_cluster_rpc:reset(),
|
case mark_fix_begin(Node, ToTnxId) of
|
||||||
case load_config_from_raw(Confs, #{mode => replace}) of
|
ok ->
|
||||||
ok -> waiting_for_fix_finish();
|
case load_config_from_raw(Confs, #{mode => replace}) of
|
||||||
Error -> Error
|
ok -> waiting_for_fix_finish();
|
||||||
|
Error0 -> Error0
|
||||||
|
end;
|
||||||
|
{error, Reason} ->
|
||||||
|
emqx_ctl:warning("mark fix begin failed: ~s~n", [Reason])
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
mark_fix_begin(Node, TnxId) ->
|
||||||
|
{atomic, Status} = emqx_cluster_rpc:status(),
|
||||||
|
MFA = {?MODULE, mark_fix_log, [Status]},
|
||||||
|
emqx_cluster_rpc:update_mfa(Node, MFA, TnxId).
|
||||||
|
|
||||||
|
mark_fix_log(Status) ->
|
||||||
|
?SLOG(warning, #{msg => cluster_fix_log, status => Status}),
|
||||||
|
ok.
|
||||||
|
|
||||||
audit(Level, From, Log) ->
|
audit(Level, From, Log) ->
|
||||||
?AUDIT(Level, redact(Log#{from => From})).
|
?AUDIT(Level, redact(Log#{from => From})).
|
||||||
|
|
||||||
|
@ -213,25 +220,25 @@ maybe_fix_lagging(Status, #{fix := Fix}) ->
|
||||||
%% because the raw conf is hard to be compared. (e.g, 1000ms vs 1s)
|
%% because the raw conf is hard to be compared. (e.g, 1000ms vs 1s)
|
||||||
AllConfs = find_running_confs(),
|
AllConfs = find_running_confs(),
|
||||||
case find_lagging(Status, AllConfs) of
|
case find_lagging(Status, AllConfs) of
|
||||||
{inconsistent_tnx_id_key, Target, InconsistentKeys} when Fix ->
|
{inconsistent_tnx_id_key, ToTnxId, Target, InconsistentKeys} when Fix ->
|
||||||
_ = fix_lagging_with_raw(Target, InconsistentKeys),
|
_ = fix_lagging_with_raw(ToTnxId, Target, InconsistentKeys),
|
||||||
ok;
|
ok;
|
||||||
{inconsistent_tnx_id_key, Target, InconsistentKeys} ->
|
{inconsistent_tnx_id_key, _ToTnxId, Target, InconsistentKeys} ->
|
||||||
emqx_ctl:warning("Inconsistent keys: ~p~n", [InconsistentKeys]),
|
emqx_ctl:warning("Inconsistent keys: ~p~n", [InconsistentKeys]),
|
||||||
print_inconsistent_conf(InconsistentKeys, Target, Status, AllConfs);
|
print_inconsistent_conf(InconsistentKeys, Target, Status, AllConfs);
|
||||||
inconsistent_tnx_id when Fix ->
|
{inconsistent_tnx_id, Target, ToTnxId} when Fix ->
|
||||||
print_tnx_id_status(Status),
|
print_tnx_id_status(Status),
|
||||||
ok = emqx_cluster_rpc:reset(),
|
ok = mark_fix_begin(Target, ToTnxId),
|
||||||
emqx_ctl:print("Reset tnxid to 0 successfully~n");
|
emqx_ctl:print("Forward tnxid to ~w successfully~n", [ToTnxId + 1]);
|
||||||
inconsistent_tnx_id ->
|
{inconsistent_tnx_id, _Target, _ToTnxId} ->
|
||||||
print_tnx_id_status(Status),
|
print_tnx_id_status(Status),
|
||||||
Leader = emqx_cluster_rpc:find_leader(),
|
Leader = emqx_cluster_rpc:find_leader(),
|
||||||
emqx_ctl:print(?SUGGESTION(Leader));
|
emqx_ctl:print(?SUGGESTION(Leader));
|
||||||
{inconsistent_key, TnxId, InconsistentKeys} ->
|
{inconsistent_key, ToTnxId, InconsistentKeys} ->
|
||||||
[{Target, _} | _] = AllConfs,
|
[{Target, _} | _] = AllConfs,
|
||||||
print_inconsistent_conf(InconsistentKeys, Target, Status, AllConfs),
|
print_inconsistent_conf(InconsistentKeys, Target, Status, AllConfs),
|
||||||
emqx_ctl:warning("All configuration synchronized(tnx_id=~w)~n", [
|
emqx_ctl:warning("All configuration synchronized(tnx_id=~w)~n", [
|
||||||
TnxId
|
ToTnxId
|
||||||
]),
|
]),
|
||||||
emqx_ctl:warning(
|
emqx_ctl:warning(
|
||||||
"but inconsistent keys were found: ~p, which come from environment variables or etc/emqx.conf.~n",
|
"but inconsistent keys were found: ~p, which come from environment variables or etc/emqx.conf.~n",
|
||||||
|
@ -693,11 +700,11 @@ find_lagging(Status, AllConfs) ->
|
||||||
InconsistentKeys ->
|
InconsistentKeys ->
|
||||||
{inconsistent_key, TnxId, InconsistentKeys}
|
{inconsistent_key, TnxId, InconsistentKeys}
|
||||||
end;
|
end;
|
||||||
{ok, Target} ->
|
{ok, TargetId, Target} ->
|
||||||
{value, {_, TargetConf}, OtherConfs} = lists:keytake(Target, 1, AllConfs),
|
{value, {_, TargetConf}, OtherConfs} = lists:keytake(Target, 1, AllConfs),
|
||||||
case find_inconsistent_key(TargetConf, OtherConfs) of
|
case find_inconsistent_key(TargetConf, OtherConfs) of
|
||||||
[] -> inconsistent_tnx_id;
|
[] -> {inconsistent_tnx_id, TargetId, Target};
|
||||||
ChangedKeys -> {inconsistent_tnx_id_key, Target, ChangedKeys}
|
ChangedKeys -> {inconsistent_tnx_id_key, TargetId, Target, ChangedKeys}
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -722,8 +729,8 @@ find_highest_node(Status) ->
|
||||||
case lists:usort(fun({A, _}, {B, _}) -> A >= B end, Ids) of
|
case lists:usort(fun({A, _}, {B, _}) -> A >= B end, Ids) of
|
||||||
[{TnxId, _}] ->
|
[{TnxId, _}] ->
|
||||||
{same_tnx_id, TnxId};
|
{same_tnx_id, TnxId};
|
||||||
[{_TnxId, Target} | _] ->
|
[{TnxId, Target} | _] ->
|
||||||
{ok, Target}
|
{ok, TnxId, Target}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
changed(K, V, Conf) ->
|
changed(K, V, Conf) ->
|
||||||
|
@ -870,7 +877,7 @@ find_inconsistent_test() ->
|
||||||
{node2, #{<<"mqtt">> => TargetMqtt1, <<"stats">> => Stats}}
|
{node2, #{<<"mqtt">> => TargetMqtt1, <<"stats">> => Stats}}
|
||||||
],
|
],
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
{inconsistent_tnx_id_key, node2, [<<"mqtt">>]}, find_lagging(Status, Confs0)
|
{inconsistent_tnx_id_key, 3, node2, [<<"mqtt">>]}, find_lagging(Status, Confs0)
|
||||||
),
|
),
|
||||||
|
|
||||||
%% conf is the same, no changed
|
%% conf is the same, no changed
|
||||||
|
@ -879,7 +886,7 @@ find_inconsistent_test() ->
|
||||||
{node2, #{<<"mqtt">> => Mqtt, <<"stats">> => Stats}},
|
{node2, #{<<"mqtt">> => Mqtt, <<"stats">> => Stats}},
|
||||||
{node3, #{<<"mqtt">> => Mqtt, <<"stats">> => Stats}}
|
{node3, #{<<"mqtt">> => Mqtt, <<"stats">> => Stats}}
|
||||||
],
|
],
|
||||||
?assertEqual(inconsistent_tnx_id, find_lagging(Status, NoDiffConfs)),
|
?assertEqual({inconsistent_tnx_id, 3, node2}, find_lagging(Status, NoDiffConfs)),
|
||||||
|
|
||||||
%% same tnx_id
|
%% same tnx_id
|
||||||
SameStatus = [
|
SameStatus = [
|
||||||
|
|
Loading…
Reference in New Issue