From c6085a6ab04795571c9ded38fad49cb834b9dc72 Mon Sep 17 00:00:00 2001
From: Andrew Mayorov
Date: Tue, 26 Mar 2024 18:13:30 +0100
Subject: [PATCH] wip(dsrepl): avoid contacting offline nodes prematurely

---
 Reviewer notes (free text in this position is not part of the diff):

  * Both `undefined` fallback branches -- in get_servers_leader_preferred/2
    and get_server_local_preferred/2 -- now call get_online_servers/2
    where they previously called get_shard_servers/2.
  * The known-leader branch, `[Leader | lists:delete(Leader, Servers)]`,
    is an unchanged context line; this patch applies no online filtering
    to it.
  * get_online_servers/2 passes the result of get_shard_servers/2 through
    filter_online/1.
  * filter_online/1 keeps the servers for which is_server_online/1 returns
    true. When that leaves an empty list it returns the original list
    unchanged; that fallback is the branch marked FIXME.
  * is_server_online/1 expects a `{Name, Node}` tuple and only looks at
    Node.
  * is_node_online/1 is true when Node equals node() or is a member of
    the list returned by nodes().
    NOTE(review): nodes/0 presumably lists only visible connected nodes;
    confirm that hidden nodes never host shard servers.
  * The spelling "unkonwn" in the second hunk is a context line taken
    from the target file; it has to stay as-is for the hunk to match.
  * NOTE(review): this patch was recovered from a copy collapsed onto a
    single line. Leading whitespace on the hunk lines was reconstructed
    assuming 4-space indentation steps -- TODO confirm against the target
    file (or apply with whitespace-insensitive matching).

 .../src/emqx_ds_replication_layer_shard.erl   | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl
index 983ce54e2..bf0951407 100644
--- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl
+++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl
@@ -82,7 +82,7 @@ get_servers_leader_preferred(DB, Shard) ->
             [Leader | lists:delete(Leader, Servers)];
         undefined ->
             %% TODO: Dynamic membership.
-            get_shard_servers(DB, Shard)
+            get_online_servers(DB, Shard)
     end.

 get_server_local_preferred(DB, Shard) ->
@@ -97,9 +97,27 @@ get_server_local_preferred(DB, Shard) ->
             %% Leader is unkonwn if there are no servers of this group on the
             %% local node. We want to pick a replica in that case as well.
             %% TODO: Dynamic membership.
-            pick_random(get_shard_servers(DB, Shard))
+            pick_random(get_online_servers(DB, Shard))
     end.

+get_online_servers(DB, Shard) ->
+    filter_online(get_shard_servers(DB, Shard)).
+
+filter_online(Servers) ->
+    case lists:filter(fun is_server_online/1, Servers) of
+        [] ->
+            %% FIXME
+            Servers;
+        Online ->
+            Online
+    end.
+
+is_server_online({_Name, Node}) ->
+    is_node_online(Node).
+
+is_node_online(Node) ->
+    Node == node() orelse lists:member(Node, nodes()).
+
 pick_local(Servers) ->
     case lists:dropwhile(fun({_Name, Node}) -> Node =/= node() end, Servers) of
         [Local | _] ->