Merge pull request #9927 from emqx/file-transfer

feat: implement file transfer over mqtt
This commit is contained in:
Andrew Mayorov 2023-05-19 18:35:13 +03:00 committed by GitHub
commit 9aaa0b6fe1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
116 changed files with 13173 additions and 75 deletions

View File

@ -7,6 +7,7 @@ INFLUXDB_TAG=2.5.0
TDENGINE_TAG=3.0.2.4
DYNAMO_TAG=1.21.0
CASSANDRA_TAG=3.11.6
MINIO_TAG=RELEASE.2023-03-20T20-16-18Z
OPENTS_TAG=9aa7f88
MS_IMAGE_ADDR=mcr.microsoft.com/mssql/server

View File

@ -0,0 +1,21 @@
# MinIO object-storage service for integration tests (plain TCP).
version: '3.7'

services:
  minio:
    hostname: minio
    image: quay.io/minio/minio:${MINIO_TAG}
    # Serve the S3 API on :9000 and the web console on :9001.
    command: server --address ":9000" --console-address ":9001" /minio-data
    expose:
      - "9000"
      - "9001"
    ports:
      - "9000:9000"
      - "9001:9001"
    healthcheck:
      # MinIO's liveness probe endpoint; container is healthy once it responds.
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 5s
      retries: 3
    networks:
      emqx_bridge:

View File

@ -0,0 +1,23 @@
# MinIO object-storage service for integration tests, TLS variant.
version: '3.7'

services:
  minio_tls:
    hostname: minio-tls
    image: quay.io/minio/minio:${MINIO_TAG}
    # MinIO enables TLS automatically when certificates are present in
    # --certs-dir; API on :9100, console on :9101 (distinct from the
    # plain-TCP service's 9000/9001 so both can run side by side).
    command: server --certs-dir /etc/certs --address ":9100" --console-address ":9101" /minio-data
    volumes:
      - ./certs/server.crt:/etc/certs/public.crt
      - ./certs/server.key:/etc/certs/private.key
    expose:
      - "9100"
      - "9101"
    ports:
      - "9100:9100"
      - "9101:9101"
    healthcheck:
      # -k: the test cert is self-signed, so skip verification.
      test: ["CMD", "curl", "-k", "-f", "https://localhost:9100/minio/health/live"]
      interval: 30s
      timeout: 5s
      retries: 3
    networks:
      emqx_bridge:

View File

@ -13,19 +13,37 @@ services:
volumes:
- "./toxiproxy.json:/config/toxiproxy.json"
ports:
# Toxiproxy management API
- 8474:8474
# InfluxDB
- 8086:8086
# InfluxDB TLS
- 8087:8087
# SQL Server
- 11433:1433
# MySQL
- 13306:3306
# MySQL TLS
- 13307:3307
# PostgreSQL
- 15432:5432
# PostgreSQL TLS
- 15433:5433
# TDEngine
- 16041:6041
# DynamoDB
- 18000:8000
# RocketMQ
- 19876:9876
# Cassandra
- 19042:9042
# Cassandra TLS
- 19142:9142
# S3
- 19000:19000
# S3 TLS
- 19100:19100
# IOTDB
- 14242:4242
- 28080:18080
command:

View File

@ -131,5 +131,17 @@
"listen": "0.0.0.0:18080",
"upstream": "iotdb:18080",
"enabled": true
},
{
"name": "minio_tcp",
"listen": "0.0.0.0:19000",
"upstream": "minio:9000",
"enabled": true
},
{
"name": "minio_tls",
"listen": "0.0.0.0:19100",
"upstream": "minio-tls:9100",
"enabled": true
}
]

1
.github/CODEOWNERS vendored
View File

@ -8,6 +8,7 @@
/apps/emqx_connector/ @emqx/emqx-review-board @JimMoen
/apps/emqx_dashboard/ @emqx/emqx-review-board @JimMoen @lafirest
/apps/emqx_exhook/ @emqx/emqx-review-board @JimMoen @lafirest
/apps/emqx_ft/ @emqx/emqx-review-board @savonarola @keynslug
/apps/emqx_gateway/ @emqx/emqx-review-board @lafirest
/apps/emqx_management/ @emqx/emqx-review-board @lafirest @sstrigler
/apps/emqx_plugin_libs/ @emqx/emqx-review-board @lafirest

View File

@ -193,6 +193,7 @@ jobs:
INFLUXDB_TAG: "2.5.0"
TDENGINE_TAG: "3.0.2.4"
OPENTS_TAG: "9aa7f88"
MINIO_TAG: "RELEASE.2023-03-20T20-16-18Z"
PROFILE: ${{ matrix.profile }}
CT_COVER_EXPORT_PREFIX: ${{ matrix.profile }}-${{ matrix.otp }}
run: ./scripts/ct/run.sh --ci --app ${{ matrix.app }}

View File

@ -29,3 +29,17 @@
)
)
).
%% Assert that at least one element of `LIST' matches `PATTERN'.
%% Implemented with a fun + case so that `PATTERN' may be any match
%% pattern (with bindings), not just a value.
-define(assertInclude(PATTERN, LIST),
    ?assert(
        lists:any(
            fun(X__Elem_) ->
                case X__Elem_ of
                    PATTERN -> true;
                    _ -> false
                end
            end,
            LIST
        )
    )
).

View File

@ -9,12 +9,16 @@
{emqx_bridge,4}.
{emqx_broker,1}.
{emqx_cm,1}.
{emqx_cm,2}.
{emqx_conf,1}.
{emqx_conf,2}.
{emqx_dashboard,1}.
{emqx_delayed,1}.
{emqx_eviction_agent,1}.
{emqx_exhook,1}.
{emqx_ft_storage_exporter_fs,1}.
{emqx_ft_storage_fs,1}.
{emqx_ft_storage_fs_reader,1}.
{emqx_gateway_api_listeners,1}.
{emqx_gateway_cm,1}.
{emqx_gateway_http,1}.

View File

@ -717,9 +717,13 @@ do_publish(_PacketId, Msg = #message{qos = ?QOS_0}, Channel) ->
{ok, NChannel};
do_publish(PacketId, Msg = #message{qos = ?QOS_1}, Channel) ->
PubRes = emqx_broker:publish(Msg),
RC = puback_reason_code(PubRes),
NChannel = ensure_quota(PubRes, Channel),
handle_out(puback, {PacketId, RC}, NChannel);
RC = puback_reason_code(PacketId, Msg, PubRes),
case RC of
undefined ->
{ok, Channel};
_Value ->
do_finish_publish(PacketId, PubRes, RC, Channel)
end;
do_publish(
PacketId,
Msg = #message{qos = ?QOS_2},
@ -727,7 +731,7 @@ do_publish(
) ->
case emqx_session:publish(ClientInfo, PacketId, Msg, Session) of
{ok, PubRes, NSession} ->
RC = puback_reason_code(PubRes),
RC = pubrec_reason_code(PubRes),
NChannel0 = set_session(NSession, Channel),
NChannel1 = ensure_timer(await_timer, NChannel0),
NChannel2 = ensure_quota(PubRes, NChannel1),
@ -740,6 +744,10 @@ do_publish(
handle_out(disconnect, RC, Channel)
end.
do_finish_publish(PacketId, PubRes, RC, Channel) ->
NChannel = ensure_quota(PubRes, Channel),
handle_out(puback, {PacketId, RC}, NChannel).
ensure_quota(_, Channel = #channel{quota = infinity}) ->
Channel;
ensure_quota(PubRes, Channel = #channel{quota = Limiter}) ->
@ -759,9 +767,14 @@ ensure_quota(PubRes, Channel = #channel{quota = Limiter}) ->
ensure_timer(quota_timer, Intv, Channel#channel{quota = NLimiter})
end.
-compile({inline, [puback_reason_code/1]}).
puback_reason_code([]) -> ?RC_NO_MATCHING_SUBSCRIBERS;
puback_reason_code([_ | _]) -> ?RC_SUCCESS.
-compile({inline, [pubrec_reason_code/1]}).
pubrec_reason_code([]) -> ?RC_NO_MATCHING_SUBSCRIBERS;
pubrec_reason_code([_ | _]) -> ?RC_SUCCESS.
puback_reason_code(PacketId, Msg, [] = PubRes) ->
emqx_hooks:run_fold('message.puback', [PacketId, Msg, PubRes], ?RC_NO_MATCHING_SUBSCRIBERS);
puback_reason_code(PacketId, Msg, [_ | _] = PubRes) ->
emqx_hooks:run_fold('message.puback', [PacketId, Msg, PubRes], ?RC_SUCCESS).
-compile({inline, [after_message_acked/3]}).
after_message_acked(ClientInfo, Msg, PubAckProps) ->
@ -1266,6 +1279,8 @@ handle_info(die_if_test = Info, Channel) ->
{ok, Channel};
handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) ->
handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel);
handle_info({puback, PacketId, PubRes, RC}, Channel) ->
do_finish_publish(PacketId, PubRes, RC, Channel);
handle_info(Info, Channel) ->
?SLOG(error, #{msg => "unexpected_info", info => Info}),
{ok, Channel}.

View File

@ -97,6 +97,7 @@
mark_channel_connected/1,
mark_channel_disconnected/1,
get_connected_client_count/0,
takeover_finish/2,
do_kick_session/3,
do_get_chan_stats/2,
@ -188,11 +189,13 @@ unregister_channel(ClientId) when is_binary(ClientId) ->
ok.
%% @private
do_unregister_channel(Chan) ->
do_unregister_channel({_ClientId, ChanPid} = Chan) ->
ok = emqx_cm_registry:unregister_channel(Chan),
true = ets:delete(?CHAN_CONN_TAB, Chan),
true = ets:delete(?CHAN_INFO_TAB, Chan),
ets:delete_object(?CHAN_TAB, Chan).
ets:delete_object(?CHAN_TAB, Chan),
ok = emqx_hooks:run('channel.unregistered', [ChanPid]),
true.
-spec connection_closed(emqx_types:clientid()) -> true.
connection_closed(ClientId) ->
@ -220,7 +223,7 @@ do_get_chan_info(ClientId, ChanPid) ->
-spec get_chan_info(emqx_types:clientid(), chan_pid()) ->
maybe(emqx_types:infos()).
get_chan_info(ClientId, ChanPid) ->
wrap_rpc(emqx_cm_proto_v1:get_chan_info(ClientId, ChanPid)).
wrap_rpc(emqx_cm_proto_v2:get_chan_info(ClientId, ChanPid)).
%% @doc Update infos of the channel.
-spec set_chan_info(emqx_types:clientid(), emqx_types:attrs()) -> boolean().
@ -250,7 +253,7 @@ do_get_chan_stats(ClientId, ChanPid) ->
-spec get_chan_stats(emqx_types:clientid(), chan_pid()) ->
maybe(emqx_types:stats()).
get_chan_stats(ClientId, ChanPid) ->
wrap_rpc(emqx_cm_proto_v1:get_chan_stats(ClientId, ChanPid)).
wrap_rpc(emqx_cm_proto_v2:get_chan_stats(ClientId, ChanPid)).
%% @doc Set channel's stats.
-spec set_chan_stats(emqx_types:clientid(), emqx_types:stats()) -> boolean().
@ -312,13 +315,7 @@ open_session(false, ClientInfo = #{clientid := ClientId}, ConnInfo) ->
}};
{living, ConnMod, ChanPid, Session} ->
ok = emqx_session:resume(ClientInfo, Session),
case
request_stepdown(
{takeover, 'end'},
ConnMod,
ChanPid
)
of
case wrap_rpc(emqx_cm_proto_v2:takeover_finish(ConnMod, ChanPid)) of
{ok, Pendings} ->
Session1 = emqx_persistent_session:persist(
ClientInfo, ConnInfo, Session
@ -408,6 +405,13 @@ takeover_session(ClientId) ->
takeover_session(ClientId, ChanPid)
end.
takeover_finish(ConnMod, ChanPid) ->
request_stepdown(
{takeover, 'end'},
ConnMod,
ChanPid
).
takeover_session(ClientId, Pid) ->
try
do_takeover_session(ClientId, Pid)
@ -437,7 +441,7 @@ do_takeover_session(ClientId, ChanPid) when node(ChanPid) == node() ->
end
end;
do_takeover_session(ClientId, ChanPid) ->
wrap_rpc(emqx_cm_proto_v1:takeover_session(ClientId, ChanPid)).
wrap_rpc(emqx_cm_proto_v2:takeover_session(ClientId, ChanPid)).
%% @doc Discard all the sessions identified by the ClientId.
-spec discard_session(emqx_types:clientid()) -> ok.
@ -539,7 +543,7 @@ do_kick_session(Action, ClientId, ChanPid) ->
%% @private This function is shared for session 'kick' and 'discard' (as the first arg Action).
kick_session(Action, ClientId, ChanPid) ->
try
wrap_rpc(emqx_cm_proto_v1:kick_session(Action, ClientId, ChanPid))
wrap_rpc(emqx_cm_proto_v2:kick_session(Action, ClientId, ChanPid))
catch
Error:Reason ->
%% This should mostly be RPC failures.
@ -759,7 +763,7 @@ do_get_chann_conn_mod(ClientId, ChanPid) ->
end.
get_chann_conn_mod(ClientId, ChanPid) ->
wrap_rpc(emqx_cm_proto_v1:get_chann_conn_mod(ClientId, ChanPid)).
wrap_rpc(emqx_cm_proto_v2:get_chann_conn_mod(ClientId, ChanPid)).
mark_channel_connected(ChanPid) ->
?tp(emqx_cm_connected_client_count_inc, #{chan_pid => ChanPid}),

View File

@ -0,0 +1,90 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_maybe).
-include_lib("emqx/include/types.hrl").
-export([to_list/1]).
-export([from_list/1]).
-export([define/2]).
-export([apply/2]).
-type t(T) :: maybe(T).
-export_type([t/1]).
%% @doc Convert a maybe-value into a list: `undefined' becomes the empty
%% list, any other term becomes a singleton list containing it.
-spec to_list(maybe(A)) -> [A].
to_list(Maybe) ->
    case Maybe of
        undefined -> [];
        Term -> [Term]
    end.
%% @doc Inverse of `to_list/1': the empty list becomes `undefined', a
%% singleton list yields its only element. Longer lists are a contract
%% violation and crash with `function_clause'.
-spec from_list([A]) -> maybe(A).
from_list([]) ->
    undefined;
from_list([Value]) ->
    Value.
%% @doc Substitute a default: return `Default' when the first argument
%% is `undefined', otherwise return the first argument unchanged.
-spec define(maybe(A), B) -> A | B.
define(undefined, Default) ->
    Default;
define(Value, _Default) ->
    Value.
%% @doc Apply a function to a maybe-value: `undefined' propagates
%% unchanged (the function is never called), otherwise the function is
%% applied to the term.
%% NOTE: the result type is independent of the argument type — e.g. the
%% module's own test applies `integer_to_binary/1' (integer → binary) —
%% so the spec maps `maybe(A)' to `maybe(B)', not `maybe(A)'.
-spec apply(fun((A) -> B), maybe(A)) -> maybe(B).
apply(_Fun, undefined) ->
    undefined;
apply(Fun, Term) when is_function(Fun) ->
    erlang:apply(Fun, [Term]).
%%
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").

%% `undefined' maps to the empty list; anything else to a singleton.
to_list_test_() ->
    [
        ?_assertEqual([], to_list(undefined)),
        ?_assertEqual([42], to_list(42))
    ].

%% Lists longer than one element are a contract violation and crash.
from_list_test_() ->
    [
        ?_assertEqual(undefined, from_list([])),
        ?_assertEqual(3.1415, from_list([3.1415])),
        ?_assertError(_, from_list([1, 2, 3]))
    ].

define_test_() ->
    [
        ?_assertEqual(42, define(42, undefined)),
        ?_assertEqual(<<"default">>, define(undefined, <<"default">>)),
        ?_assertEqual(undefined, define(undefined, undefined))
    ].

%% `apply/2' is called module-qualified to avoid clashing with the
%% auto-imported BIF `erlang:apply/2'.
apply_test_() ->
    [
        ?_assertEqual(<<"42">>, ?MODULE:apply(fun erlang:integer_to_binary/1, 42)),
        ?_assertEqual(undefined, ?MODULE:apply(fun erlang:integer_to_binary/1, undefined)),
        ?_assertEqual(undefined, ?MODULE:apply(fun crash/1, undefined))
    ].

%% Always crashes: proves `apply/2' short-circuits on `undefined'
%% without ever calling the function.
crash(_) ->
    erlang:error(crashed).

-endif.

View File

@ -101,6 +101,8 @@
-export_type([oom_policy/0]).
-export_type([takeover_data/0]).
-type proto_ver() ::
?MQTT_PROTO_V3
| ?MQTT_PROTO_V4
@ -242,3 +244,5 @@
max_heap_size => non_neg_integer(),
enable => boolean()
}.
-type takeover_data() :: map().

View File

@ -0,0 +1,208 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Weighted directed graph.
%%
%% Purely functional, built on top of a single `gb_tree`.
%% Weights are currently assumed to be non-negative numbers, however
%% presumably anything that is 0 should work (but won't typecheck 🥲).
-module(emqx_wdgraph).
-export([new/0]).
-export([insert_edge/5]).
-export([find_edge/3]).
-export([get_edges/2]).
-export([fold/3]).
-export([find_shortest_path/3]).
-export_type([t/0]).
-export_type([t/2]).
-export_type([weight/0]).
-type gnode() :: term().
-type weight() :: _NonNegative :: number().
-type label() :: term().
-opaque t() :: t(gnode(), label()).
-opaque t(Node, Label) :: gb_trees:tree({Node}, [{Node, weight(), Label}]).
%%
%% @doc Create an empty weighted directed graph.
-spec new() -> t(_, _).
new() ->
    gb_trees:empty().
%% @doc Add an edge between two nodes; nodes are created lazily, they
%% are not expected to exist beforehand. At most one edge may exist per
%% ordered pair of nodes: inserting between already-connected nodes
%% replaces the existing edge.
-spec insert_edge(Node, Node, weight(), Label, t(Node, Label)) -> t(Node, Label).
insert_edge(From, To, Weight, EdgeLabel, G) ->
    EdgesWas = tree_lookup({From}, G, []),
    Edges = lists:keystore(To, 1, EdgesWas, {To, Weight, EdgeLabel}),
    tree_update({From}, Edges, G).
%% @doc Find an existing edge between two nodes, if any; returns the
%% edge's weight and label, or `false' when the nodes are not connected.
-spec find_edge(Node, Node, t(Node, Label)) -> {weight(), Label} | false.
find_edge(From, To, G) ->
    Outgoing = tree_lookup({From}, G, []),
    case lists:keyfind(To, 1, Outgoing) of
        {To, Weight, Label} -> {Weight, Label};
        false -> false
    end.
%% @doc Get all outgoing edges of `Node' as `{To, weight(), Label}'
%% tuples. A node without edges (or absent from the graph) yields `[]'.
-spec get_edges(Node, t(Node, Label)) -> [{Node, weight(), Label}].
get_edges(Node, G) ->
    tree_lookup({Node}, G, []).
%% @doc Fold over every edge in the graph. The fold function receives
%% the source node, the whole edge tuple `{To, Weight, Label}' and the
%% accumulator.
-spec fold(FoldFun, Acc, t(Node, Label)) -> Acc when
    FoldFun :: fun((Node, _Edge :: {Node, weight(), Label}, Acc) -> Acc).
fold(FoldFun, Acc, G) ->
    fold_iterator(FoldFun, Acc, gb_trees:iterator(G)).
%% Iterate the underlying tree, folding over edge entries.
%% NOTE(review): the first clause only matches 1-tuple keys `{Node}'
%% holding non-empty edge lists. The transient `{Node, cost}' and
%% `{queued, ...}' bookkeeping keys never escape `find_shortest_path/3',
%% and `insert_edge/5' never stores an empty list, so every non-`none'
%% step is expected to match — confirm if the representation changes.
fold_iterator(FoldFun, AccIn, It) ->
    case gb_trees:next(It) of
        {{Node}, Edges = [_ | _], ItNext} ->
            AccNext = lists:foldl(
                fun(Edge = {_To, _Weight, _Label}, Acc) ->
                    FoldFun(Node, Edge, Acc)
                end,
                AccIn,
                Edges
            ),
            fold_iterator(FoldFun, AccNext, ItNext);
        none ->
            AccIn
    end.
% Find the shortest path between two nodes, if any. If the path exists, return list
% of edge labels along that path; otherwise return `{false, StoppedAt}' where
% `StoppedAt' is the last node the search settled before giving up.
% This is a Dijkstra shortest path algorithm. It is one-way right now, for
% simplicity sake.
-spec find_shortest_path(Node, Node, t(Node, Label)) -> [Label] | {false, _StoppedAt :: Node}.
find_shortest_path(From, To, G1) ->
    % NOTE
    % If `From` and `To` are the same node, then path is `[]` even if this
    % node does not exist in the graph.
    % Seed the queue with the source node at cost 0 and no predecessor.
    G2 = set_cost(From, 0, [], G1),
    case find_shortest_path(From, 0, To, G2) of
        {true, G3} ->
            % Costs/predecessors are settled; walk them back into a path.
            construct_path(From, To, [], G3);
        {false, Last} ->
            {false, Last}
    end.
%% One Dijkstra step: relax the current node's outgoing edges, then pop
%% the cheapest queued node. Terminates when the target itself is popped
%% (shortest path settled) or the queue is exhausted (no path).
find_shortest_path(Node, Cost, Target, G1) ->
    Edges = get_edges(Node, G1),
    G2 = update_neighbours(Node, Cost, Edges, G1),
    case take_queued(G2) of
        {Target, _NextCost, G3} ->
            {true, G3};
        {Next, NextCost, G3} ->
            find_shortest_path(Next, NextCost, Target, G3);
        none ->
            {false, Node}
    end.
%% Walk the recorded predecessor links backwards from `To' to `From',
%% accumulating edge labels so the result comes out in forward order.
construct_path(From, From, Acc, _) ->
    Acc;
construct_path(From, To, Acc, G) ->
    {Prev, Label} = get_label(To, G),
    construct_path(From, Prev, [Label | Acc], G).
%% Relax every outgoing edge of `Node', given the settled cost of
%% reaching `Node' itself.
update_neighbours(Node, NodeCost, Edges, G) ->
    Relax = fun(Edge, Acc) -> update_neighbour(Node, NodeCost, Edge, Acc) end,
    lists:foldl(Relax, G, Edges).
%% Relax a single edge `{Neighbour, Weight, Label}': when the neighbour
%% is not yet settled and reaching it through `Node' is cheaper than its
%% current best cost, record the new cost with `Node' as predecessor.
update_neighbour(Node, NodeCost, {Neighbour, Weight, Label}, G) ->
    case is_visited(G, Neighbour) of
        true ->
            G;
        false ->
            NeighCost = NodeCost + Weight,
            %% `inf' is an atom, which term-orders greater than any number,
            %% so an unreached neighbour always accepts the new cost.
            case NeighCost < get_cost(Neighbour, G) of
                true ->
                    set_cost(Neighbour, NeighCost, {Node, Label}, G);
                false ->
                    G
            end
    end.
%% Current best-known cost of reaching `Node', or the atom `inf' when
%% the node has not been reached yet (atoms compare greater than all
%% numbers, so `inf' works as infinity in `<' comparisons).
get_cost(Node, G) ->
    case tree_lookup({Node, cost}, G, inf) of
        {Cost, _Label} ->
            Cost;
        inf ->
            inf
    end.
%% Predecessor info `{PrevNode, EdgeLabel}' recorded alongside the cost
%% entry; crashes (`function_clause' in gb_trees:get/2) if the node was
%% never reached.
get_label(Node, G) ->
    {_Cost, Label} = gb_trees:get({Node, cost}, G),
    Label.
%% Record a new best cost (and predecessor label) for a node, keeping
%% the embedded priority queue consistent: if the node is already
%% queued under its previous cost, the queue entry is moved to the new
%% cost; otherwise a fresh queue entry is inserted.
set_cost(Node, Cost, Label, G1) ->
    G3 =
        case tree_lookup({Node, cost}, G1, inf) of
            {CostWas, _Label} ->
                %% Re-queue under the updated cost.
                {true, G2} = gb_trees:take({queued, CostWas, Node}, G1),
                gb_trees:insert({queued, Cost, Node}, true, G2);
            inf ->
                gb_trees:insert({queued, Cost, Node}, true, G1)
        end,
    G4 = tree_update({Node, cost}, {Cost, Label}, G3),
    G4.
%% Pop the minimum-cost queued node. Queue entries are keyed
%% `{queued, Cost, Node}' (3-tuples sort after the 1-/2-tuple edge and
%% cost keys), so the first key at or after `{queued, 0, 0}' in term
%% order is the cheapest entry.
%% NOTE(review): a node term that compares smaller than the integer 0
%% (e.g. a negative number) queued at cost 0 would be skipped by
%% iterator_from/2 — presumably node terms are never numbers; confirm.
take_queued(G1) ->
    It = gb_trees:iterator_from({queued, 0, 0}, G1),
    case gb_trees:next(It) of
        {{queued, Cost, Node} = Index, true, _It} ->
            {Node, Cost, gb_trees:delete(Index, G1)};
        _ ->
            none
    end.
%% A node counts as visited (settled) once it has a cost entry but is no
%% longer present in the queue under that cost.
is_visited(G, Node) ->
    case tree_lookup({Node, cost}, G, inf) of
        inf ->
            false;
        {Cost, _Label} ->
            not tree_lookup({queued, Cost, Node}, G, false)
    end.
%% Lookup with a default: like gb_trees:lookup/2, but returns `Default'
%% instead of `none' on a miss.
tree_lookup(Index, Tree, Default) ->
    case gb_trees:lookup(Index, Tree) of
        none -> Default;
        {value, Value} -> Value
    end.
%% Insert-or-update: store `Value' under `Index' whether or not the key
%% already exists. gb_trees:enter/3 implements exactly these semantics,
%% replacing the hand-rolled is_defined/insert/update sequence (and
%% avoiding the double tree traversal).
tree_update(Index, Value, Tree) ->
    gb_trees:enter(Index, Value, Tree).

View File

@ -0,0 +1,88 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_cm_proto_v2).
-behaviour(emqx_bpapi).
-export([
introduced_in/0,
lookup_client/2,
kickout_client/2,
get_chan_stats/2,
get_chan_info/2,
get_chann_conn_mod/2,
takeover_session/2,
takeover_finish/2,
kick_session/3
]).
-include("bpapi.hrl").
-include("src/emqx_cm.hrl").
%% @doc First EMQX release this BPAPI version appeared in.
introduced_in() ->
    "5.0.0".
%% Kick the client's session on the given node.
-spec kickout_client(node(), emqx_types:clientid()) -> ok | {badrpc, _}.
kickout_client(Node, ClientId) ->
    rpc:call(Node, emqx_cm, kick_session, [ClientId]).

%% Look up channel info on the given node by client id or username.
-spec lookup_client(node(), {clientid, emqx_types:clientid()} | {username, emqx_types:username()}) ->
    [emqx_cm:channel_info()] | {badrpc, _}.
lookup_client(Node, Key) ->
    rpc:call(Node, emqx_cm, lookup_client, [Key]).

%% The lookups below run on the channel's own node (node(ChanPid)),
%% each with a timeout of twice ?T_GET_INFO.
-spec get_chan_stats(emqx_types:clientid(), emqx_cm:chan_pid()) -> emqx_types:stats() | {badrpc, _}.
get_chan_stats(ClientId, ChanPid) ->
    rpc:call(node(ChanPid), emqx_cm, do_get_chan_stats, [ClientId, ChanPid], ?T_GET_INFO * 2).

-spec get_chan_info(emqx_types:clientid(), emqx_cm:chan_pid()) -> emqx_types:infos() | {badrpc, _}.
get_chan_info(ClientId, ChanPid) ->
    rpc:call(node(ChanPid), emqx_cm, do_get_chan_info, [ClientId, ChanPid], ?T_GET_INFO * 2).

-spec get_chann_conn_mod(emqx_types:clientid(), emqx_cm:chan_pid()) ->
    module() | undefined | {badrpc, _}.
get_chann_conn_mod(ClientId, ChanPid) ->
    rpc:call(node(ChanPid), emqx_cm, do_get_chann_conn_mod, [ClientId, ChanPid], ?T_GET_INFO * 2).

%% Take over the session living on the channel's node; timeout is twice
%% ?T_TAKEOVER.
-spec takeover_session(emqx_types:clientid(), emqx_cm:chan_pid()) ->
    none
    | {expired | persistent, emqx_session:session()}
    | {living, _ConnMod :: atom(), emqx_cm:chan_pid(), emqx_session:session()}
    | {badrpc, _}.
takeover_session(ClientId, ChanPid) ->
    rpc:call(node(ChanPid), emqx_cm, takeover_session, [ClientId, ChanPid], ?T_TAKEOVER * 2).
%% @doc Finish session takeover on the channel's node: ask the channel
%% (through its connection module) to step down and hand over takeover
%% data / pending deliveries.
%% Fixed spec typo: the module is `emqx_types' (which exports
%% takeover_data/0), not `emqx_type'.
-spec takeover_finish(module(), emqx_cm:chan_pid()) ->
    {ok, emqx_types:takeover_data()}
    | {ok, list(emqx_types:deliver()), emqx_types:takeover_data()}
    | {error, term()}
    | {badrpc, _}.
takeover_finish(ConnMod, ChanPid) ->
    erpc:call(
        node(ChanPid),
        emqx_cm,
        takeover_finish,
        [ConnMod, ChanPid],
        ?T_TAKEOVER * 2
    ).
%% Kick or discard the session; runs on the channel's node with a
%% timeout of twice ?T_KICK.
-spec kick_session(kick | discard, emqx_types:clientid(), emqx_cm:chan_pid()) -> ok | {badrpc, _}.
kick_session(Action, ClientId, ChanPid) ->
    rpc:call(node(ChanPid), emqx_cm, do_kick_session, [Action, ClientId, ChanPid], ?T_KICK * 2).

View File

@ -1133,7 +1133,7 @@ t_ws_cookie_init(_) ->
?assertMatch(#{ws_cookie := WsCookie}, emqx_channel:info(clientinfo, Channel)).
%%--------------------------------------------------------------------
%% Test cases for other mechnisms
%% Test cases for other mechanisms
%%--------------------------------------------------------------------
t_flapping_detect(_) ->

View File

@ -0,0 +1,70 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2018-2022 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_channel_delayed_puback_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include_lib("emqx/include/emqx.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx/include/emqx_hooks.hrl").
all() ->
    emqx_common_test_helpers:all(?MODULE).

init_per_suite(Config) ->
    emqx_common_test_helpers:boot_modules(all),
    emqx_common_test_helpers:start_apps([]),
    Config.

end_per_suite(_Config) ->
    emqx_common_test_helpers:stop_apps([]).

%% Per-testcase setup/teardown is delegated to the testcase function
%% itself, invoked with {init, Config} and {'end', Config}.
init_per_testcase(Case, Config) ->
    ?MODULE:Case({init, Config}).

end_per_testcase(Case, Config) ->
    ?MODULE:Case({'end', Config}).

%%--------------------------------------------------------------------
%% Test cases
%%--------------------------------------------------------------------

%% Verify that a 'message.puback' hook can delay the PUBACK: the hook
%% stops the chain with `undefined' (no immediate PUBACK) and instead
%% sends a {puback, ...} info message carrying the final reason code,
%% which the client then observes on publish.
t_delayed_puback({init, Config}) ->
    emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST),
    Config;
t_delayed_puback({'end', _Config}) ->
    emqx_hooks:del('message.puback', {?MODULE, on_message_puback});
t_delayed_puback(_Config) ->
    {ok, ConnPid} = emqtt:start_link([{clientid, <<"clientid">>}, {proto_ver, v5}]),
    {ok, _} = emqtt:connect(ConnPid),
    %% QoS 1 publish; the reason code comes from the delayed puback below.
    {ok, #{reason_code := ?RC_UNSPECIFIED_ERROR}} = emqtt:publish(
        ConnPid, <<"topic">>, <<"hello">>, 1
    ),
    emqtt:disconnect(ConnPid).

%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------

%% Hook callback. NOTE(review): presumably runs in the channel process,
%% so self() is the channel and the {puback, ...} message reaches its
%% handle_info — confirm against emqx_channel.
on_message_puback(PacketId, _Msg, PubRes, _RC) ->
    erlang:send(self(), {puback, PacketId, PubRes, ?RC_UNSPECIFIED_ERROR}),
    {stop, undefined}.

View File

@ -30,6 +30,7 @@
start_apps/1,
start_apps/2,
start_apps/3,
start_app/2,
stop_apps/1,
stop_apps/2,
reload/2,
@ -244,6 +245,9 @@ do_render_app_config(App, Schema, ConfigFile, Opts) ->
copy_certs(App, RenderedConfigFile),
ok.
start_app(App, SpecAppConfig) ->
start_app(App, SpecAppConfig, #{}).
start_app(App, SpecAppConfig, Opts) ->
render_and_load_app_config(App, Opts),
SpecAppConfig(App),
@ -302,12 +306,7 @@ read_schema_configs(no_schema, _ConfigFile) ->
ok;
read_schema_configs(Schema, ConfigFile) ->
NewConfig = generate_config(Schema, ConfigFile),
lists:foreach(
fun({App, Configs}) ->
[application:set_env(App, Par, Value) || {Par, Value} <- Configs]
end,
NewConfig
).
application:set_env(NewConfig).
generate_config(SchemaModule, ConfigFile) when is_atom(SchemaModule) ->
{ok, Conf0} = hocon:load(ConfigFile, #{format => richmap}),

View File

@ -43,12 +43,21 @@
ip/0,
port/0,
limited_atom/0,
limited_latin_atom/0
limited_latin_atom/0,
printable_utf8/0,
printable_codepoint/0
]).
%% Generic Types
-export([
scaled/2
]).
%% Iterators
-export([nof/1]).
-type proptype() :: proper_types:raw_type().
%%--------------------------------------------------------------------
%% Types High level
%%--------------------------------------------------------------------
@ -606,6 +615,20 @@ limited_atom() ->
limited_any_term() ->
oneof([binary(), number(), string()]).
%% Generator for printable UTF-8 binaries: a list of printable
%% codepoints encoded to a binary, filtered so only inputs that encode
%% to a valid binary (characters_to_binary succeeded) are produced.
printable_utf8() ->
    ?SUCHTHAT(
        String,
        ?LET(L, list(printable_codepoint()), unicode:characters_to_binary(L)),
        is_binary(String)
    ).
%% Weighted generator of printable Unicode codepoints: mostly printable
%% ASCII, sometimes BMP characters outside the control and surrogate
%% ranges.
printable_codepoint() ->
    frequency([
        {7, range(16#20, 16#7E)},
        {2, range(16#00A0, 16#D7FF)},
        {1, range(16#E000, 16#FFFD)}
    ]).
%%--------------------------------------------------------------------
%% Iterators
%%--------------------------------------------------------------------
@ -632,6 +655,14 @@ limited_list(N, T) ->
end
).
%%--------------------------------------------------------------------
%% Generic Types
%%--------------------------------------------------------------------
%% Scale the implicit size parameter of a proper generator by a positive
%% factor `F' (rounded to the nearest integer).
-spec scaled(number(), proptype()) -> proptype().
scaled(F, T) when F > 0 ->
    ?SIZED(S, resize(round(S * F), T)).
%%--------------------------------------------------------------------
%% Internal funcs
%%--------------------------------------------------------------------

View File

@ -0,0 +1,104 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_wdgraph_tests).
-include_lib("eunit/include/eunit.hrl").
%% An empty graph has no edges and no connections.
empty_test_() ->
    G = emqx_wdgraph:new(),
    [
        ?_assertEqual([], emqx_wdgraph:get_edges(foo, G)),
        ?_assertEqual(false, emqx_wdgraph:find_edge(foo, bar, G))
    ].

%% Re-inserting an edge between the same pair of nodes replaces it.
%% NOTE: the expected get_edges/2 ordering reflects lists:keystore/4
%% insertion order inside the implementation.
edges_nodes_test_() ->
    G1 = emqx_wdgraph:new(),
    G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1),
    G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2),
    G4 = emqx_wdgraph:insert_edge(bar, foo, 0, "free", G3),
    G5 = emqx_wdgraph:insert_edge(foo, bar, 100, "luxury", G4),
    [
        ?_assertEqual({42, "fancy"}, emqx_wdgraph:find_edge(foo, bar, G2)),
        ?_assertEqual({100, "luxury"}, emqx_wdgraph:find_edge(foo, bar, G5)),
        ?_assertEqual([{bar, 100, "luxury"}], emqx_wdgraph:get_edges(foo, G5)),
        ?_assertEqual({1, "cheapest"}, emqx_wdgraph:find_edge(bar, baz, G5)),
        ?_assertEqual([{baz, 1, "cheapest"}, {foo, 0, "free"}], emqx_wdgraph:get_edges(bar, G5))
    ].

%% fold/3 visits every edge exactly once.
fold_test_() ->
    G1 = emqx_wdgraph:new(),
    G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1),
    G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2),
    G4 = emqx_wdgraph:insert_edge(bar, foo, 0, "free", G3),
    G5 = emqx_wdgraph:insert_edge(foo, bar, 100, "luxury", G4),
    [
        ?_assertEqual(
            % 100 + 0 + 1
            101,
            emqx_wdgraph:fold(fun(_From, {_, Weight, _}, Acc) -> Weight + Acc end, 0, G5)
        ),
        ?_assertEqual(
            [bar, baz, foo],
            lists:usort(
                emqx_wdgraph:fold(fun(From, {To, _, _}, Acc) -> [From, To | Acc] end, [], G5)
            )
        )
    ].

%% Searching from an unknown node fails there; a trivial self-path is []
%% even for nodes absent from the graph.
nonexistent_nodes_path_test_() ->
    G1 = emqx_wdgraph:new(),
    G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1),
    G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2),
    [
        ?_assertEqual(
            {false, nosuchnode},
            emqx_wdgraph:find_shortest_path(nosuchnode, baz, G3)
        ),
        ?_assertEqual(
            [],
            emqx_wdgraph:find_shortest_path(nosuchnode, nosuchnode, G3)
        )
    ].

%% When components are disconnected the search reports the node where it
%% stopped.
nonexistent_path_test_() ->
    G1 = emqx_wdgraph:new(),
    G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1),
    G3 = emqx_wdgraph:insert_edge(baz, boo, 1, "cheapest", G2),
    G4 = emqx_wdgraph:insert_edge(boo, last, 3.5, "change", G3),
    [
        ?_assertEqual(
            {false, last},
            emqx_wdgraph:find_shortest_path(baz, foo, G4)
        ),
        ?_assertEqual(
            {false, bar},
            emqx_wdgraph:find_shortest_path(foo, last, G4)
        )
    ].

%% The cheap detour (42 + 1 + 0) wins over the direct luxury edge
%% (42 + 100), and the result is the list of edge labels in path order.
shortest_path_test() ->
    G1 = emqx_wdgraph:new(),
    G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1),
    G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2),
    G4 = emqx_wdgraph:insert_edge(baz, last, 0, "free", G3),
    G5 = emqx_wdgraph:insert_edge(bar, last, 100, "luxury", G4),
    G6 = emqx_wdgraph:insert_edge(bar, foo, 0, "comeback", G5),
    ?assertEqual(
        ["fancy", "cheapest", "free"],
        emqx_wdgraph:find_shortest_path(foo, last, G6)
    ).

View File

@ -1,6 +1,6 @@
%% -*- mode: erlang; -*-
{erl_opts, [debug_info]}.
{deps, [ {erlcloud, {git, "https://github.com/emqx/erlcloud.git", {tag, "3.5.16-emqx-1"}}}
{deps, [ {erlcloud, {git, "https://github.com/emqx/erlcloud", {tag, "3.6.8-emqx-1"}}}
, {emqx_connector, {path, "../../apps/emqx_connector"}}
, {emqx_resource, {path, "../../apps/emqx_resource"}}
, {emqx_bridge, {path, "../../apps/emqx_bridge"}}

View File

@ -32,8 +32,6 @@
-include_lib("emqx/include/http_api.hrl").
-include_lib("emqx/include/emqx_release.hrl").
-define(BASE_PATH, "/api/v5").
-define(EMQX_MIDDLE, emqx_dashboard_middleware).
%%--------------------------------------------------------------------
@ -52,7 +50,7 @@ start_listeners(Listeners) ->
GlobalSpec = #{
openapi => "3.0.0",
info => #{title => "EMQX API", version => ?EMQX_API_VERSION},
servers => [#{url => ?BASE_PATH}],
servers => [#{url => emqx_dashboard_swagger:base_path()}],
components => #{
schemas => #{},
'securitySchemes' => #{
@ -69,11 +67,11 @@ start_listeners(Listeners) ->
{"/", cowboy_static, {priv_file, emqx_dashboard, "www/index.html"}},
{"/static/[...]", cowboy_static, {priv_dir, emqx_dashboard, "www/static"}},
{emqx_mgmt_api_status:path(), emqx_mgmt_api_status, []},
{?BASE_PATH ++ "/[...]", emqx_dashboard_bad_api, []},
{emqx_dashboard_swagger:relative_uri("/[...]"), emqx_dashboard_bad_api, []},
{'_', cowboy_static, {priv_file, emqx_dashboard, "www/index.html"}}
],
BaseMinirest = #{
base_path => ?BASE_PATH,
base_path => emqx_dashboard_swagger:base_path(),
modules => minirest_api:find_api_modules(apps()),
authorization => Authorization,
security => [#{'basicAuth' => []}, #{'bearerAuth' => []}],

View File

@ -19,12 +19,17 @@
-include_lib("typerefl/include/types.hrl").
-include_lib("hocon/include/hoconsc.hrl").
-define(BASE_PATH, "/api/v5").
%% API
-export([spec/1, spec/2]).
-export([namespace/0, namespace/1, fields/1]).
-export([schema_with_example/2, schema_with_examples/2]).
-export([error_codes/1, error_codes/2]).
-export([file_schema/1]).
-export([base_path/0]).
-export([relative_uri/1]).
-export([compose_filters/2]).
-export([
filter_check_request/2,
@ -84,14 +89,30 @@
-type request() :: #{bindings => map(), query_string => map(), body => map()}.
-type request_meta() :: #{module => module(), path => string(), method => atom()}.
-type filter_result() :: {ok, request()} | {400, 'BAD_REQUEST', binary()}.
-type filter() :: fun((request(), request_meta()) -> filter_result()).
%% More exact types are defined in minirest.hrl, but we don't want to include it
%% because it defines a lot of types and they may clash with the types declared locally.
-type status_code() :: pos_integer().
-type error_code() :: atom() | binary().
-type error_message() :: binary().
-type response_body() :: term().
-type headers() :: map().
-type response() ::
status_code()
| {status_code()}
| {status_code(), response_body()}
| {status_code(), headers(), response_body()}
| {status_code(), error_code(), error_message()}.
-type filter_result() :: {ok, request()} | response().
-type filter() :: emqx_maybe:t(fun((request(), request_meta()) -> filter_result())).
-type spec_opts() :: #{
check_schema => boolean() | filter(),
translate_body => boolean(),
schema_converter => fun((hocon_schema:schema(), Module :: atom()) -> map()),
i18n_lang => atom() | string() | binary()
i18n_lang => atom() | string() | binary(),
filter => filter()
}.
-type route_path() :: string() | binary().
@ -117,9 +138,9 @@ spec(Module, Options) ->
lists:foldl(
fun(Path, {AllAcc, AllRefsAcc}) ->
{OperationId, Specs, Refs} = parse_spec_ref(Module, Path, Options),
CheckSchema = support_check_schema(Options),
Opts = #{filter => filter(Options)},
{
[{filename:join("/", Path), Specs, OperationId, CheckSchema} | AllAcc],
[{filename:join("/", Path), Specs, OperationId, Opts} | AllAcc],
Refs ++ AllRefsAcc
}
end,
@ -184,6 +205,14 @@ error_codes(Codes = [_ | _], MsgDesc) ->
})}
].
%% @doc Root path prefix of the REST API (the value of ?BASE_PATH, "/api/v5").
-spec base_path() -> uri_string:uri_string().
base_path() ->
    ?BASE_PATH.

%% @doc Prefix a path with the API base path, e.g. "/clients" -> "/api/v5/clients".
-spec relative_uri(uri_string:uri_string()) -> uri_string:uri_string().
relative_uri(Uri) ->
    base_path() ++ Uri.
file_schema(FileName) ->
#{
content => #{
@ -242,6 +271,21 @@ gen_api_schema_json_iodata(SchemaMod, SchemaInfo, Converter) ->
[pretty, force_utf8]
).
%% @doc Compose two minirest request filters into one.
%% `undefined' acts as the identity element. The composed filter runs
%% the first filter and feeds its `{ok, Request}' result into the
%% second; any non-ok response short-circuits and is returned as-is.
-spec compose_filters(filter(), filter()) -> filter().
compose_filters(undefined, Filter) ->
    Filter;
compose_filters(Filter, undefined) ->
    Filter;
compose_filters(FilterA, FilterB) ->
    fun(Request0, RequestMeta) ->
        case FilterA(Request0, RequestMeta) of
            {ok, Request1} ->
                FilterB(Request1, RequestMeta);
            Denied ->
                %% First filter rejected the request; do not run the second.
                Denied
        end
    end.
%%------------------------------------------------------------------------------
%% Private functions
%%------------------------------------------------------------------------------
@ -273,14 +317,22 @@ check_only(Schema, Map, Opts) ->
_ = hocon_tconf:check_plain(Schema, Map, Opts),
Map.
%% Maps the `check_schema' spec option to a minirest filter map.
%% NOTE(review): appears superseded by filter/1 below (spec/2 now builds
%% its options via filter/1) — confirm whether this is still called.
support_check_schema(#{check_schema := true, translate_body := true}) ->
    #{filter => fun ?MODULE:filter_check_request_and_translate_body/2};
support_check_schema(#{check_schema := true}) ->
    #{filter => fun ?MODULE:filter_check_request/2};
support_check_schema(#{check_schema := Filter}) when is_function(Filter, 2) ->
    #{filter => Filter};
support_check_schema(_) ->
    #{filter => undefined}.

%% Effective request filter for a path: the schema-check filter derived
%% from `check_schema' composed with the user-supplied `filter' option.
filter(Options) ->
    CheckSchemaFilter = check_schema_filter(Options),
    CustomFilter = custom_filter(Options),
    compose_filters(CheckSchemaFilter, CustomFilter).

%% User-supplied filter from the `filter' spec option, or `undefined'.
custom_filter(Options) ->
    maps:get(filter, Options, undefined).

%% Schema-check filter from the `check_schema' option:
%% `true' -> request check (plus body translation when `translate_body'),
%% a fun of arity 2 is used as-is, anything else disables checking.
check_schema_filter(#{check_schema := true, translate_body := true}) ->
    fun ?MODULE:filter_check_request_and_translate_body/2;
check_schema_filter(#{check_schema := true}) ->
    fun ?MODULE:filter_check_request/2;
check_schema_filter(#{check_schema := Filter}) when is_function(Filter, 2) ->
    Filter;
check_schema_filter(_) ->
    undefined.
parse_spec_ref(Module, Path, Options) ->
Schema =

View File

@ -26,11 +26,12 @@
request/4,
multipart_formdata_request/3,
multipart_formdata_request/4,
host/0,
uri/0,
uri/1
]).
-define(HOST, "http://127.0.0.1:18083/").
-define(HOST, "http://127.0.0.1:18083").
-define(API_VERSION, "v5").
-define(BASE_PATH, "api").
@ -98,10 +99,13 @@ request(Username, Method, Url, Body) ->
{error, Reason}
end.
%% Base URL of the dashboard under test (?HOST, no trailing slash).
host() ->
    ?HOST.

%% Root API URL, i.e. host() ++ "/api/v5".
uri() -> uri([]).
%% Build a full dashboard REST API URL from path segments, e.g.
%% uri(["clients"]) -> "http://127.0.0.1:18083/api/v5/clients".
%% Fix: the block previously contained both the old body line
%% (`?HOST ++ ...', now that ?HOST has no trailing slash) and the new
%% one, plus a no-op identity comprehension over `Parts' — both removed.
uri(Parts) when is_list(Parts) ->
    host() ++ "/" ++ to_list(filename:join([?BASE_PATH, ?API_VERSION | Parts])).
auth_header(Username) ->
Password = <<"public">>,

94
apps/emqx_ft/BSL.txt Normal file
View File

@ -0,0 +1,94 @@
Business Source License 1.1
Licensor: Hangzhou EMQ Technologies Co., Ltd.
Licensed Work: EMQX Enterprise Edition
The Licensed Work is (c) 2023
Hangzhou EMQ Technologies Co., Ltd.
Additional Use Grant: Students and educators are granted right to copy,
modify, and create derivative work for research
or education.
Change Date: 2027-02-01
Change License: Apache License, Version 2.0
For information about alternative licensing arrangements for the Software,
please contact Licensor: https://www.emqx.com/en/contact
Notice
The Business Source License (this document, or the “License”) is not an Open
Source license. However, the Licensed Work will eventually be made available
under an Open Source License, as stated in this License.
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
“Business Source License” is a trademark of MariaDB Corporation Ab.
-----------------------------------------------------------------------------
Business Source License 1.1
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited
production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.
MariaDB hereby grants you permission to use this License’s text to license
your works, and to refer to it using the trademark “Business Source License”,
as long as you comply with the Covenants of Licensor below.
Covenants of Licensor
In consideration of the right to use this License’s text and the “Business
Source License” name and trademark, Licensor covenants to MariaDB, and to all
other recipients of the licensed work to be provided by Licensor:
1. To specify as the Change License the GPL Version 2.0 or any later version,
or a license that is compatible with GPL Version 2.0 or a later version,
where “compatible” means that software provided under the Change License can
be included in a program with software provided under GPL Version 2.0 or a
later version. Licensor may specify additional Change Licenses without
limitation.
2. To either: (a) specify an additional grant of rights to use that does not
impose any additional restriction on the right granted in this License, as
the Additional Use Grant; or (b) insert the text “None”.
3. To specify a Change Date.
4. Not to modify this License in any other way.

86
apps/emqx_ft/README.md Normal file
View File

@ -0,0 +1,86 @@
# EMQX File Transfer
EMQX File Transfer application enables the _File Transfer over MQTT_ feature described in [EIP-0021](https://github.com/emqx/eip), and provides support to publish transferred files either to the node-local file system or to the S3 API compatible remote object storage.
## Usage
As almost any other EMQX application, `emqx_ft` is configured via the EMQX configuration system. The following snippet is the minimal configuration that will enable File Transfer over MQTT.
```
file_transfer {
enabled = true
}
```
The configuration above will make File Transfer available to all MQTT clients, and will use the default storage backend, which in turn uses node-local file system both for temporary storage and for the final destination of the transferred files.
## Configuration
Every configuration parameter is described in the `emqx_ft_schema` module.
The most important configuration parameter is `storage`, which defines the storage backend to use. Currently, only `local` storage backend is available, which stores all the temporary data accumulating during file transfers in the node-local file system. Those go into `${EMQX_DATA_DIR}/file_transfer` directory by default, but can be configured via `storage.local.segments.root` parameter. The final destination of the transferred files on the other hand is defined by `storage.local.exporter` parameter, and currently can be either `local` or `s3`.
### Local Exporter
The `local` exporter is the default one, and it stores the transferred files in the node-local file system. The final destination directory is defined by `storage.local.exporter.local.root` parameter, and defaults to `${EMQX_DATA_DIR}/file_transfer/exports` directory.
```
file_transfer {
enabled = true
storage {
local {
exporter {
local { root = "/var/lib/emqx/transfers" }
}
}
}
}
```
Important to note that even though the transferred files go into the node-local file system, the File Transfer API provides a cluster-wide view of the transferred files, and any file can be downloaded from any node in the cluster.
### S3 Exporter
The `s3` exporter stores the transferred files in the S3 API compatible remote object storage. The destination bucket is defined by `storage.local.exporter.s3.bucket` parameter.
This snippet configures File Transfer to store the transferred files in the `my-bucket` bucket in the `us-east-1` region of the AWS S3 service.
```
file_transfer {
enabled = true
storage {
local {
exporter {
s3 {
host = "s3.us-east-1.amazonaws.com"
port = "443"
access_key_id = "AKIA27EZDDM9XLINWXFE"
secret_access_key = "..."
bucket = "my-bucket"
}
}
}
}
}
```
## API
### MQTT
When enabled, File Transfer application reserves MQTT topics starting with `$file/` prefix for the purpose of serving the File Transfer protocol, as described in [EIP-0021](https://github.com/emqx/eip).
### REST
Application publishes a basic set of APIs, to:
* List all the transferred files available for download.
* Configure the application, including the storage backend.
* (When using `local` storage exporter) Download the transferred files.
Switching to the `s3` storage exporter is possible at any time, but the files transferred before the switch will not be
available for download anymore. Though, the files will still be available in the node-local file system.
## Contributing
Please see our [contributing.md](../../CONTRIBUTING.md).

1
apps/emqx_ft/docker-ct Normal file
View File

@ -0,0 +1 @@
minio

View File

View File

@ -0,0 +1,29 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_FT_STORAGE_FS_HRL).
-define(EMQX_FT_STORAGE_FS_HRL, true).
-record(gcstats, {
started_at :: integer(),
finished_at :: integer() | undefined,
files = 0 :: non_neg_integer(),
directories = 0 :: non_neg_integer(),
space = 0 :: non_neg_integer(),
errors = #{} :: #{_GCSubject => {error, _}}
}).
-endif.

11
apps/emqx_ft/rebar.config Normal file
View File

@ -0,0 +1,11 @@
%% -*- mode: erlang -*-
{erl_opts, [debug_info]}.
{deps, [{emqx, {path, "../emqx"}}]}.
{shell, [
% {config, "config/sys.config"},
{apps, [emqx_ft]}
]}.
{project_plugins, [erlfmt]}.

View File

@ -0,0 +1,14 @@
{application, emqx_ft, [
{description, "EMQX file transfer over MQTT"},
{vsn, "0.1.0"},
{registered, []},
{mod, {emqx_ft_app, []}},
{applications, [
kernel,
stdlib,
gproc,
emqx_s3
]},
{env, []},
{modules, []}
]}.

View File

@ -0,0 +1,425 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft).
-include_lib("emqx/include/emqx.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("emqx/include/emqx_hooks.hrl").
-include_lib("snabbkaffe/include/trace.hrl").
-export([
hook/0,
unhook/0
]).
-export([
on_message_publish/1,
on_message_puback/4
]).
-export([
decode_filemeta/1,
encode_filemeta/1
]).
-export([on_complete/4]).
-export_type([
clientid/0,
transfer/0,
bytes/0,
offset/0,
filemeta/0,
segment/0,
checksum/0
]).
%% Number of bytes
-type bytes() :: non_neg_integer().
%% MQTT Client ID
-type clientid() :: binary().
-type fileid() :: binary().
-type transfer() :: {clientid(), fileid()}.
-type offset() :: bytes().
-type checksum() :: {_Algo :: atom(), _Digest :: binary()}.
-type filemeta() :: #{
%% Display name
name := string(),
%% Size in bytes, as advertised by the client.
%% Client is free to specify here whatever it wants, which means we can end
%% up with a file of different size after assembly. It's not clear from
%% specification what that means (e.g. what are clients' expectations), we
%% currently do not consider that an error (or, specifically, a signal that
%% the resulting file is corrupted during transmission).
size => _Bytes :: non_neg_integer(),
checksum => checksum(),
expire_at := emqx_datetime:epoch_second(),
%% TTL of individual segments
%% Somewhat confusing that we won't know it on the nodes where the filemeta
%% is missing.
segments_ttl => _Seconds :: pos_integer(),
user_data => emqx_ft_schema:json_value()
}.
-type segment() :: {offset(), _Content :: binary()}.
%%--------------------------------------------------------------------
%% API for app
%%--------------------------------------------------------------------
%% Install the MQTT hooks implementing the file transfer protocol,
%% registered at ?HP_LOWEST priority.
hook() ->
    ok = emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_LOWEST),
    ok = emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST).

%% Remove the hooks installed by hook/0.
unhook() ->
    ok = emqx_hooks:del('message.publish', {?MODULE, on_message_publish}),
    ok = emqx_hooks:del('message.puback', {?MODULE, on_message_puback}).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
%% @doc Decode file metadata, either from a raw JSON binary or from an
%% already-decoded map, validating it against the `filemeta' schema.
%% Returns {ok, Meta} with atom keys, or a tagged {error, _}.
decode_filemeta(Bin) when is_binary(Bin) ->
    case emqx_utils_json:safe_decode(Bin, [return_maps]) of
        {ok, Decoded} ->
            decode_filemeta(Decoded);
        {error, Reason} ->
            {error, {invalid_filemeta_json, Reason}}
    end;
decode_filemeta(Map) when is_map(Map) ->
    Schema = emqx_ft_schema:schema(filemeta),
    CheckOpts = #{atom_key => true, required => false},
    try hocon_tconf:check_plain(Schema, Map, CheckOpts) of
        Meta ->
            {ok, Meta}
    catch
        throw:{_Schema, Errors} ->
            {error, {invalid_filemeta, Errors}}
    end.
%% @doc Serialize validated file metadata back into a JSON-serializable
%% map with binary keys, per the `filemeta' schema.
encode_filemeta(Meta = #{}) ->
    Schema = emqx_ft_schema:schema(filemeta),
    hocon_tconf:make_serializable(Schema, emqx_utils_maps:binary_key_map(Meta), #{}).
%%--------------------------------------------------------------------
%% Hooks
%%--------------------------------------------------------------------
%% 'message.publish' hook: claim `$file/...' messages by forbidding
%% their publication (allow_publish => false) and stopping the hook
%% chain; the actual protocol handling happens in on_message_puback/4.
on_message_publish(
    Msg = #message{
        id = _Id,
        topic = <<"$file/", _/binary>>
    }
) ->
    Headers = Msg#message.headers,
    {stop, Msg#message{headers = Headers#{allow_publish => false}}};
on_message_publish(Msg) ->
    %% Not a file transfer message; let it pass through untouched.
    {ok, Msg}.
%% 'message.puback' hook: intercept `$file/...' command topics and
%% produce the PUBACK reason code; all other messages are ignored so
%% the default puback handling applies.
on_message_puback(PacketId, Msg = #message{topic = <<"$file/", FileCommand/binary>>}, _PubRes, _RC) ->
    {stop, on_file_command(PacketId, Msg, FileCommand)};
on_message_puback(_PacketId, _Msg, _PubRes, _RC) ->
    ignore.
%%--------------------------------------------------------------------
%% Handlers for transfer messages
%%--------------------------------------------------------------------
%% TODO Move to emqx_ft_mqtt?
%% TODO Move to emqx_ft_mqtt?

%% Split a `$file/...' command topic: the first level is the file id,
%% the remaining levels select the command. Returns a PUBACK reason code.
on_file_command(PacketId, Msg, FileCommand) ->
    case emqx_topic:tokens(FileCommand) of
        [FileIdIn | Rest] ->
            validate([{fileid, FileIdIn}], fun([FileId]) ->
                on_file_command(PacketId, FileId, Msg, Rest)
            end);
        [] ->
            %% Bare `$file/' with no file id is a protocol violation.
            ?RC_UNSPECIFIED_ERROR
    end.
%% Dispatch a file command for a transfer based on the remaining topic
%% levels. Unknown commands and validation failures all map to
%% ?RC_UNSPECIFIED_ERROR.
on_file_command(PacketId, FileId, Msg, FileCommand) ->
    Transfer = transfer(Msg, FileId),
    case FileCommand of
        %% `$file/FileId/init': payload carries the JSON file metadata.
        [<<"init">>] ->
            validate(
                [{filemeta, Msg#message.payload}],
                fun([Meta]) ->
                    on_init(PacketId, Msg, Transfer, Meta)
                end
            );
        %% `$file/FileId/fin/FinalSize[/Checksum]': finish the transfer;
        %% the checksum topic level is optional.
        [<<"fin">>, FinalSizeBin | MaybeChecksum] when length(MaybeChecksum) =< 1 ->
            ChecksumBin = emqx_maybe:from_list(MaybeChecksum),
            validate(
                [{size, FinalSizeBin}, {{maybe, checksum}, ChecksumBin}],
                fun([FinalSize, Checksum]) ->
                    on_fin(PacketId, Msg, Transfer, FinalSize, Checksum)
                end
            );
        %% `$file/FileId/abort': cancel the transfer.
        [<<"abort">>] ->
            on_abort(Msg, Transfer);
        %% `$file/FileId/Offset': segment upload without a checksum.
        [OffsetBin] ->
            validate([{offset, OffsetBin}], fun([Offset]) ->
                on_segment(PacketId, Msg, Transfer, Offset, undefined)
            end);
        %% `$file/FileId/Offset/Checksum': segment upload; the payload is
        %% verified against the per-segment SHA-256 checksum before storing.
        [OffsetBin, ChecksumBin] ->
            validate(
                [{offset, OffsetBin}, {checksum, ChecksumBin}],
                fun([Offset, Checksum]) ->
                    validate(
                        [{integrity, Msg#message.payload, Checksum}],
                        fun(_) ->
                            on_segment(PacketId, Msg, Transfer, Offset, Checksum)
                        end
                    )
                end
            );
        _ ->
            ?RC_UNSPECIFIED_ERROR
    end.
%% Handle the `init' command: register a responder keyed by
%% {channel pid, packet id} and store the file metadata. The PUBACK for
%% this packet is issued from on_complete/4 via the responder callback.
on_init(PacketId, Msg, Transfer, Meta) ->
    ?tp(info, "file_transfer_init", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        filemeta => Meta
    }),
    PacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("store_filemeta", PacketKey, Transfer, Result)
    end,
    with_responder(PacketKey, Callback, emqx_ft_conf:init_timeout(), fun() ->
        case store_filemeta(Transfer, Meta) of
            % Stored, ack through the responder right away
            ok ->
                emqx_ft_responder:ack(PacketKey, ok);
            % Storage operation started, packet will be acked by the responder
            % {async, Pid} ->
            %     ok = emqx_ft_responder:kickoff(PacketKey, Pid),
            %     ok;
            %% Storage operation failed, ack through the responder
            {error, _} = Error ->
                emqx_ft_responder:ack(PacketKey, Error)
        end
    end).

%% Handle the `abort' command. Not implemented yet: always acks success.
on_abort(_Msg, _FileId) ->
    %% TODO
    ?RC_SUCCESS.
%% Handle a segment upload: store {Offset, Payload} under a responder,
%% mirroring the on_init/4 flow. `Checksum' is `undefined' when the
%% client did not provide a per-segment checksum.
on_segment(PacketId, Msg, Transfer, Offset, Checksum) ->
    ?tp(info, "file_transfer_segment", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        offset => Offset,
        checksum => Checksum
    }),
    Segment = {Offset, Msg#message.payload},
    PacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("store_segment", PacketKey, Transfer, Result)
    end,
    with_responder(PacketKey, Callback, emqx_ft_conf:store_segment_timeout(), fun() ->
        case store_segment(Transfer, Segment) of
            %% Stored synchronously; ack through the responder right away.
            ok ->
                emqx_ft_responder:ack(PacketKey, ok);
            % {async, Pid} ->
            %     ok = emqx_ft_responder:kickoff(PacketKey, Pid),
            %     ok;
            %% Storage operation failed; ack the error through the responder.
            {error, _} = Error ->
                emqx_ft_responder:ack(PacketKey, Error)
        end
    end).
%% Handle the `fin' command: kick off assembly of the stored segments
%% into the final file. Assembly is usually asynchronous, in which case
%% the responder monitors the assembler and acks the packet later.
on_fin(PacketId, Msg, Transfer, FinalSize, Checksum) ->
    ?tp(info, "file_transfer_fin", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        final_size => FinalSize,
        checksum => Checksum
    }),
    %% TODO: handle checksum? Do we need it?
    FinPacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("assemble", FinPacketKey, Transfer, Result)
    end,
    with_responder(FinPacketKey, Callback, emqx_ft_conf:assemble_timeout(), fun() ->
        case assemble(Transfer, FinalSize) of
            %% Assembling completed, ack through the responder right away
            ok ->
                emqx_ft_responder:ack(FinPacketKey, ok);
            %% Assembling started, packet will be acked by the responder
            {async, Pid} ->
                ok = emqx_ft_responder:kickoff(FinPacketKey, Pid),
                ok;
            %% Assembling failed, ack through the responder
            {error, _} = Error ->
                emqx_ft_responder:ack(FinPacketKey, Error)
        end
    end).
%% Run `CriticalSection' under a freshly started responder for `Key',
%% which fires `Callback' on ack, on monitored-process death, or after
%% `Timeout' (these outcomes are the Result shapes seen in on_complete/4).
%% If a responder for `Key' already exists, this packet is treated as a
%% retransmission and the critical section is skipped.
%% Always returns `undefined': the eventual PUBACK is sent
%% asynchronously from the responder callback — TODO confirm how the
%% puback hook interprets the `undefined' reason code.
with_responder(Key, Callback, Timeout, CriticalSection) ->
    case emqx_ft_responder:start(Key, Callback, Timeout) of
        %% We have new packet
        {ok, _} ->
            CriticalSection();
        %% Packet already received.
        %% Since we are still handling the previous one,
        %% we probably have retransmit here
        {error, {already_started, _}} ->
            ok
    end,
    undefined.
%% The three wrappers below catch any crash inside the storage layer,
%% log it, and return {error, {internal_error, Reason}} so callers get
%% an error tuple instead of an exception.

store_filemeta(Transfer, Segment) ->
    try
        emqx_ft_storage:store_filemeta(Transfer, Segment)
    catch
        C:E:S ->
            ?tp(error, "start_store_filemeta_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.

store_segment(Transfer, Segment) ->
    try
        emqx_ft_storage:store_segment(Transfer, Segment)
    catch
        C:E:S ->
            ?tp(error, "start_store_segment_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.

assemble(Transfer, FinalSize) ->
    try
        emqx_ft_storage:assemble(Transfer, FinalSize)
    catch
        C:E:S ->
            ?tp(error, "start_assemble_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.
%% Transfer identifier: the publishing client's id (normalized to a
%% binary) paired with the file id.
transfer(#message{from = ClientId}, FileId) ->
    {clientid_to_binary(ClientId), FileId}.
%% Responder callback: translate the outcome of a storage operation
%% into a PUBACK message sent to the channel process. `Mode' is `ack'
%% (explicit ack) or `down' (the monitored worker exited); `timeout'
%% means the responder expired before any result arrived.
on_complete(Op, {ChanPid, PacketId}, Transfer, Result) ->
    ?tp(debug, "on_complete", #{
        operation => Op,
        packet_id => PacketId,
        transfer => Transfer
    }),
    case Result of
        {Mode, ok} when Mode == ack orelse Mode == down ->
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_SUCCESS});
        {Mode, {error, _} = Reason} when Mode == ack orelse Mode == down ->
            ?tp(error, Op ++ "_failed", #{
                transfer => Transfer,
                reason => Reason
            }),
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR});
        timeout ->
            ?tp(error, Op ++ "_timed_out", #{
                transfer => Transfer
            }),
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR})
    end.
%% Run `Validations' (see do_validate/2) and, on success, apply `Fun'
%% to the list of parsed values. Any validation failure is logged as a
%% protocol violation and reported as an unspecified error reason code.
validate(Validations, Fun) ->
    case do_validate(Validations, []) of
        {ok, Parsed} ->
            Fun(Parsed);
        {error, Reason} ->
            ?tp(info, "client_violated_protocol", #{reason => Reason}),
            ?RC_UNSPECIFIED_ERROR
    end.
%% Sequentially run validations, accumulating parsed values.
%% Returns {ok, Values} with the values in the same order as the
%% validations, or {error, Reason} for the first failing one.
do_validate([], Parsed) ->
    {ok, lists:reverse(Parsed)};
do_validate([{fileid, FileId} | Rest], Parsed) ->
    %% File id must be a non-empty binary.
    case byte_size(FileId) of
        S when S > 0 ->
            do_validate(Rest, [FileId | Parsed]);
        0 ->
            {error, {invalid_fileid, FileId}}
    end;
do_validate([{filemeta, Payload} | Rest], Parsed) ->
    case decode_filemeta(Payload) of
        {ok, Meta} ->
            do_validate(Rest, [Meta | Parsed]);
        {error, Reason} ->
            {error, Reason}
    end;
do_validate([{offset, Offset} | Rest], Parsed) ->
    %% Offsets are byte positions (offset() :: non_neg_integer()).
    %% Require a complete parse AND a non-negative value:
    %% `string:to_integer(<<"-1">>)' parses successfully, but a negative
    %% offset must not reach the storage layer.
    case string:to_integer(Offset) of
        {Int, <<>>} when Int >= 0 ->
            do_validate(Rest, [Int | Parsed]);
        _ ->
            {error, {invalid_offset, Offset}}
    end;
do_validate([{size, Size} | Rest], Parsed) ->
    %% Same rule as offsets: the final size must be a non-negative integer.
    case string:to_integer(Size) of
        {Int, <<>>} when Int >= 0 ->
            do_validate(Rest, [Int | Parsed]);
        _ ->
            {error, {invalid_size, Size}}
    end;
do_validate([{checksum, Checksum} | Rest], Parsed) ->
    case parse_checksum(Checksum) of
        {ok, Bin} ->
            do_validate(Rest, [Bin | Parsed]);
        {error, _Reason} ->
            {error, {invalid_checksum, Checksum}}
    end;
do_validate([{integrity, Payload, Checksum} | Rest], Parsed) ->
    %% Verify the segment payload against its advertised SHA-256 digest.
    case crypto:hash(sha256, Payload) of
        Checksum ->
            do_validate(Rest, [Payload | Parsed]);
        Mismatch ->
            {error, {checksum_mismatch, binary:encode_hex(Mismatch)}}
    end;
%% Optional validations: `undefined' passes through unvalidated.
%% ('maybe' is quoted since it became a keyword in recent OTP releases;
%% the atom value is unchanged.)
do_validate([{{'maybe', _}, undefined} | Rest], Parsed) ->
    do_validate(Rest, [undefined | Parsed]);
do_validate([{{'maybe', T}, Value} | Rest], Parsed) ->
    do_validate([{T, Value} | Rest], Parsed).
%% Parse a 64-character hex-encoded SHA-256 digest into its 32-byte
%% binary form; anything else is rejected as `invalid_checksum'.
parse_checksum(Hex) when is_binary(Hex), byte_size(Hex) =:= 64 ->
    try binary:decode_hex(Hex) of
        Digest ->
            {ok, Digest}
    catch
        error:badarg ->
            %% Correct length, but not valid hexadecimal.
            {error, invalid_checksum}
    end;
parse_checksum(_Other) ->
    {error, invalid_checksum}.
%% Normalize a client id to binary form; ids may be atoms or binaries.
clientid_to_binary(ClientId) when is_binary(ClientId) ->
    ClientId;
clientid_to_binary(ClientId) when is_atom(ClientId) ->
    atom_to_binary(ClientId).

View File

@ -0,0 +1,239 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_api).
-behaviour(minirest_api).
-include_lib("typerefl/include/types.hrl").
-include_lib("hocon/include/hoconsc.hrl").
%% Swagger specs from hocon schema
-export([
api_spec/0,
paths/0,
schema/1,
namespace/0
]).
-export([
roots/0,
fields/1
]).
%% Minirest filter for checking if file transfer is enabled
-export([check_ft_enabled/2]).
%% API callbacks
-export([
'/file_transfer/files'/2,
'/file_transfer/files/:clientid/:fileid'/2
]).
-import(hoconsc, [mk/2, ref/1, ref/2]).
namespace() -> "file_transfer".
api_spec() ->
emqx_dashboard_swagger:spec(?MODULE, #{
check_schema => true, filter => fun ?MODULE:check_ft_enabled/2
}).
paths() ->
[
"/file_transfer/files",
"/file_transfer/files/:clientid/:fileid"
].
schema("/file_transfer/files") ->
#{
'operationId' => '/file_transfer/files',
get => #{
tags => [<<"file_transfer">>],
summary => <<"List all uploaded files">>,
description => ?DESC("file_list"),
parameters => [
ref(following),
ref(emqx_dashboard_swagger, limit)
],
responses => #{
200 => <<"Operation success">>,
400 => emqx_dashboard_swagger:error_codes(
['BAD_REQUEST'], <<"Invalid cursor">>
),
503 => emqx_dashboard_swagger:error_codes(
['SERVICE_UNAVAILABLE'], error_desc('SERVICE_UNAVAILABLE')
)
}
}
};
schema("/file_transfer/files/:clientid/:fileid") ->
#{
'operationId' => '/file_transfer/files/:clientid/:fileid',
get => #{
tags => [<<"file_transfer">>],
summary => <<"List files uploaded in a specific transfer">>,
description => ?DESC("file_list_transfer"),
parameters => [
ref(client_id),
ref(file_id)
],
responses => #{
200 => <<"Operation success">>,
404 => emqx_dashboard_swagger:error_codes(
['FILES_NOT_FOUND'], error_desc('FILES_NOT_FOUND')
),
503 => emqx_dashboard_swagger:error_codes(
['SERVICE_UNAVAILABLE'], error_desc('SERVICE_UNAVAILABLE')
)
}
}
}.
%% Minirest filter: reject every request with 503 while the file
%% transfer feature is disabled in the configuration.
check_ft_enabled(Params, _Meta) ->
    case emqx_ft_conf:enabled() of
        true ->
            {ok, Params};
        false ->
            {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)}
    end.
%% GET /file_transfer/files — listing of exported files, paginated via
%% an opaque `following' cursor and a `limit' page size.
'/file_transfer/files'(get, #{
    query_string := QueryString
}) ->
    try
        Limit = limit(QueryString),
        Query =
            case maps:get(<<"following">>, QueryString, undefined) of
                undefined ->
                    #{limit => Limit};
                Cursor ->
                    #{limit => Limit, following => Cursor}
            end,
        case emqx_ft_storage:files(Query) of
            {ok, Page} ->
                {200, format_page(Page)};
            {error, _} ->
                {503, error_msg('SERVICE_UNAVAILABLE')}
        end
    catch
        %% Raised while interpreting a malformed `following' cursor —
        %% presumably by the storage layer; TODO confirm the error source.
        error:{badarg, cursor} ->
            {400, error_msg('BAD_REQUEST', <<"Invalid cursor">>)}
    end.
%% GET /file_transfer/files/:clientid/:fileid — files belonging to one
%% specific transfer. An `enoent' heading the per-node error list maps
%% to 404; any other error to 503.
'/file_transfer/files/:clientid/:fileid'(get, #{
    bindings := #{clientid := ClientId, fileid := FileId}
}) ->
    Transfer = {ClientId, FileId},
    case emqx_ft_storage:files(#{transfer => Transfer}) of
        {ok, Page} ->
            {200, format_page(Page)};
        {error, [{_Node, enoent} | _]} ->
            {404, error_msg('FILES_NOT_FOUND')};
        {error, _} ->
            {503, error_msg('SERVICE_UNAVAILABLE')}
    end.
%% Render a storage page as the API response body; the cursor key is
%% included only when the storage layer provided one.
format_page(Page = #{items := Files}) ->
    Formatted = #{<<"files">> => [format_file_info(File) || File <- Files]},
    case Page of
        #{cursor := Cursor} ->
            Formatted#{<<"cursor">> => Cursor};
        #{} ->
            Formatted
    end.
%% Error response body with the default description for `Code'.
error_msg(Code) ->
    #{code => Code, message => error_desc(Code)}.

%% Error response body with a custom message, normalized to a readable binary.
error_msg(Code, Msg) ->
    #{code => Code, message => emqx_utils:readable_error_msg(Msg)}.
%% Human-readable description for each error code used by this API.
error_desc(Code) ->
    case Code of
        'FILES_NOT_FOUND' ->
            <<"Files requested for this transfer could not be found">>;
        'SERVICE_UNAVAILABLE' ->
            <<"Service unavailable">>
    end.
%% No schema roots: this module contributes API parameter fragments only.
roots() ->
    [].

%% Reusable request parameter definitions referenced by schema/1.
-spec fields(hocon_schema:name()) -> [hoconsc:field()].
fields(client_id) ->
    [
        {clientid,
            mk(binary(), #{
                in => path,
                desc => <<"MQTT Client ID">>,
                required => true
            })}
    ];
fields(file_id) ->
    [
        {fileid,
            mk(binary(), #{
                in => path,
                desc => <<"File ID">>,
                required => true
            })}
    ];
fields(following) ->
    [
        {following,
            mk(binary(), #{
                in => query,
                desc => <<"Cursor to start listing files from">>,
                required => false
            })}
    ].
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% Map an internal file info map to the JSON-friendly API shape.
%% `metadata' is included only when the info map carries `meta'.
format_file_info(
    Info = #{
        name := Name,
        size := Size,
        uri := URI,
        timestamp := Timestamp,
        transfer := {ClientId, FileId}
    }
) ->
    Res = #{
        name => format_name(Name),
        size => Size,
        timestamp => format_timestamp(Timestamp),
        clientid => ClientId,
        fileid => FileId,
        uri => iolist_to_binary(URI)
    },
    case Info of
        #{meta := Meta} ->
            Res#{metadata => emqx_ft:encode_filemeta(Meta)};
        #{} ->
            Res
    end.
%% Render an epoch-seconds timestamp as an RFC 3339 binary (local UTC
%% offset, since no `offset' option is passed).
format_timestamp(EpochSeconds) ->
    RFC3339 = calendar:system_time_to_rfc3339(EpochSeconds, [{unit, second}]),
    list_to_binary(RFC3339).
%% Coerce a file display name to a binary; names arrive either as
%% binaries or as iolists.
format_name(Name) when is_binary(Name) ->
    Name;
format_name(Chars) when is_list(Chars) ->
    iolist_to_binary(Chars).
%% Page size for file listing; falls back to the management default.
%% NOTE: maps:get/3 evaluates its default eagerly, so
%% emqx_mgmt:default_row_limit() is called even when `limit' is present.
limit(QueryString) ->
    maps:get(<<"limit">>, QueryString, emqx_mgmt:default_row_limit()).

View File

@ -0,0 +1,30 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_app).
-behaviour(application).
-export([start/2, stop/1]).
%% Application callback: start the supervision tree, then load the
%% file transfer configuration.
start(_StartType, _StartArgs) ->
    {ok, Sup} = emqx_ft_sup:start_link(),
    ok = emqx_ft_conf:load(),
    {ok, Sup}.

%% Application callback: unload the configuration before shutdown.
stop(_State) ->
    ok = emqx_ft_conf:unload(),
    ok.

View File

@ -0,0 +1,192 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Assembler process: gathers all fragments of a single transfer
%% (possibly scattered across cluster nodes) and writes them out through a
%% storage exporter. Implemented as a `gen_statem` registered via gproc,
%% keyed by the transfer identifier.
-module(emqx_ft_assembler).
-export([start_link/3]).
-behaviour(gen_statem).
-export([callback_mode/0]).
-export([init/1]).
-export([handle_event/4]).
-export([terminate/3]).
-export([where/1]).
%% Process state data. `export` is only present between a successful
%% `start_export` and completion / discard.
-type stdata() :: #{
    storage := emqx_ft_storage_fs:storage(),
    transfer := emqx_ft:transfer(),
    assembly := emqx_ft_assembly:t(),
    export => emqx_ft_storage_exporter:export()
}.
%% One assembler per transfer: the gproc name is derived from the transfer id.
-define(NAME(Transfer), {n, l, {?MODULE, Transfer}}).
-define(REF(Transfer), {via, gproc, ?NAME(Transfer)}).
%%
start_link(Storage, Transfer, Size) ->
    gen_statem:start_link(?REF(Transfer), ?MODULE, {Storage, Transfer, Size}, []).
%% Look up the assembler pid for a transfer, `undefined` if none is running.
where(Transfer) ->
    gproc:where(?NAME(Transfer)).
%%
%% Linear progression of the assembling process:
%% idle -> list_local_fragments [-> list_remote_fragments]
%%      -> start_assembling -> assemble (one segment at a time) -> complete.
-type state() ::
    idle
    | list_local_fragments
    | {list_remote_fragments, [node()]}
    | start_assembling
    | {assemble, [{node(), emqx_ft_storage_fs:filefrag()}]}
    | complete.
%% Self-directed internal event used to drive the state machine forward.
-define(internal(C), {next_event, internal, C}).
callback_mode() ->
    handle_event_function.
-spec init(_Args) -> {ok, state(), stdata()}.
init({Storage, Transfer, Size}) ->
    %% Trap exits so `terminate/3` runs and can discard a half-written export.
    _ = erlang:process_flag(trap_exit, true),
    St = #{
        storage => Storage,
        transfer => Transfer,
        assembly => emqx_ft_assembly:new(Size)
    },
    %% Stay idle until a `kickoff` message arrives (see handle_event/4).
    {ok, idle, St}.
-spec handle_event(info | internal, _, state(), stdata()) ->
    {next_state, state(), stdata(), {next_event, internal, _}}
    | {stop, {shutdown, ok | {error, _}}, stdata()}.
handle_event(info, kickoff, idle, St) ->
    % NOTE
    % Someone's told us to start the work, which usually means that it has set up a monitor.
    % We could wait for this message and handle it at the end of the assembling rather than at
    % the beginning, however it would make error handling much messier.
    {next_state, list_local_fragments, St, ?internal([])};
handle_event(info, kickoff, _, _St) ->
    %% Duplicate kickoff while already running: ignore.
    keep_state_and_data;
handle_event(
    internal,
    _,
    list_local_fragments,
    St = #{storage := Storage, transfer := Transfer, assembly := Asm}
) ->
    % TODO: what we do with non-transients errors here (e.g. `eacces`)?
    {ok, Fragments} = emqx_ft_storage_fs:list(Storage, Transfer, fragment),
    NAsm = emqx_ft_assembly:update(emqx_ft_assembly:append(Asm, node(), Fragments)),
    NSt = St#{assembly := NAsm},
    case emqx_ft_assembly:status(NAsm) of
        complete ->
            %% Local fragments alone cover the whole file: no need to ask peers.
            {next_state, start_assembling, NSt, ?internal([])};
        {incomplete, _} ->
            %% Ask the rest of the cluster for the missing fragments.
            Nodes = mria_mnesia:running_nodes() -- [node()],
            {next_state, {list_remote_fragments, Nodes}, NSt, ?internal([])};
        % TODO: recovery?
        {error, _} = Error ->
            {stop, {shutdown, Error}}
    end;
handle_event(
    internal,
    _,
    {list_remote_fragments, Nodes},
    St = #{transfer := Transfer, assembly := Asm}
) ->
    % TODO
    % Async would better because we would not need to wait for some lagging nodes if
    % the coverage is already complete.
    % TODO: portable "storage" ref
    Results = emqx_ft_storage_fs_proto_v1:multilist(Nodes, Transfer, fragment),
    NodeResults = lists:zip(Nodes, Results),
    NAsm = emqx_ft_assembly:update(
        lists:foldl(
            fun
                ({Node, {ok, {ok, Fragments}}}, Acc) ->
                    emqx_ft_assembly:append(Acc, Node, Fragments);
                ({_Node, _Result}, Acc) ->
                    % TODO: log?
                    %% RPC failures / per-node errors are silently skipped here;
                    %% missing coverage is detected by the status check below.
                    Acc
            end,
            Asm,
            NodeResults
        )
    ),
    NSt = St#{assembly := NAsm},
    case emqx_ft_assembly:status(NAsm) of
        complete ->
            {next_state, start_assembling, NSt, ?internal([])};
        % TODO: retries / recovery?
        {incomplete, _} = Status ->
            {stop, {shutdown, {error, Status}}};
        {error, _} = Error ->
            {stop, {shutdown, Error}}
    end;
handle_event(
    internal,
    _,
    start_assembling,
    St = #{storage := Storage, transfer := Transfer, assembly := Asm}
) ->
    %% Coverage is a list of {Node, Segment} pairs covering the file end-to-end.
    Filemeta = emqx_ft_assembly:filemeta(Asm),
    Coverage = emqx_ft_assembly:coverage(Asm),
    case emqx_ft_storage_exporter:start_export(Storage, Transfer, Filemeta) of
        {ok, Export} ->
            {next_state, {assemble, Coverage}, St#{export => Export}, ?internal([])};
        {error, _} = Error ->
            {stop, {shutdown, Error}}
    end;
handle_event(internal, _, {assemble, [{Node, Segment} | Rest]}, St = #{export := Export}) ->
    % TODO
    % Currently, race is possible between getting segment info from the remote node and
    % this node garbage collecting the segment itself.
    % TODO: pipelining
    % TODO: better error handling
    {ok, Content} = pread(Node, Segment, St),
    case emqx_ft_storage_exporter:write(Export, Content) of
        {ok, NExport} ->
            {next_state, {assemble, Rest}, St#{export := NExport}, ?internal([])};
        {error, _} = Error ->
            %% `export` is dropped from state so terminate/3 won't discard twice.
            {stop, {shutdown, Error}, maps:remove(export, St)}
    end;
handle_event(internal, _, {assemble, []}, St = #{}) ->
    %% All segments written: finalize the export.
    {next_state, complete, St, ?internal([])};
handle_event(internal, _, complete, St = #{export := Export}) ->
    Result = emqx_ft_storage_exporter:complete(Export),
    %% GC fragment files only if the export completed successfully.
    _ = maybe_garbage_collect(Result, St),
    {stop, {shutdown, Result}, maps:remove(export, St)}.
-spec terminate(_Reason, state(), stdata()) -> _.
terminate(_Reason, _StateName, St) ->
    %% An export still present in the state means assembling did not finish
    %% cleanly: drop whatever was written so far.
    case St of
        #{export := Export} ->
            emqx_ft_storage_exporter:discard(Export);
        #{} ->
            ok
    end.

%% Read a whole segment, either from local storage or over RPC from the node
%% that holds it.
pread(Node, Segment, St = #{transfer := Transfer}) ->
    Length = segsize(Segment),
    case Node =:= node() of
        true ->
            #{storage := Storage} = St,
            emqx_ft_storage_fs:pread(Storage, Transfer, Segment, 0, Length);
        false ->
            emqx_ft_storage_fs_proto_v1:pread(Node, Transfer, Segment, 0, Length)
    end.

%%

%% Reclaim fragment files on every involved node, but only after a successful
%% export; on error they are kept for a potential retry.
maybe_garbage_collect(ok, #{storage := Storage, transfer := Transfer, assembly := Asm}) ->
    Nodes = emqx_ft_assembly:nodes(Asm),
    emqx_ft_storage_fs_gc:collect(Storage, Transfer, Nodes);
maybe_garbage_collect({error, _}, _St) ->
    ok.

%% Size (in bytes) of a segment fragment.
segsize(#{fragment := {segment, #{size := Size}}}) ->
    Size.

View File

@ -0,0 +1,47 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Supervisor for per-transfer assembler processes.
-module(emqx_ft_assembler_sup).
-export([start_link/0]).
-export([ensure_child/3]).
-behaviour(supervisor).
-export([init/1]).
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).
%% Start an assembler for the given transfer, or return the already-running
%% one. Children are `temporary`: they are never restarted by the supervisor.
%% NOTE(review): a `{error, Reason}` other than `already_started` falls through
%% this case with a `case_clause` crash — presumably intentional let-it-crash;
%% confirm callers expect that.
ensure_child(Storage, Transfer, Size) ->
    Childspec = #{
        id => Transfer,
        start => {emqx_ft_assembler, start_link, [Storage, Transfer, Size]},
        restart => temporary
    },
    case supervisor:start_child(?MODULE, Childspec) of
        {ok, Pid} ->
            {ok, Pid};
        {error, {already_started, Pid}} ->
            {ok, Pid}
    end.
init(_) ->
    %% NOTE(review): supervisor `period` is in seconds, so this allows 10
    %% restarts per 1000 s — looks like it may have been meant as milliseconds;
    %% confirm the intended restart window.
    SupFlags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 1000
    },
    {ok, {SupFlags, []}}.

View File

@ -0,0 +1,416 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Pure bookkeeping for assembling a transfer: which fragments exist on
%% which nodes, whether they fully cover the file, and which segments to read
%% from where. Coverage is computed as a shortest path over a weighted
%% directed graph of segment offsets (see `add_edge/5`).
-module(emqx_ft_assembly).
-export([new/1]).
-export([append/3]).
-export([update/1]).
-export([status/1]).
-export([filemeta/1]).
-export([nodes/1]).
-export([coverage/1]).
-export([properties/1]).
-export_type([t/0]).
-type filemeta() :: emqx_ft:filemeta().
-type filefrag() :: emqx_ft_storage_fs:filefrag().
-type filefrag(T) :: emqx_ft_storage_fs:filefrag(T).
-type segmentinfo() :: emqx_ft_storage_fs:segmentinfo().
%% Accumulated assembly state.
%% `meta` maps each distinct filemeta to the (first) node/fragment it was seen
%% on; more than one key means the transfer is inconsistent.
%% `segs` is a graph keyed by byte offsets; an edge Offset -> End means some
%% node holds a segment covering [Offset, End).
-record(asm, {
    status :: status(),
    coverage :: coverage() | undefined,
    properties :: properties() | undefined,
    meta :: #{filemeta() => {node(), filefrag({filemeta, filemeta()})}},
    segs :: emqx_wdgraph:t(emqx_ft:offset(), {node(), filefrag({segment, segmentinfo()})}),
    size :: emqx_ft:bytes()
}).
-type status() ::
    {incomplete, {missing, _}}
    | complete
    | {error, {inconsistent, _}}.
-type coverage() :: [{node(), filefrag({segment, segmentinfo()})}].
-type properties() :: #{
    %% Node where "most" of the segments are located.
    dominant => node()
}.
-opaque t() :: #asm{}.
%% Create an empty assembly for a file of `Size` bytes.
-spec new(emqx_ft:bytes()) -> t().
new(Size) ->
    #asm{
        status = {incomplete, {missing, filemeta}},
        meta = #{},
        segs = emqx_wdgraph:new(),
        size = Size
    }.
%% Record one fragment (or a batch of them) as available on `Node`.
-spec append(t(), node(), filefrag() | [filefrag()]) -> t().
append(Asm, Node, Fragments) when is_list(Fragments) ->
    lists:foldl(fun(Fragment, Acc) -> append(Acc, Node, Fragment) end, Asm, Fragments);
append(Asm, Node, Fragment = #{fragment := {filemeta, _}}) ->
    append_filemeta(Asm, Node, Fragment);
append(Asm, Node, Fragment = #{fragment := {segment, _}}) ->
    append_segmentinfo(Asm, Node, Fragment).

%% Recompute the cached status/coverage/properties after fragments changed.
-spec update(t()) -> t().
update(Asm) ->
    case status(meta, Asm) of
        {complete, _Meta} ->
            update_coverage(Asm);
        Status ->
            Asm#asm{status = Status}
    end.

%% Second phase of update/1: filemeta is settled, now check whether the
%% accumulated segments cover the whole file.
update_coverage(Asm) ->
    case status(coverage, Asm) of
        {complete, Coverage, Props} ->
            Asm#asm{
                status = complete,
                coverage = Coverage,
                properties = Props
            };
        Status ->
            Asm#asm{status = Status}
    end.
%% Cached status as of the last `update/1`.
-spec status(t()) -> status().
status(#asm{status = Status}) ->
    Status.
%% The agreed-upon filemeta, or `undefined` if it is not (yet) known.
%% NOTE: the spec previously promised a bare `filemeta()`, but the fallback
%% clause below returns `undefined` whenever meta status is incomplete or
%% inconsistent, so the return type must include it.
-spec filemeta(t()) -> filemeta() | undefined.
filemeta(Asm) ->
    case status(meta, Asm) of
        {complete, Meta} -> Meta;
        _Other -> undefined
    end.
%% Segment coverage computed by the last `update/1`, `undefined` before that.
-spec coverage(t()) -> coverage() | undefined.
coverage(#asm{coverage = Coverage}) ->
    Coverage.
%% All nodes that contributed at least one fragment (filemeta or segment).
%% Returned as a sorted, deduplicated list.
-spec nodes(t()) -> [node()].
nodes(#asm{meta = Meta, segs = Segs}) ->
    MetaNodes = [Node || {Node, _Fragment} <- maps:values(Meta)],
    SegNodes = emqx_wdgraph:fold(
        fun(_Offset, {_End, _Weight, {Node, _Fragment}}, Acc) ->
            [Node | Acc]
        end,
        [],
        Segs
    ),
    lists:usort(MetaNodes ++ SegNodes).

%% Properties computed by the last `update/1` (e.g. the dominant node).
properties(#asm{properties = Properties}) ->
    Properties.
%% Internal two-phase status computation.
%% `meta`: exactly one distinct filemeta is required; zero means missing,
%% more than one means the transfer is inconsistent.
status(meta, #asm{meta = Meta}) ->
    status(meta, maps:to_list(Meta));
status(meta, [{Meta, {_Node, _Frag}}]) ->
    {complete, Meta};
status(meta, []) ->
    {incomplete, {missing, filemeta}};
status(meta, [_M1, _M2 | _] = Metas) ->
    %% Report each conflicting filemeta annotated with the node it came from.
    {error, {inconsistent, [Frag#{node => Node} || {_, {Node, Frag}} <- Metas]}};
%% `coverage`: segments must form a contiguous path from offset 0 to `size`.
status(coverage, #asm{segs = Segments, size = Size}) ->
    case coverage(Segments, Size) of
        Coverage when is_list(Coverage) ->
            {complete, Coverage, #{
                dominant => dominant(Coverage)
            }};
        Missing = {missing, _} ->
            {incomplete, Missing}
    end.
%% Record a filemeta fragment; keyed by the meta itself, so identical metas
%% from different nodes collapse into one entry.
append_filemeta(Asm = #asm{meta = Metas}, Node, Fragment = #{fragment := {filemeta, Meta}}) ->
    Asm#asm{meta = Metas#{Meta => {Node, Fragment}}}.

%% Record a segment fragment as an edge in the offset graph.
append_segmentinfo(Asm, _Node, #{fragment := {segment, #{size := 0}}}) ->
    % NOTE
    % Empty segments are valid but meaningless for coverage.
    Asm;
append_segmentinfo(Asm, Node, Fragment = #{fragment := {segment, Info}}) ->
    Offset = maps:get(offset, Info),
    Size = maps:get(size, Info),
    % TODO
    % In theory it's possible to have two segments with same offset + size on
    % different nodes but with differing content. We'd need a checksum to
    % be able to disambiguate them though.
    NSegs = add_edge(Asm#asm.segs, Offset, Offset + Size, locality(Node) * Size, {Node, Fragment}),
    Asm#asm{segs = NSegs}.

add_edge(Segs, Offset, End, Weight, Label) ->
    % NOTE
    % We are expressing coverage problem as a shortest path problem on weighted directed
    % graph, where nodes are segments offsets, two nodes are connected with edge if
    % there is a segment which "covers" these offsets (i.e. it starts at first node's
    % offset and ends at second node's offset) and weights are segments sizes adjusted
    % for locality (i.e. weight are always 0 for any local segment).
    case emqx_wdgraph:find_edge(Offset, End, Segs) of
        {ExistingWeight, _ExistingLabel} when ExistingWeight =< Weight ->
            % NOTE
            % Discarding any edges with higher weight here. This is fine as long as we
            % optimize for locality.
            Segs;
        _Otherwise ->
            emqx_wdgraph:insert_edge(Offset, End, Weight, Label, Segs)
    end.
%% Find a chain of segments covering [0, Size), preferring local segments
%% (their edges have weight 0). Returns the path, or `{missing, _}` with the
%% furthest offset reachable from 0.
coverage(Segs, Size) ->
    case emqx_wdgraph:find_shortest_path(0, Size, Segs) of
        Path when is_list(Path) ->
            Path;
        {false, LastOffset} ->
            % NOTE
            % This is far from being accurate, but needs no hairy specifics in the
            % `emqx_wdgraph` interface.
            {missing, {segment, LastOffset, Size}}
    end.
%% Node contributing the most bytes to the coverage; falls back to the local
%% node when there are no contributions at all.
dominant(Coverage) ->
    % TODO: needs improvement, better defined _dominance_, maybe some score
    Weights = frequencies(fun({Node, Segment}) -> {Node, segsize(Segment)} end, Coverage),
    maxfreq(Weights, node()).

%% Sum weights per key: `Fun` maps each element to a {Key, Weight} pair.
frequencies(Fun, List) ->
    lists:foldl(
        fun(Elem, Acc) ->
            {Key, Weight} = Fun(Elem),
            Acc#{Key => maps:get(Key, Acc, 0) + Weight}
        end,
        #{},
        List
    ).

%% Key with the strictly largest weight; `Init` wins if nothing beats 0.
maxfreq(Freqs, Init) ->
    Pick = fun
        (Key, Weight, {Best, _BestKey}) when Weight > Best -> {Weight, Key};
        (_Key, _Weight, Acc) -> Acc
    end,
    {_BestWeight, Winner} = maps:fold(Pick, {0, Init}, Freqs),
    Winner.

%% Weight multiplier: local segments cost nothing to read.
locality(Node) ->
    case Node =:= node() of
        % NOTE
        % This should prioritize locally available segments over those on remote nodes.
        true -> 0;
        false -> 1
    end.

segsize(#{fragment := {segment, #{size := Size}}}) ->
    Size.
%% EUnit coverage of the pure assembly logic (no processes involved).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
%% A fresh assembly reports missing filemeta.
incomplete_new_test() ->
    ?assertEqual(
        {incomplete, {missing, filemeta}},
        status(update(new(42)))
    ).
%% Segments alone are not enough: filemeta is still missing.
incomplete_test() ->
    ?assertEqual(
        {incomplete, {missing, filemeta}},
        status(
            update(
                append(new(142), node(), [
                    segment(p1, 0, 42),
                    segment(p1, 42, 100)
                ])
            )
        )
    ).
%% Identical filemetas from different nodes collapse into one entry.
consistent_test() ->
    Asm1 = append(new(42), n1, [filemeta(m1, "blarg")]),
    Asm2 = append(Asm1, n2, [segment(s2, 0, 42)]),
    Asm3 = append(Asm2, n3, [filemeta(m3, "blarg")]),
    ?assertMatch({complete, _}, status(meta, Asm3)).
%% Differing filemetas make the whole transfer inconsistent.
inconsistent_test() ->
    Asm1 = append(new(42), node(), [segment(s1, 0, 42)]),
    Asm2 = append(Asm1, n1, [filemeta(m1, "blarg")]),
    Asm3 = append(Asm2, n2, [segment(s2, 0, 42), filemeta(m1, "blorg")]),
    Asm4 = append(Asm3, n3, [filemeta(m3, "blarg")]),
    ?assertMatch(
        {error,
            {inconsistent, [
                % blarg < blorg
                #{node := n3, path := m3, fragment := {filemeta, #{name := "blarg"}}},
                #{node := n2, path := m1, fragment := {filemeta, #{name := "blorg"}}}
            ]}},
        status(meta, Asm4)
    ).
%% Non-overlapping segments in arbitrary order yield an in-order coverage.
simple_coverage_test() ->
    Node = node(),
    Segs = [
        {node42, segment(n1, 20, 30)},
        {Node, segment(n2, 0, 10)},
        {Node, segment(n3, 50, 50)},
        {Node, segment(n4, 10, 10)}
    ],
    Asm = append_many(new(100), Segs),
    ?assertMatch(
        {complete,
            [
                {Node, #{path := n2}},
                {Node, #{path := n4}},
                {node42, #{path := n1}},
                {Node, #{path := n3}}
            ],
            #{dominant := Node}},
        status(coverage, Asm)
    ).
%% Overlapping segments: a valid (not necessarily local-only) chain is picked.
redundant_coverage_test() ->
    Node = node(),
    Segs = [
        {Node, segment(n1, 0, 20)},
        {node1, segment(n2, 0, 10)},
        {Node, segment(n3, 20, 40)},
        {node2, segment(n4, 10, 10)},
        {node2, segment(n5, 50, 20)},
        {node3, segment(n6, 20, 20)},
        {Node, segment(n7, 50, 10)},
        {node1, segment(n8, 40, 10)}
    ],
    Asm = append_many(new(70), Segs),
    ?assertMatch(
        {complete,
            [
                {Node, #{path := n1}},
                {node3, #{path := n6}},
                {node1, #{path := n8}},
                {node2, #{path := n5}}
            ],
            #{dominant := _}},
        status(coverage, Asm)
    ).
%% With equivalent alternatives, local segments win (weight 0).
redundant_coverage_prefer_local_test() ->
    Node = node(),
    Segs = [
        {node1, segment(n1, 0, 20)},
        {Node, segment(n2, 0, 10)},
        {Node, segment(n3, 10, 10)},
        {node2, segment(n4, 20, 20)},
        {Node, segment(n5, 30, 10)},
        {Node, segment(n6, 20, 10)}
    ],
    Asm = append_many(new(40), Segs),
    ?assertMatch(
        {complete,
            [
                {Node, #{path := n2}},
                {Node, #{path := n3}},
                {Node, #{path := n6}},
                {Node, #{path := n5}}
            ],
            #{dominant := Node}},
        status(coverage, Asm)
    ).
%% A gap in the middle is reported as missing (imprecisely, see NOTE below).
missing_coverage_test() ->
    Node = node(),
    Segs = [
        {Node, segment(n1, 0, 10)},
        {node1, segment(n3, 10, 20)},
        {Node, segment(n2, 0, 20)},
        {node2, segment(n4, 50, 50)},
        {Node, segment(n5, 40, 60)}
    ],
    Asm = append_many(new(100), Segs),
    ?assertEqual(
        % {incomplete, {missing, {segment, 30, 40}}} would be more accurate
        {incomplete, {missing, {segment, 30, 100}}},
        status(coverage, Asm)
    ).
%% A gap at the tail of the file is also detected.
missing_end_coverage_test() ->
    Node = node(),
    Segs = [
        {Node, segment(n1, 0, 15)},
        {node1, segment(n3, 10, 10)}
    ],
    Asm = append_many(new(20), Segs),
    ?assertEqual(
        {incomplete, {missing, {segment, 15, 20}}},
        status(coverage, Asm)
    ).
%% Redundant segments do not mask a genuine gap.
missing_coverage_with_redudancy_test() ->
    Segs = [
        {node(), segment(n1, 0, 10)},
        {node(), segment(n2, 0, 20)},
        {node42, segment(n3, 10, 20)},
        {node43, segment(n4, 10, 50)},
        {node(), segment(n5, 40, 60)}
    ],
    Asm = append_many(new(100), Segs),
    ?assertEqual(
        % {incomplete, {missing, {segment, 50, 60}}}, ???
        {incomplete, {missing, {segment, 60, 100}}},
        status(coverage, Asm)
    ).
%% Test helpers: build assemblies and fragments with minimal boilerplate.
append_many(Asm, List) ->
    lists:foldl(
        fun({Node, Frag}, Acc) -> append(Acc, Node, Frag) end,
        Asm,
        List
    ).
filemeta(Path, Name) ->
    #{
        path => Path,
        fragment =>
            {filemeta, #{
                name => Name
            }}
    }.
segment(Path, Offset, Size) ->
    #{
        path => Path,
        fragment =>
            {segment, #{
                offset => Offset,
                size => Size
            }}
    }.
-endif.

View File

@ -0,0 +1,143 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc File Transfer configuration management module.
%% Reads the `file_transfer` config subtree and (un)hooks the feature into
%% EMQX when the `enable` flag or storage settings change.
-module(emqx_ft_conf).
-behaviour(emqx_config_handler).
-include_lib("emqx/include/logger.hrl").
%% Accessors
-export([enabled/0]).
-export([storage/0]).
-export([gc_interval/1]).
-export([segments_ttl/1]).
-export([init_timeout/0]).
-export([store_segment_timeout/0]).
-export([assemble_timeout/0]).
%% Load/Unload
-export([
    load/0,
    unload/0
]).
%% callbacks for emqx_config_handler
-export([
    pre_config_update/3,
    post_config_update/5
]).
-type milliseconds() :: non_neg_integer().
-type seconds() :: non_neg_integer().
%%--------------------------------------------------------------------
%% Accessors
%%--------------------------------------------------------------------
%% Whether the file-transfer feature is enabled (defaults to false).
-spec enabled() -> boolean().
enabled() ->
    emqx_config:get([file_transfer, enable], false).
-spec storage() -> emqx_config:config().
storage() ->
    emqx_config:get([file_transfer, storage]).
%% GC interval of the given storage config, `undefined` if not configured.
-spec gc_interval(emqx_ft_storage_fs:storage()) ->
    emqx_maybe:t(milliseconds()).
gc_interval(Storage) ->
    emqx_utils_maps:deep_get([segments, gc, interval], Storage, undefined).
%% {Min, Max} segment TTL bounds; `undefined` unless BOTH are configured.
-spec segments_ttl(emqx_ft_storage_fs:storage()) ->
    emqx_maybe:t({_Min :: seconds(), _Max :: seconds()}).
segments_ttl(Storage) ->
    Min = emqx_utils_maps:deep_get([segments, gc, minimum_segments_ttl], Storage, undefined),
    Max = emqx_utils_maps:deep_get([segments, gc, maximum_segments_ttl], Storage, undefined),
    case is_integer(Min) andalso is_integer(Max) of
        true ->
            {Min, Max};
        false ->
            undefined
    end.
init_timeout() ->
    emqx_config:get([file_transfer, init_timeout]).
assemble_timeout() ->
    emqx_config:get([file_transfer, assemble_timeout]).
store_segment_timeout() ->
    emqx_config:get([file_transfer, store_segment_timeout]).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
%% Apply the current config (hooking the feature if enabled), then subscribe
%% to subsequent `file_transfer` config updates.
-spec load() -> ok.
load() ->
    ok = maybe_start(),
    emqx_conf:add_handler([file_transfer], ?MODULE).
-spec unload() -> ok.
unload() ->
    ok = stop(),
    emqx_conf:remove_handler([file_transfer]).
%%--------------------------------------------------------------------
%% emqx_config_handler callbacks
%%--------------------------------------------------------------------
%% No validation/rewriting of update requests is performed here.
-spec pre_config_update(list(atom()), emqx_config:update_request(), emqx_config:raw_config()) ->
    {ok, emqx_config:update_request()} | {error, term()}.
pre_config_update(_, Req, _Config) ->
    {ok, Req}.
-spec post_config_update(
    list(atom()),
    emqx_config:update_request(),
    emqx_config:config(),
    emqx_config:config(),
    emqx_config:app_envs()
) ->
    ok | {ok, Result :: any()} | {error, Reason :: term()}.
post_config_update([file_transfer | _], _Req, NewConfig, OldConfig, _AppEnvs) ->
    on_config_update(OldConfig, NewConfig).
%% Transition table on (old, new) `enable` flags:
%% off -> off: nothing; on -> off: tear down storage + unhook;
%% off -> on: set up storage + hook; on -> on: propagate storage changes.
on_config_update(#{enable := false}, #{enable := false}) ->
    ok;
on_config_update(#{enable := true, storage := OldStorage}, #{enable := false}) ->
    ok = emqx_ft_storage:on_config_update(OldStorage, undefined),
    ok = emqx_ft:unhook();
on_config_update(#{enable := false}, #{enable := true, storage := NewStorage}) ->
    ok = emqx_ft_storage:on_config_update(undefined, NewStorage),
    ok = emqx_ft:hook();
on_config_update(#{enable := true, storage := OldStorage}, #{enable := true, storage := NewStorage}) ->
    ok = emqx_ft_storage:on_config_update(OldStorage, NewStorage).
%% Hook the feature at load time if it is already enabled in the config.
maybe_start() ->
    case emqx_config:get([file_transfer]) of
        #{enable := true, storage := Storage} ->
            ok = emqx_ft_storage:on_config_update(undefined, Storage),
            ok = emqx_ft:hook();
        _ ->
            ok
    end.
%% Unconditional teardown at unload time.
stop() ->
    ok = emqx_ft:unhook(),
    ok = emqx_ft_storage:on_config_update(storage(), undefined).

View File

@ -0,0 +1,235 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Lazy, resumable filesystem tree iterator driven by a "glob": a list
%% of per-level patterns ('*' or predicate funs). Yields leaves (files, with
%% their `file_info`) and error nodes in lexicographic order.
-module(emqx_ft_fs_iterator).
-export([new/2]).
-export([next/1]).
-export([next_leaf/1]).
-export([seek/3]).
-export([fold/3]).
-export([fold_n/4]).
-export_type([t/0]).
-export_type([glob/0]).
-export_type([pathstack/0]).
-type root() :: file:name().
-type glob() :: ['*' | globfun()].
-type globfun() ::
    fun((_Filename :: file:name()) -> boolean())
    | fun((_Filename :: file:name(), pathstack()) -> boolean()).
% A path stack is a list of path components, in reverse order.
-type pathstack() :: [file:name(), ...].
%% Iterator state:
%% `queue` — path stacks still pending at the current depth;
%% `head`  — glob patterns remaining below the current depth;
%% `stack` — saved (queue, glob) pairs for upper levels, to resume later.
-opaque t() :: #{
    root := root(),
    queue := [_PathStack :: [file:name()]],
    head := glob(),
    stack := [{[pathstack()], glob()}]
}.
-type entry() :: entry_leaf() | entry_node().
-type entry_leaf() ::
    {leaf, file:name(), file:file_info() | {error, file:posix()}, pathstack()}.
-type entry_node() ::
    {node, file:name(), {error, file:posix()}, pathstack()}.
%% Fresh iterator over `Root`, descending one directory level per glob entry.
-spec new(root(), glob()) ->
    t().
new(Root, Glob) ->
    #{
        root => Root,
        queue => [[]],
        head => Glob,
        stack => []
    }.
%% Advance the iterator: either emit the next entry or descend/resume.
-spec next(t()) ->
    {entry(), t()} | none.
%% Glob exhausted at this depth: the queued path is a leaf — emit it.
next(It = #{queue := [PathStack | Rest], head := []}) ->
    {emit(PathStack, It), It#{queue => Rest}};
%% Still have glob levels left: list the directory, keep matching children
%% (sorted, to guarantee lexicographic traversal order) and descend.
next(It = #{queue := [PathStack | Rest], head := [Pat | _], root := Root}) ->
    Filepath = mk_filepath(PathStack),
    case emqx_ft_fs_util:list_dir(filename:join(Root, Filepath)) of
        {ok, Filenames} ->
            Sorted = lists:sort(Filenames),
            Matches = [[Fn | PathStack] || Fn <- Sorted, matches_glob(Pat, Fn, [Fn | PathStack])],
            ItNext = windup(It),
            next(ItNext#{queue => Matches});
        {error, _} = Error ->
            %% Unlistable directory: surface it as an error node entry.
            {{node, Filepath, Error, PathStack}, It#{queue => Rest}}
    end;
%% Current level drained: pop back up to an unfinished level, if any.
next(It = #{queue := []}) ->
    unwind(It).
windup(It = #{queue := [_ | Rest], head := [Pat | Glob], stack := Stack}) ->
    % NOTE
    % Preserve unfinished paths and glob in the stack, so that we can resume traversal
    % when the lower levels of the tree are exhausted.
    It#{
        head => Glob,
        stack => [{Rest, [Pat | Glob]} | Stack]
    }.
unwind(It = #{stack := [{Queue, Glob} | StackRest]}) ->
    % NOTE
    % Resume traversal of unfinished paths from the upper levels of the tree.
    next(It#{
        queue => Queue,
        head => Glob,
        stack => StackRest
    });
unwind(#{stack := []}) ->
    none.
%% Turn a fully-matched path stack into a leaf entry, attaching the file info
%% (or the error encountered while reading it).
emit(PathStack, #{root := Root}) ->
    Filepath = mk_filepath(PathStack),
    Result = emqx_ft_fs_util:read_info(filename:join(Root, Filepath)),
    case Result of
        {ok, Fileinfo} ->
            {leaf, Filepath, Fileinfo, PathStack};
        {error, _} = Error ->
            {leaf, Filepath, Error, PathStack}
    end.

%% Path stacks are kept in reverse order; join them back into a relative path.
mk_filepath(PathStack) ->
    case PathStack of
        [] -> "";
        [_ | _] -> filename:join(lists:reverse(PathStack))
    end.

%% Match a single glob pattern against a directory entry.
matches_glob(Pat, Filename, PathStack) ->
    case Pat of
        '*' ->
            true;
        FilterFun when is_function(FilterFun, 1) ->
            FilterFun(Filename);
        FilterFun when is_function(FilterFun, 2) ->
            FilterFun(Filename, PathStack)
    end.
%%
%% Like next/1 but yields only leaves, silently skipping error nodes.
-spec next_leaf(t()) ->
    {entry_leaf(), t()} | none.
next_leaf(It) ->
    case next(It) of
        {{leaf, _, _, _} = Leaf, ItNext} ->
            {Leaf, ItNext};
        {{node, _Filename, _Error, _PathStack}, ItNext} ->
            % NOTE
            % Intentionally skipping intermediate traversal errors here, for simplicity.
            next_leaf(ItNext);
        none ->
            none
    end.
%%
%% Position a fresh iterator just past `PathSeek`, so iteration resumes with
%% the first entry lexicographically >= the seek path (or its successor).
-spec seek([file:name()], root(), glob()) ->
    t().
seek(PathSeek, Root, Glob) ->
    SeekGlob = mk_seek_glob(PathSeek, Glob),
    SeekStack = lists:reverse(PathSeek),
    case next_leaf(new(Root, SeekGlob)) of
        {{leaf, _Filepath, _Info, SeekStack}, It} ->
            %% Exact hit on the seek path: continue right after it.
            fixup_glob(Glob, It);
        {{leaf, _Filepath, _Info, Successor}, It = #{queue := Queue}} ->
            %% Seek path absent: re-queue its successor so it is not skipped.
            fixup_glob(Glob, It#{queue => [Successor | Queue]});
        none ->
            none(Root)
    end.
mk_seek_glob(PathSeek, Glob) ->
    % NOTE
    % The seek glob is a glob that skips all the nodes / leaves that are lexicographically
    % smaller than the seek path. For example, if the seek path is ["a", "b", "c"], and
    % the glob is ['*', '*', '*', '*'], then the seek glob is:
    % [ fun(Path) -> Path >= ["a"] end,
    %   fun(Path) -> Path >= ["a", "b"] end,
    %   fun(Path) -> Path >= ["a", "b", "c"] end,
    %   '*'
    % ]
    L = min(length(PathSeek), length(Glob)),
    merge_glob([mk_seek_pat(lists:sublist(PathSeek, N)) || N <- lists:seq(1, L)], Glob).
mk_seek_pat(PathSeek) ->
    % NOTE
    % The `PathStack` and `PathSeek` are of the same length here.
    fun(_Filename, PathStack) -> lists:reverse(PathStack) >= PathSeek end.
%% Zip the seek predicates into the original glob, AND-ing where both exist.
merge_glob([Pat | SeekRest], [PatOrig | Rest]) ->
    [merge_pat(Pat, PatOrig) | merge_glob(SeekRest, Rest)];
merge_glob([], [PatOrig | Rest]) ->
    [PatOrig | merge_glob([], Rest)];
merge_glob([], []) ->
    [].
merge_pat(Pat, PatOrig) ->
    fun(Filename, PathStack) ->
        Pat(Filename, PathStack) andalso matches_glob(PatOrig, Filename, PathStack)
    end.
fixup_glob(Glob, It = #{head := [], stack := Stack}) ->
    % NOTE
    % Restoring original glob through the stack. Strictly speaking, this is not usually
    % necessary, it's a kind of optimization.
    fixup_glob(Glob, lists:reverse(Stack), It#{stack => []}).
fixup_glob(Glob = [_ | Rest], [{Queue, _} | StackRest], It = #{stack := Stack}) ->
    fixup_glob(Rest, StackRest, It#{stack => [{Queue, Glob} | Stack]});
fixup_glob(Rest, [], It) ->
    It#{head => Rest}.
%%

%% Exhaust the iterator completely, threading an accumulator through FoldFun.
-spec fold(fun((entry(), Acc) -> Acc), Acc, t()) ->
    Acc.
fold(FoldFun, Acc0, It0) ->
    case next(It0) of
        none ->
            Acc0;
        {Entry, It1} ->
            fold(FoldFun, FoldFun(Entry, Acc0), It1)
    end.

%% NOTE
%% Passing negative `N` is allowed, in which case the iterator will be exhausted
%% completely, like in `fold/3`.
-spec fold_n(fun((entry(), Acc) -> Acc), Acc, t(), _N :: integer()) ->
    {Acc, {more, t()} | none}.
fold_n(_FoldFun, Acc, It, 0) ->
    %% Budget spent: hand back the iterator so the caller can resume.
    {Acc, {more, It}};
fold_n(FoldFun, Acc0, It0, N) ->
    case next(It0) of
        none ->
            {Acc0, none};
        {Entry, It1} ->
            fold_n(FoldFun, FoldFun(Entry, Acc0), It1, N - 1)
    end.

%%

%% A valid iterator that never yields any entries.
-spec none(root()) ->
    t().
none(Root) ->
    #{
        root => Root,
        queue => [],
        head => [],
        stack => []
    }.

View File

@ -0,0 +1,180 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Filesystem helpers for file transfer: safe filename validation and
%% percent-escaping, robust file reading/decoding, and glob-driven folds.
-module(emqx_ft_fs_util).
-include_lib("snabbkaffe/include/trace.hrl").
-include_lib("kernel/include/file.hrl").
-export([is_filename_safe/1]).
-export([escape_filename/1]).
-export([unescape_filename/1]).
-export([read_decode_file/2]).
-export([read_info/1]).
-export([list_dir/1]).
-export([fold/4]).
%% Callback shape for fold/4: invoked per traversal entry with the relative
%% path, the file info (or read error), and the reversed path stack.
-type foldfun(Acc) ::
    fun(
        (
            _Filepath :: file:name(),
            _Info :: file:file_info() | {error, file:posix()},
            _Stack :: emqx_ft_fs_iterator:pathstack(),
            Acc
        ) -> Acc
    ).
%% Characters that must never appear verbatim in a stored filename:
%% the escape character itself, and path / drive separators.
-define(IS_UNSAFE(C),
    ((C) =:= $% orelse
        (C) =:= $: orelse
        (C) =:= $\\ orelse
        (C) =:= $/)
).
-define(IS_PRINTABLE(C),
    % NOTE: See `io_lib:printable_unicode_list/1`
    (((C) >= 32 andalso (C) =< 126) orelse
        ((C) >= 16#A0 andalso (C) < 16#D800) orelse
        ((C) > 16#DFFF andalso (C) < 16#FFFE) orelse
        ((C) > 16#FFFF andalso (C) =< 16#10FFFF))
).

%%

%% Check that a client-supplied filename is non-empty, not "." / "..",
%% and contains only printable characters with no separators.
-spec is_filename_safe(file:filename_all()) -> ok | {error, atom()}.
is_filename_safe(FN) when is_binary(FN) ->
    is_filename_safe(unicode:characters_to_list(FN));
is_filename_safe("") ->
    {error, empty};
is_filename_safe(".") ->
    {error, special};
is_filename_safe("..") ->
    {error, special};
is_filename_safe(FN) ->
    verify_filename_safe(FN).

verify_filename_safe([]) ->
    ok;
verify_filename_safe([$% | Rest]) ->
    %% `%` is allowed in the raw name; it is only special in escaped form.
    verify_filename_safe(Rest);
verify_filename_safe([C | _Rest]) when ?IS_UNSAFE(C) ->
    {error, unsafe};
verify_filename_safe([C | _Rest]) when not ?IS_PRINTABLE(C) ->
    {error, nonprintable};
verify_filename_safe([_C | Rest]) ->
    verify_filename_safe(Rest).

%% Turn an arbitrary binary filename into a safe on-disk name, percent-
%% encoding unsafe / non-printable characters. "." and ".." are encoded
%% wholesale since they are special as-is.
-spec escape_filename(binary()) -> file:name().
escape_filename(Special) when Special =:= <<".">>; Special =:= <<"..">> ->
    lists:reverse(percent_encode(Special, ""));
escape_filename(Name) ->
    escape(Name, "").

escape(<<C/utf8, Rest/binary>>, Acc) ->
    NAcc =
        case ?IS_UNSAFE(C) orelse not ?IS_PRINTABLE(C) of
            true -> percent_encode(<<C/utf8>>, Acc);
            false -> [C | Acc]
        end,
    escape(Rest, NAcc);
escape(<<>>, Acc) ->
    lists:reverse(Acc).

%% Inverse of escape_filename/1.
-spec unescape_filename(file:name()) -> binary().
unescape_filename(Name) ->
    unescape(Name, <<>>).

unescape([$%, Hi, Lo | Rest], Acc) ->
    unescape(Rest, percent_decode(Hi, Lo, Acc));
unescape([C | Rest], Acc) ->
    unescape(Rest, <<Acc/binary, C/utf8>>);
unescape([], Acc) ->
    Acc.

%% Encode every byte of the binary as "%XY" (accumulated in reverse).
percent_encode(<<Hi:4, Lo:4, Rest/binary>>, Acc) ->
    percent_encode(Rest, [dec2hex(Lo), dec2hex(Hi), $% | Acc]);
percent_encode(<<>>, Acc) ->
    Acc.

percent_decode(Hi, Lo, Acc) ->
    <<Acc/binary, (hex2dec(Hi) * 16 + hex2dec(Lo))>>.

dec2hex(D) when (D >= 0) andalso (D =< 9) -> $0 + D;
dec2hex(D) when (D >= 10) andalso (D =< 15) -> $A + (D - 10).

hex2dec(H) when (H >= $0) andalso (H =< $9) -> H - $0;
hex2dec(H) when (H >= $A) andalso (H =< $F) -> 10 + (H - $A);
hex2dec(H) when (H >= $a) andalso (H =< $f) -> 10 + (H - $a);
hex2dec(_) -> error(badarg).
%%
%% Read a file and run `DecodeFun` on its content; any exception raised by
%% the decoder is converted into `{error, corrupted}`.
-spec read_decode_file(file:name(), fun((binary()) -> Value)) ->
    {ok, Value} | {error, _IoError}.
read_decode_file(Filepath, DecodeFun) ->
    case file:read_file(Filepath) of
        {ok, Content} ->
            safe_decode(Content, DecodeFun);
        {error, _} = Error ->
            Error
    end.
safe_decode(Content, DecodeFun) ->
    try
        {ok, DecodeFun(Content)}
    catch
        C:E:Stacktrace ->
            %% Trace (snabbkaffe) rather than crash: corrupted files are an
            %% expected condition, surfaced to the caller as a tagged error.
            ?tp(warning, "safe_decode_failed", #{
                class => C,
                exception => E,
                stacktrace => Stacktrace
            }),
            {error, corrupted}
    end.
%% Stat a path without following symlinks, with POSIX timestamps.
-spec read_info(file:name_all()) ->
    {ok, file:file_info()} | {error, file:posix() | badarg}.
read_info(AbsPath) ->
    % NOTE
    % Be aware that this function is occasionally mocked in `emqx_ft_fs_util_SUITE`.
    file:read_link_info(AbsPath, [{time, posix}, raw]).
%% List a directory, returning `{error, enotdir}` for non-directories.
-spec list_dir(file:name_all()) ->
    {ok, [file:name()]} | {error, file:posix() | badarg}.
list_dir(AbsPath) ->
    %% Fully-qualified self-call so read_info/1 can be mocked (see NOTE above).
    case ?MODULE:read_info(AbsPath) of
        {ok, #file_info{type = directory}} ->
            file:list_dir(AbsPath);
        {ok, #file_info{}} ->
            {error, enotdir};
        {error, Reason} ->
            {error, Reason}
    end.
%% Fold over all filesystem entries under `Root` matched by `Glob`.
-spec fold(foldfun(Acc), Acc, _Root :: file:name(), emqx_ft_fs_iterator:glob()) ->
    Acc.
fold(FoldFun, Acc, Root, Glob) ->
    fold(FoldFun, Acc, emqx_ft_fs_iterator:new(Root, Glob)).
fold(FoldFun, Acc, It) ->
    case emqx_ft_fs_iterator:next(It) of
        {{node, _Path, {error, enotdir}, _PathStack}, ItNext} ->
            %% Plain files sitting where a directory was expected are skipped.
            fold(FoldFun, Acc, ItNext);
        {{_Type, Path, Info, PathStack}, ItNext} ->
            AccNext = FoldFun(Path, Info, PathStack, Acc),
            fold(FoldFun, AccNext, ItNext);
        none ->
            Acc
    end.

View File

@ -0,0 +1,116 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_responder).
-behaviour(gen_server).
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
%% API
-export([start/3]).
-export([kickoff/2]).
-export([ack/2]).
%% Supervisor API
-export([start_link/3]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
-define(REF(Key), {via, gproc, {n, l, {?MODULE, Key}}}).
-type key() :: term().
-type respfun() :: fun(({ack, _Result} | {down, _Result} | timeout) -> _SideEffect).
%%--------------------------------------------------------------------
%% API
%% -------------------------------------------------------------------
%% Start a responder process under `emqx_ft_responder_sup`, registered by `Key`.
%% `RespFun` is invoked once with `{ack, _}`, `{down, _}` or `timeout`.
-spec start(key(), respfun(), timeout()) -> startlink_ret().
start(Key, RespFun, Timeout) ->
    emqx_ft_responder_sup:start_child(Key, RespFun, Timeout).
%% Tell the responder identified by `Key` to start monitoring `Pid`;
%% `Pid` receives a `kickoff` message in return.
-spec kickoff(key(), pid()) -> ok.
kickoff(Key, Pid) ->
    gen_server:call(?REF(Key), {kickoff, Pid}).
%% Deliver the final `Result` to the responder; returns whatever the
%% responder's `respfun()` returned. The responder stops afterwards.
-spec ack(key(), _Result) -> _Return.
ack(Key, Result) ->
    % TODO: it's possible to avoid term copy
    gen_server:call(?REF(Key), {ack, Result}, infinity).
%% Start the responder process, registered via gproc under `Key`.
%% NOTE: spec fixed to match the actual argument order (key, response
%% callback, timeout) — it previously declared `(key(), timeout(), respfun())`.
-spec start_link(key(), respfun(), timeout()) -> startlink_ret().
start_link(Key, RespFun, Timeout) ->
    gen_server:start_link(?REF(Key), ?MODULE, {Key, RespFun, Timeout}, []).
%%--------------------------------------------------------------------
%% gen_server callbacks
%% -------------------------------------------------------------------
%% Trap exits so `terminate/2` runs on supervisor shutdown, and arm the
%% one-shot inactivity timer. State is simply `{Key, RespFun}`.
init({Key, RespFun, Timeout}) ->
    _ = erlang:process_flag(trap_exit, true),
    _TRef = erlang:send_after(Timeout, self(), timeout),
    {ok, {Key, RespFun}}.
%% `kickoff`: monitor the given process and signal it to proceed.
handle_call({kickoff, Pid}, _From, St) ->
    % TODO: more state?
    _MRef = erlang:monitor(process, Pid),
    _ = Pid ! kickoff,
    {reply, ok, St};
%% `ack`: invoke the response callback with the result, reply its return
%% value to the caller, and stop. State is set to `undefined` so that
%% `terminate/2` does not invoke the callback a second time.
handle_call({ack, Result}, _From, {Key, RespFun}) ->
    Ret = apply(RespFun, [{ack, Result}]),
    ?tp(debug, ft_responder_ack, #{key => Key, result => Result, return => Ret}),
    {stop, {shutdown, Ret}, Ret, undefined};
handle_call(Msg, _From, State) ->
    ?SLOG(warning, #{msg => "unknown_call", call_msg => Msg}),
    {reply, {error, unknown_call}, State}.
handle_cast(Msg, State) ->
    ?SLOG(warning, #{msg => "unknown_cast", cast_msg => Msg}),
    {noreply, State}.
%% Inactivity timer fired: notify the callback with `timeout` and stop.
handle_info(timeout, {Key, RespFun}) ->
    Ret = apply(RespFun, [timeout]),
    ?tp(debug, ft_responder_timeout, #{key => Key, return => Ret}),
    {stop, {shutdown, Ret}, undefined};
%% Monitored (kicked-off) process went down: forward the mapped exit reason.
handle_info({'DOWN', _MRef, process, _Pid, Reason}, {Key, RespFun}) ->
    Ret = apply(RespFun, [{down, map_down_reason(Reason)}]),
    ?tp(debug, ft_responder_procdown, #{key => Key, reason => Reason, return => Ret}),
    {stop, {shutdown, Ret}, undefined};
handle_info(Msg, State) ->
    ?SLOG(warning, #{msg => "unknown_message", info_msg => Msg}),
    {noreply, State}.
%% Already responded (state cleared) — nothing left to do.
terminate(_Reason, undefined) ->
    ok;
%% Stopped before any response was delivered (e.g. supervisor shutdown):
%% notify the callback with `timeout` so waiters are not left hanging.
terminate(Reason, {Key, RespFun}) ->
    Ret = apply(RespFun, [timeout]),
    ?tp(debug, ft_responder_shutdown, #{key => Key, reason => Reason, return => Ret}),
    ok.
%% Translate a monitored process's exit reason into the responder result:
%% clean exits map to `ok`, a tagged shutdown carries the result verbatim,
%% anything else becomes an error.
map_down_reason(Reason) ->
    case Reason of
        normal -> ok;
        shutdown -> ok;
        {shutdown, Result} -> Result;
        noproc -> {error, noproc};
        Other -> {error, {internal_error, Other}}
    end.

View File

@ -0,0 +1,48 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_responder_sup).
-export([start_link/0]).
-export([start_child/3]).
-behaviour(supervisor).
-export([init/1]).
-define(SUPERVISOR, ?MODULE).
%%
%% Start the responder supervisor, registered locally under the module name.
-spec start_link() -> {ok, pid()}.
start_link() ->
    supervisor:start_link({local, ?SUPERVISOR}, ?MODULE, []).
%% Start a new responder under this supervisor. Arguments are appended to the
%% `emqx_ft_responder:start_link/3` template: unique key, response callback,
%% inactivity timeout. Spec added for consistency with the rest of the API.
-spec start_child(_Key, _RespFun :: fun(), timeout()) -> supervisor:startchild_ret().
start_child(Key, RespFun, Timeout) ->
    supervisor:start_child(?SUPERVISOR, [Key, RespFun, Timeout]).
%% Supervisor flags and the single child template.
%% `simple_one_for_one`: responders are started on demand via `start_child/3`,
%% all from the `emqx_ft_responder` template; `temporary` children are never
%% restarted.
-spec init(_) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}.
init(_) ->
    {ok,
        {
            #{
                strategy => simple_one_for_one,
                intensity => 100,
                period => 100
            },
            [
                #{
                    id => responder,
                    start => {emqx_ft_responder, start_link, []},
                    restart => temporary
                }
            ]
        }}.

View File

@ -0,0 +1,317 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_schema).
-behaviour(hocon_schema).
-include_lib("hocon/include/hoconsc.hrl").
-include_lib("typerefl/include/types.hrl").
-export([namespace/0, roots/0, fields/1, tags/0, desc/1]).
-export([schema/1]).
-export([translate/1]).
-type json_value() ::
null
| boolean()
| binary()
| number()
| [json_value()]
| #{binary() => json_value()}.
-reflect_type([json_value/0]).
%% NOTE
%% This is rather conservative limit, mostly dictated by the filename limitations
%% on most filesystems. Even though, say, S3 does not have such limitations, it's
%% still useful to have a limit on the filename length, to avoid having to deal with
%% limits in the storage backends.
-define(MAX_FILENAME_BYTELEN, 255).
-import(hoconsc, [ref/2, mk/2]).
%% hocon_schema callbacks: schema namespace, documentation tags and root keys.
namespace() -> file_transfer.
tags() ->
    [<<"File Transfer">>].
roots() -> [file_transfer].
%% Schema fields for each config (sub)section.
%% Top-level `file_transfer` section: feature switch, operation timeouts
%% and the storage backend subsection.
fields(file_transfer) ->
    [
        {enable,
            mk(
                boolean(),
                #{
                    desc => ?DESC("enable"),
                    required => false,
                    default => false
                }
            )},
        {init_timeout,
            mk(
                emqx_schema:duration_ms(),
                #{
                    desc => ?DESC("init_timeout"),
                    required => false,
                    default => "10s"
                }
            )},
        {store_segment_timeout,
            mk(
                emqx_schema:duration_ms(),
                #{
                    desc => ?DESC("store_segment_timeout"),
                    required => false,
                    default => "5m"
                }
            )},
        {assemble_timeout,
            mk(
                emqx_schema:duration_ms(),
                #{
                    desc => ?DESC("assemble_timeout"),
                    required => false,
                    default => "5m"
                }
            )},
        {storage,
            mk(
                ref(storage_backend),
                #{
                    desc => ?DESC("storage_backend"),
                    required => false,
                    % Exactly one backend may be configured, see `validator(backend)`.
                    validator => validator(backend),
                    default => #{
                        <<"local">> => #{}
                    }
                }
            )}
    ];
%% Union-like section: currently only the `local` storage backend exists.
fields(storage_backend) ->
    [
        {local,
            mk(
                ref(local_storage),
                #{
                    desc => ?DESC("local_storage"),
                    required => {false, recursively}
                }
            )}
    ];
%% Local storage backend: segment storage settings plus the exporter used
%% to store fully assembled files.
fields(local_storage) ->
    [
        {segments,
            mk(
                ref(local_storage_segments),
                #{
                    desc => ?DESC("local_storage_segments"),
                    required => false,
                    default => #{
                        <<"gc">> => #{}
                    }
                }
            )},
        {exporter,
            mk(
                ref(local_storage_exporter_backend),
                #{
                    desc => ?DESC("local_storage_exporter_backend"),
                    required => false,
                    % Exactly one exporter may be configured, see `validator(backend)`.
                    validator => validator(backend),
                    default => #{
                        <<"local">> => #{}
                    }
                }
            )}
    ];
%% Where transfer segments are kept on disk, and how they are GC'd.
fields(local_storage_segments) ->
    [
        {root,
            mk(
                binary(),
                #{
                    desc => ?DESC("local_storage_segments_root"),
                    required => false
                }
            )},
        {gc,
            mk(
                ref(local_storage_segments_gc), #{
                    desc => ?DESC("local_storage_segments_gc"),
                    required => false
                }
            )}
    ];
%% Union-like section: exporter backends (`local` filesystem or `s3`).
fields(local_storage_exporter_backend) ->
    [
        {local,
            mk(
                ref(local_storage_exporter),
                #{
                    desc => ?DESC("local_storage_exporter"),
                    required => {false, recursively}
                }
            )},
        {s3,
            mk(
                ref(s3_exporter),
                #{
                    desc => ?DESC("s3_exporter"),
                    required => {false, recursively}
                }
            )}
    ];
fields(local_storage_exporter) ->
    [
        {root,
            mk(
                binary(),
                #{
                    desc => ?DESC("local_storage_exporter_root"),
                    required => false
                }
            )}
    ];
%% S3 exporter settings are defined by the S3 application schema.
fields(s3_exporter) ->
    emqx_s3_schema:fields(s3);
%% Garbage collection of stale transfer segments.
fields(local_storage_segments_gc) ->
    [
        {interval,
            mk(
                emqx_schema:duration_ms(),
                #{
                    desc => ?DESC("storage_gc_interval"),
                    required => false,
                    default => "1h"
                }
            )},
        {maximum_segments_ttl,
            mk(
                emqx_schema:duration_s(),
                #{
                    desc => ?DESC("storage_gc_max_segments_ttl"),
                    required => false,
                    default => "24h"
                }
            )},
        {minimum_segments_ttl,
            mk(
                emqx_schema:duration_s(),
                #{
                    desc => ?DESC("storage_gc_min_segments_ttl"),
                    required => false,
                    default => "5m",
                    % NOTE
                    % This setting does not seem to be useful to an end-user.
                    hidden => true
                }
            )}
    ].
%% Human-readable descriptions for each schema section; `undefined` for
%% anything that needs no description.
desc(file_transfer) ->
    "File transfer settings";
desc(storage_backend) ->
    "Storage backend settings for file transfer";
desc(local_storage) ->
    "File transfer local storage settings";
desc(local_storage_exporter_backend) ->
    "Exporter for the local file system storage backend";
desc(local_storage_exporter) ->
    "Local Exporter settings for the File transfer local storage backend";
desc(s3_exporter) ->
    "S3 Exporter settings for the File transfer local storage backend";
desc(local_storage_segments) ->
    "File transfer local segments storage settings";
desc(local_storage_segments_gc) ->
    "Garbage collection settings for the File transfer local segments storage";
desc(_) ->
    undefined.
%% Standalone schema for the file metadata document carried in the
%% `.../init` file-transfer message payload.
schema(filemeta) ->
    #{
        roots => [
            {name,
                hoconsc:mk(string(), #{
                    required => true,
                    validator => validator(filename),
                    converter => converter(unicode_string)
                })},
            {size, hoconsc:mk(non_neg_integer())},
            {expire_at, hoconsc:mk(non_neg_integer())},
            % Checksum travels as a hex string, stored as `{Algo, Digest}`.
            {checksum, hoconsc:mk({atom(), binary()}, #{converter => converter(checksum)})},
            {segments_ttl, hoconsc:mk(pos_integer())},
            {user_data, hoconsc:mk(json_value())}
        ]
    }.
%% Field validators, selected by name.
%% `filename`: chained checks — UTF-8 byte-length limit and filesystem
%% safety of the name.
%% `backend`: exactly one backend (sub)section must be configured.
validator(filename) ->
    [
        fun(Value) ->
            Bin = unicode:characters_to_binary(Value),
            byte_size(Bin) =< ?MAX_FILENAME_BYTELEN orelse {error, max_length_exceeded}
        end,
        fun emqx_ft_fs_util:is_filename_safe/1
    ];
validator(backend) ->
    fun(Config) ->
        case maps:keys(Config) of
            [_Type] ->
                ok;
            [] ->
                % Report a meaningful validation error instead of crashing
                % with `case_clause` when no backend is configured at all.
                {error, no_backend_configured};
            _Conflicts = [_ | _] ->
                {error, multiple_conflicting_backends}
        end
    end.
%% Field converters, selected by name. Each converter is a 2-arity fun of
%% (Value, Opts); `make_serializable => true` means encoding for output,
%% otherwise decoding user input. Failures are signalled with `throw`.
converter(checksum) ->
    fun
        (undefined, #{}) ->
            undefined;
        % Serialize: 32-byte sha256 digest -> 64-char hex string.
        ({sha256, Bin}, #{make_serializable := true}) ->
            _ = is_binary(Bin) orelse throw({expected_type, string}),
            _ = byte_size(Bin) =:= 32 orelse throw({expected_length, 32}),
            binary:encode_hex(Bin);
        % Deserialize: 64-char hex string -> `{sha256, Digest}`.
        (Hex, #{}) ->
            _ = is_binary(Hex) orelse throw({expected_type, string}),
            _ = byte_size(Hex) =:= 64 orelse throw({expected_length, 64}),
            {sha256, binary:decode_hex(Hex)}
    end;
converter(unicode_string) ->
    fun
        (undefined, #{}) ->
            undefined;
        % Serialize: unicode charlist -> UTF-8 binary.
        (Str, #{make_serializable := true}) ->
            _ = is_list(Str) orelse throw({expected_type, string}),
            unicode:characters_to_binary(Str);
        % Deserialize: UTF-8 binary -> unicode charlist.
        (Str, #{}) ->
            _ = is_binary(Str) orelse throw({expected_type, string}),
            unicode:characters_to_list(Str)
    end.
%% Shorthand for references to structs defined in this module.
ref(Ref) ->
    ref(?MODULE, Ref).
%% Check a raw `file_transfer` config against this schema and return the
%% checked config with atom keys.
translate(Conf) ->
    [Root] = roots(),
    maps:get(
        Root,
        hocon_tconf:check_plain(
            ?MODULE, #{atom_to_binary(Root) => Conf}, #{atom_key => true}, [Root]
        )
    ).

View File

@ -0,0 +1,195 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage).
-export(
[
store_filemeta/2,
store_segment/2,
assemble/2,
files/0,
files/1,
with_storage_type/2,
with_storage_type/3,
backend/0,
on_config_update/2
]
).
-type type() :: local.
-type backend() :: {type(), storage()}.
-type storage() :: config().
-type config() :: emqx_config:config().
-export_type([backend/0]).
-export_type([assemble_callback/0]).
-export_type([query/1]).
-export_type([page/2]).
-export_type([file_info/0]).
-export_type([export_data/0]).
-export_type([reader/0]).
-type assemble_callback() :: fun((ok | {error, term()}) -> any()).
-type query(Cursor) ::
#{transfer => emqx_ft:transfer()}
| #{
limit => non_neg_integer(),
following => Cursor
}.
-type page(Item, Cursor) :: #{
items := [Item],
cursor => Cursor
}.
-type file_info() :: #{
transfer := emqx_ft:transfer(),
name := file:name(),
size := _Bytes :: non_neg_integer(),
timestamp := emqx_datetime:epoch_second(),
uri => uri_string:uri_string(),
meta => emqx_ft:filemeta()
}.
-type export_data() :: binary() | qlc:query_handle().
-type reader() :: pid().
%%--------------------------------------------------------------------
%% Behaviour
%%--------------------------------------------------------------------
%% NOTE
%% An async task will wait for a `kickoff` message to start processing, to give some time
%% to set up monitors, etc. Async task will not explicitly report the processing result,
%% you are expected to receive and handle exit reason of the process, which is
%% -type result() :: `{shutdown, ok | {error, _}}`.
-callback store_filemeta(storage(), emqx_ft:transfer(), emqx_ft:filemeta()) ->
ok | {async, pid()} | {error, term()}.
-callback store_segment(storage(), emqx_ft:transfer(), emqx_ft:segment()) ->
ok | {async, pid()} | {error, term()}.
-callback assemble(storage(), emqx_ft:transfer(), _Size :: emqx_ft:bytes()) ->
ok | {async, pid()} | {error, term()}.
-callback files(storage(), query(Cursor)) ->
{ok, page(file_info(), Cursor)} | {error, term()}.
-callback start(emqx_config:config()) -> any().
-callback stop(emqx_config:config()) -> any().
-callback on_config_update(_OldConfig :: emqx_config:config(), _NewConfig :: emqx_config:config()) ->
any().
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
%% Store file metadata for a transfer in the configured storage backend.
-spec store_filemeta(emqx_ft:transfer(), emqx_ft:filemeta()) ->
    ok | {async, pid()} | {error, term()}.
store_filemeta(Transfer, FileMeta) ->
    dispatch(store_filemeta, [Transfer, FileMeta]).
%% Store one segment of a transfer.
-spec store_segment(emqx_ft:transfer(), emqx_ft:segment()) ->
    ok | {async, pid()} | {error, term()}.
store_segment(Transfer, Segment) ->
    dispatch(store_segment, [Transfer, Segment]).
%% Kick off assembly of a complete transfer of the given total size.
-spec assemble(emqx_ft:transfer(), emqx_ft:bytes()) ->
    ok | {async, pid()} | {error, term()}.
assemble(Transfer, Size) ->
    dispatch(assemble, [Transfer, Size]).
%% List all exported files (unbounded query).
-spec files() ->
    {ok, page(file_info(), _)} | {error, term()}.
files() ->
    files(#{}).
%% List exported files matching `Query` (by transfer, or paginated).
-spec files(query(Cursor)) ->
    {ok, page(file_info(), Cursor)} | {error, term()}.
files(Query) ->
    dispatch(files, [Query]).
%% Route a call to the currently configured backend module, prepending
%% the backend's storage config to the argument list.
-spec dispatch(atom(), list(term())) -> any().
dispatch(Fun, Args) when is_atom(Fun) ->
    {Type, Storage} = backend(),
    apply(mod(Type), Fun, [Storage | Args]).
%%
%% Run `Fun` against the storage config, but only if the configured backend
%% is of the expected `Type`; otherwise return an error.
-spec with_storage_type(atom(), atom() | function()) -> any().
with_storage_type(Type, Fun) ->
    with_storage_type(Type, Fun, []).
%% Same, with extra arguments. `Fun` may be either the name of a function
%% in the backend module or a fun; in both cases the storage config is
%% prepended to `Args`.
-spec with_storage_type(atom(), atom() | function(), list(term())) -> any().
with_storage_type(Type, Fun, Args) ->
    case backend() of
        {Type, Storage} when is_atom(Fun) ->
            apply(mod(Type), Fun, [Storage | Args]);
        {Type, Storage} when is_function(Fun) ->
            apply(Fun, [Storage | Args]);
        {_, _} = Backend ->
            {error, {invalid_storage_backend, Backend}}
    end.
%%
%% Resolve the currently configured storage backend.
-spec backend() -> backend().
backend() ->
    backend(emqx_ft_conf:storage()).
%% React to a storage config change; either side may be `undefined`
%% (feature being turned on or off).
-spec on_config_update(_Old :: emqx_maybe:t(config()), _New :: emqx_maybe:t(config())) ->
    ok.
on_config_update(ConfigOld, ConfigNew) ->
    on_backend_update(
        emqx_maybe:apply(fun backend/1, ConfigOld),
        emqx_maybe:apply(fun backend/1, ConfigNew)
    ).
%% Identical backend on both sides: nothing to do.
on_backend_update({Type, _} = Backend, {Type, _} = Backend) ->
    ok;
%% Same backend type, different settings: let the backend reconfigure itself.
on_backend_update({Type, StorageOld}, {Type, StorageNew}) ->
    ok = (mod(Type)):on_config_update(StorageOld, StorageNew);
%% Backend type changed (or appeared/disappeared): stop the old one,
%% start the new one.
on_backend_update(BackendOld, BackendNew) when
    (BackendOld =:= undefined orelse is_tuple(BackendOld)) andalso
        (BackendNew =:= undefined orelse is_tuple(BackendNew))
->
    _ = emqx_maybe:apply(fun on_storage_stop/1, BackendOld),
    _ = emqx_maybe:apply(fun on_storage_start/1, BackendNew),
    ok.
%%--------------------------------------------------------------------
%% Local API
%%--------------------------------------------------------------------
%% Map a storage config to a backend descriptor. Only the `local` backend
%% exists at the moment.
-spec backend(config()) -> backend().
backend(#{local := Storage}) ->
    {local, Storage}.
on_storage_start({Type, Storage}) ->
    (mod(Type)):start(Storage).
on_storage_stop({Type, Storage}) ->
    (mod(Type)):stop(Storage).
%% Backend type -> implementing module.
mod(local) ->
    emqx_ft_storage_fs.

View File

@ -0,0 +1,195 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Filesystem storage exporter
%%
%% This is conceptually a part of the Filesystem storage backend that defines
%% how and where complete transfers are assembled into files and stored.
-module(emqx_ft_storage_exporter).
%% Export API
-export([start_export/3]).
-export([write/2]).
-export([complete/1]).
-export([discard/1]).
%% Listing API
-export([list/2]).
%% Lifecycle API
-export([on_config_update/2]).
%% Internal API
-export([exporter/1]).
-export_type([export/0]).
-type storage() :: emxt_ft_storage_fs:storage().
-type transfer() :: emqx_ft:transfer().
-type filemeta() :: emqx_ft:filemeta().
-type checksum() :: emqx_ft:checksum().
-type exporter_conf() :: map().
-type export_st() :: term().
-type hash_state() :: term().
-opaque export() :: #{
mod := module(),
st := export_st(),
hash := hash_state(),
filemeta := filemeta()
}.
%%------------------------------------------------------------------------------
%% Behaviour
%%------------------------------------------------------------------------------
-callback start_export(exporter_conf(), transfer(), filemeta()) ->
{ok, export_st()} | {error, _Reason}.
%% Exporter must discard the export itself in case of an error
-callback write(ExportSt :: export_st(), iodata()) ->
{ok, ExportSt :: export_st()} | {error, _Reason}.
-callback complete(_ExportSt :: export_st(), _Checksum :: checksum()) ->
ok | {error, _Reason}.
-callback discard(ExportSt :: export_st()) ->
ok | {error, _Reason}.
-callback list(exporter_conf(), emqx_ft_storage:query(Cursor)) ->
{ok, emqx_ft_storage:page(emqx_ft_storage:file_info(), Cursor)} | {error, _Reason}.
%% Lifecycle callbacks
-callback start(exporter_conf()) ->
ok | {error, _Reason}.
-callback stop(exporter_conf()) ->
ok.
-callback update(exporter_conf(), exporter_conf()) ->
ok | {error, _Reason}.
%%------------------------------------------------------------------------------
%% API
%%------------------------------------------------------------------------------
%% Begin an export through the exporter configured in `Storage`.
%% On success, wraps the exporter's state together with a freshly
%% initialized checksum context and the file metadata.
-spec start_export(storage(), transfer(), filemeta()) ->
    {ok, export()} | {error, _Reason}.
start_export(Storage, Transfer, Filemeta) ->
    {ExporterMod, ExporterConf} = exporter(Storage),
    case ExporterMod:start_export(ExporterConf, Transfer, Filemeta) of
        {ok, ExportSt} ->
            {ok, #{
                mod => ExporterMod,
                st => ExportSt,
                hash => init_checksum(Filemeta),
                filemeta => Filemeta
            }};
        {error, _} = Error ->
            Error
    end.
%% Write a chunk of content into the export, updating the running checksum.
-spec write(export(), iodata()) ->
    {ok, export()} | {error, _Reason}.
write(#{mod := ExporterMod, st := ExportSt, hash := Hash} = Export, Content) ->
    case ExporterMod:write(ExportSt, Content) of
        {ok, ExportStNext} ->
            {ok, Export#{
                st := ExportStNext,
                hash := update_checksum(Hash, Content)
            }};
        {error, _} = Error ->
            Error
    end.
%% Finish an export: verify the accumulated checksum against the one declared
%% in the file metadata (if any) and hand the result to the exporter.
%% On checksum mismatch the export is discarded.
-spec complete(export()) ->
    ok | {error, _Reason}.
complete(#{mod := ExporterMod, st := ExportSt, hash := Hash, filemeta := Filemeta}) ->
    case verify_checksum(Hash, Filemeta) of
        {ok, Checksum} ->
            ExporterMod:complete(ExportSt, Checksum);
        {error, _} = Error ->
            _ = ExporterMod:discard(ExportSt),
            Error
    end.
%% Abort an export, dropping whatever was written so far.
-spec discard(export()) ->
    ok | {error, _Reason}.
discard(#{mod := ExporterMod, st := ExportSt}) ->
    ExporterMod:discard(ExportSt).
%% List exported files via the exporter configured in `Storage`.
-spec list(storage(), emqx_ft_storage:query(Cursor)) ->
    {ok, emqx_ft_storage:page(emqx_ft_storage:file_info(), Cursor)} | {error, _Reason}.
list(Storage, Query) ->
    {ExporterMod, ExporterOpts} = exporter(Storage),
    ExporterMod:list(ExporterOpts, Query).
%% Lifecycle
%% React to storage config updates at the exporter level; either side may
%% be `undefined` when storage is being enabled or disabled.
-spec on_config_update(storage(), storage()) -> ok | {error, term()}.
on_config_update(StorageOld, StorageNew) ->
    on_exporter_update(
        emqx_maybe:apply(fun exporter/1, StorageOld),
        emqx_maybe:apply(fun exporter/1, StorageNew)
    ).
%% Identical exporter config: nothing to do.
on_exporter_update(Config, Config) ->
    ok;
%% Same exporter module, different options: in-place update.
on_exporter_update({ExporterMod, ConfigOld}, {ExporterMod, ConfigNew}) ->
    ExporterMod:update(ConfigOld, ConfigNew);
%% Exporter module changed (or appeared/disappeared): stop old, start new.
on_exporter_update(ExporterOld, ExporterNew) ->
    _ = emqx_maybe:apply(fun stop/1, ExporterOld),
    _ = emqx_maybe:apply(fun start/1, ExporterNew),
    ok.
start({ExporterMod, ExporterOpts}) ->
    ok = ExporterMod:start(ExporterOpts).
stop({ExporterMod, ExporterOpts}) ->
    ok = ExporterMod:stop(ExporterOpts).
%%------------------------------------------------------------------------------
%% Internal functions
%%------------------------------------------------------------------------------
%% Resolve the exporter backend `{Module, Options}` from the storage config.
%% Crashes with `{badkey, exporter}` if the section is missing.
exporter(Storage) ->
    case maps:get(exporter, Storage) of
        #{local := Options} ->
            {emqx_ft_storage_exporter_fs, Options};
        #{s3 := Options} ->
            {emqx_ft_storage_exporter_s3, Options}
    end.
%% Initialize a checksum context, using the algorithm declared in the file
%% metadata when present and sha256 otherwise.
init_checksum(Filemeta) ->
    crypto:hash_init(checksum_algo(Filemeta)).

%% Pick the declared hash algorithm, defaulting to sha256.
checksum_algo(#{checksum := {Algo, _}}) -> Algo;
checksum_algo(#{}) -> sha256.
%% Fold more data into the running checksum context.
update_checksum(Ctx, IoData) ->
    crypto:hash_update(Ctx, IoData).

%% Finalize the checksum and compare it against the one declared in the file
%% metadata, if any. Without a declared checksum, the computed sha256 digest
%% is returned as-is.
verify_checksum(Ctx, #{checksum := {Algo, Digest} = Expected}) ->
    case crypto:hash_final(Ctx) of
        Digest ->
            {ok, Expected};
        Computed ->
            {error, {checksum, Algo, binary:encode_hex(Computed)}}
    end;
verify_checksum(Ctx, #{}) ->
    {ok, {sha256, crypto:hash_final(Ctx)}}.

View File

@ -0,0 +1,489 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_exporter_fs).
-include_lib("kernel/include/file.hrl").
-include_lib("emqx/include/logger.hrl").
%% Exporter API
-behaviour(emqx_ft_storage_exporter).
-export([start_export/3]).
-export([write/2]).
-export([complete/2]).
-export([discard/1]).
-export([list/1]).
-export([
start/1,
stop/1,
update/2
]).
%% Internal API for RPC
-export([list_local/1]).
-export([list_local/2]).
-export([list_local_transfer/2]).
-export([start_reader/3]).
-export([list/2]).
-export_type([export_st/0]).
-export_type([options/0]).
-type options() :: #{
root => file:name(),
_ => _
}.
-type query() :: emqx_ft_storage:query(cursor()).
-type page(T) :: emqx_ft_storage:page(T, cursor()).
-type cursor() :: iodata().
-type transfer() :: emqx_ft:transfer().
-type filemeta() :: emqx_ft:filemeta().
-type exportinfo() :: emqx_ft_storage:file_info().
-type file_error() :: emqx_ft_storage_fs:file_error().
-type export_st() :: #{
path := file:name(),
handle := io:device(),
result := file:name(),
meta := filemeta()
}.
-type reader() :: pid().
-define(TEMPDIR, "tmp").
-define(MANIFEST, ".MANIFEST.json").
%% NOTE
%% Bucketing of resulting files to accommodate the storage backend for a considerably
%% large (e.g. > 10s of millions) number of files.
-define(BUCKET_HASH, sha).
%% 2 symbols = at most 256 directories on the upper level
-define(BUCKET1_LEN, 2).
%% 2 symbols = at most 256 directories on the second level
-define(BUCKET2_LEN, 2).
%%--------------------------------------------------------------------
%% Exporter behaviour
%%--------------------------------------------------------------------
%% Open a temporary file for the export; the final (bucketed) result path is
%% computed up front and carried in the export state for `complete/2`.
-spec start_export(options(), transfer(), filemeta()) ->
    {ok, export_st()} | {error, file_error()}.
start_export(Options, Transfer, Filemeta = #{name := Filename}) ->
    TempFilepath = mk_temp_absfilepath(Options, Transfer, Filename),
    ResultFilepath = mk_absfilepath(Options, Transfer, result, Filename),
    _ = filelib:ensure_dir(TempFilepath),
    case file:open(TempFilepath, [write, raw, binary]) of
        {ok, Handle} ->
            {ok, #{
                path => TempFilepath,
                handle => Handle,
                result => ResultFilepath,
                meta => Filemeta
            }};
        {error, _} = Error ->
            Error
    end.
%% Append a chunk to the temporary file. On a write error the export is
%% discarded (file closed and deleted) before the error is returned.
-spec write(export_st(), iodata()) ->
    {ok, export_st()} | {error, file_error()}.
write(ExportSt = #{handle := Handle}, IoData) ->
    case file:write(Handle, IoData) of
        ok ->
            {ok, ExportSt};
        {error, _} = Error ->
            _ = discard(ExportSt),
            Error
    end.
%% Finalize the export: close the temp file, write the manifest (filemeta
%% with the verified checksum) next to the result path, and move the temp
%% file into place.
-spec complete(export_st(), emqx_ft:checksum()) ->
    ok | {error, {checksum, _Algo, _Computed}} | {error, file_error()}.
complete(
    #{
        path := Filepath,
        handle := Handle,
        result := ResultFilepath,
        meta := FilemetaIn
    },
    Checksum
) ->
    Filemeta = FilemetaIn#{checksum => Checksum},
    ok = file:close(Handle),
    _ = filelib:ensure_dir(ResultFilepath),
    % NOTE(review): manifest write failures are deliberately ignored here —
    % the export still succeeds if only the manifest is missing.
    _ = file:write_file(mk_manifest_filename(ResultFilepath), encode_filemeta(Filemeta)),
    file:rename(Filepath, ResultFilepath).
%% Abort the export: close the temporary file and delete it.
%% NOTE: spec widened — `file:delete/1` may return `{error, _}`, which the
%% `emqx_ft_storage_exporter` behaviour's `discard` callback also allows;
%% the previous `-> ok` spec was too narrow.
-spec discard(export_st()) ->
    ok | {error, file_error()}.
discard(#{path := Filepath, handle := Handle}) ->
    ok = file:close(Handle),
    file:delete(Filepath).
%%--------------------------------------------------------------------
%% Exporter behaviour (lifecycle)
%%--------------------------------------------------------------------
%% FS Exporter does not require any stateful entities,
%% so lifecycle callbacks are no-op.
%% The FS exporter keeps no runtime state, so all lifecycle hooks are no-ops.
-spec start(options()) -> ok.
start(_Opts) -> ok.

-spec stop(options()) -> ok.
stop(_Opts) -> ok.

-spec update(options(), options()) -> ok.
update(_OldOpts, _NewOpts) -> ok.
%%--------------------------------------------------------------------
%% Internal API
%%--------------------------------------------------------------------
-type local_query() :: emqx_ft_storage:query({transfer(), file:name()}).
%% List exported files belonging to a single transfer on this node.
%% Returns `{error, enoent}` when the transfer's result directory is empty
%% or absent.
-spec list_local_transfer(options(), transfer()) ->
    {ok, [exportinfo()]} | {error, file_error()}.
list_local_transfer(Options, Transfer) ->
    It = emqx_ft_fs_iterator:new(
        mk_absdir(Options, Transfer, result),
        [fun filter_manifest/1]
    ),
    Result = emqx_ft_fs_iterator:fold(
        fun
            % Regular file: record its export info.
            ({leaf, _Path, Fileinfo = #file_info{type = regular}, [Filename | _]}, Acc) ->
                RelFilepath = filename:join(mk_result_reldir(Transfer) ++ [Filename]),
                Info = mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo),
                [Info | Acc];
            % Root directory itself is inaccessible; only turns the
            % accumulator into an error while it is still empty.
            ({node, _Path, {error, Reason}, []}, []) ->
                {error, Reason};
            % Anything else (odd file types, nested errors): log and skip.
            (Entry, Acc) ->
                ok = log_invalid_entry(Options, Entry),
                Acc
        end,
        [],
        It
    ),
    case Result of
        Infos = [_ | _] ->
            {ok, lists:reverse(Infos)};
        [] ->
            {error, enoent};
        {error, Reason} ->
            {error, Reason}
    end.
%% List all exported files on this node.
-spec list_local(options()) ->
    {ok, [exportinfo()]} | {error, file_error()}.
list_local(Options) ->
    list_local(Options, #{}).
%% List exported files on this node, either for a single transfer or as a
%% (possibly limited / cursor-following) scan over the whole bucketed tree.
-spec list_local(options(), local_query()) ->
    {ok, [exportinfo()]} | {error, file_error()}.
list_local(Options, #{transfer := Transfer}) ->
    list_local_transfer(Options, Transfer);
list_local(Options, #{} = Query) ->
    Root = get_storage_root(Options),
    % Glob mirrors the on-disk layout: two hash-bucket levels, then
    % the remainder of the client-id bucket, client id, file id, filename.
    Glob = [
        _Bucket1 = '*',
        _Bucket2 = '*',
        _Rest = '*',
        _ClientId = '*',
        _FileId = '*',
        fun filter_manifest/1
    ],
    It =
        case Query of
            #{following := Cursor} ->
                emqx_ft_fs_iterator:seek(mk_path_seek(Cursor), Root, Glob);
            #{} ->
                emqx_ft_fs_iterator:new(Root, Glob)
        end,
    % NOTE
    % In the rare case when some transfer contains more than one file, the paging
    % mechanism here may skip over some files, when the cursor is transfer-only.
    Limit = maps:get(limit, Query, -1),
    {Exports, _} = emqx_ft_fs_iterator:fold_n(
        fun(Entry, Acc) -> read_exportinfo(Options, Entry, Acc) end,
        [],
        It,
        Limit
    ),
    {ok, Exports}.
%% Build the relative path to seek the iterator to, from a decoded cursor.
mk_path_seek(#{transfer := Transfer, name := Filename}) ->
    mk_result_reldir(Transfer) ++ [Filename];
mk_path_seek(#{transfer := Transfer}) ->
    % NOTE: Any bitstring is greater than any list.
    mk_result_reldir(Transfer) ++ [<<>>].
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% Iterator filter: keep only non-manifest entries.
filter_manifest(?MANIFEST) ->
    % A file literally named `?MANIFEST` would have a manifest of its own;
    % it is not an exported file.
    false;
filter_manifest(Filename) ->
    % Accept any filename that does not end with the `?MANIFEST` suffix.
    ?MANIFEST =/= string:find(Filename, ?MANIFEST, trailing).
%% Accumulate export info for a single iterator entry during a full scan.
%% The path stack carries (innermost-first) the filename, file id and
%% client id of the transfer the file belongs to.
read_exportinfo(
    Options,
    {leaf, RelFilepath, Fileinfo = #file_info{type = regular}, [Filename, FileId, ClientId | _]},
    Acc
) ->
    % NOTE
    % There might be more than one file for a single transfer (though
    % extremely bad luck is needed for that, e.g. concurrent assemblers with
    % different filemetas from different nodes). This might be unexpected for a
    % client given the current protocol, yet might be helpful in the future.
    Transfer = dirnames_to_transfer(ClientId, FileId),
    Info = mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo),
    [Info | Acc];
read_exportinfo(_Options, {node, _Root = "", {error, enoent}, []}, Acc) ->
    % NOTE: Root directory does not exist, this is not an error.
    Acc;
read_exportinfo(Options, Entry, Acc) ->
    % Unexpected file types or inaccessible entries: log and skip.
    ok = log_invalid_entry(Options, Entry),
    Acc.
%% Build the export info map for a file, enriched with the decoded manifest
%% (filemeta) when it is available.
mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo) ->
    Root = get_storage_root(Options),
    try_read_filemeta(
        filename:join(Root, mk_manifest_filename(RelFilepath)),
        #{
            transfer => Transfer,
            name => Filename,
            uri => mk_export_uri(RelFilepath),
            timestamp => Fileinfo#file_info.mtime,
            size => Fileinfo#file_info.size,
            path => filename:join(Root, RelFilepath)
        }
    ).
%% Best-effort read of the manifest: a missing or corrupted manifest is
%% logged but does not fail the listing.
try_read_filemeta(Filepath, Info) ->
    case emqx_ft_fs_util:read_decode_file(Filepath, fun decode_filemeta/1) of
        {ok, Filemeta} ->
            Info#{meta => Filemeta};
        {error, Reason} ->
            ?SLOG(warning, "filemeta_inaccessible", #{
                path => Filepath,
                reason => Reason
            }),
            Info
    end.
%% Build the download URI for an exported file, scoped to this node.
mk_export_uri(RelFilepath) ->
    emqx_ft_storage_exporter_fs_api:mk_export_uri(node(), RelFilepath).
%% Log a filesystem entry that is either of an unexpected type or could not
%% be inspected; always returns `ok` (via `?SLOG`).
log_invalid_entry(Options, {_Type, RelFilepath, Fileinfo = #file_info{}, _Stack}) ->
    ?SLOG(notice, "filesystem_object_unexpected", #{
        relpath => RelFilepath,
        fileinfo => Fileinfo,
        options => Options
    });
log_invalid_entry(Options, {_Type, RelFilepath, {error, Reason}, _Stack}) ->
    ?SLOG(warning, "filesystem_object_inaccessible", #{
        relpath => RelFilepath,
        reason => Reason,
        options => Options
    }).
%% Start a supervised reader process for an exported file.
%% The relative path is validated with `filelib:safe_relative_path/2`, so
%% path-traversal attempts (e.g. `..` escaping the root) map to
%% `{error, enoent}` rather than exposing files outside the storage root.
-spec start_reader(options(), file:name(), _Caller :: pid()) ->
    {ok, reader()} | {error, enoent}.
start_reader(Options, RelFilepath, CallerPid) ->
    Root = get_storage_root(Options),
    case filelib:safe_relative_path(RelFilepath, Root) of
        SafeFilepath when SafeFilepath /= unsafe ->
            AbsFilepath = filename:join(Root, SafeFilepath),
            emqx_ft_storage_fs_reader:start_supervised(CallerPid, AbsFilepath);
        unsafe ->
            {error, enoent}
    end.
%%
-spec list(options(), query()) ->
    {ok, page(exportinfo())} | {error, [{node(), _Reason}]}.
%% List exports across all running nodes.
%% A transfer-scoped query returns an error only when there were node errors
%% AND nothing was found; partial results count as success.
list(_Options, Query = #{transfer := _Transfer}) ->
    case list(Query) of
        #{items := Exports = [_ | _]} ->
            {ok, #{items => Exports}};
        #{items := [], errors := NodeErrors} ->
            {error, NodeErrors};
        #{items := []} ->
            {ok, #{items => []}}
    end;
%% A generic (paged) query tolerates per-node errors: they are logged and the
%% accumulated items returned, with a cursor when the page limit was hit.
list(_Options, Query) ->
    Result = list(Query),
    case Result of
        #{errors := NodeErrors} ->
            ?SLOG(warning, "list_exports_errors", #{
                query => Query,
                errors => NodeErrors
            });
        #{} ->
            ok
    end,
    case Result of
        #{items := Exports, cursor := Cursor} ->
            % NOTE: items were accumulated in reverse order, restore it here.
            {ok, #{items => lists:reverse(Exports), cursor => encode_cursor(Cursor)}};
        #{items := Exports} ->
            {ok, #{items => lists:reverse(Exports)}}
    end.
%% Run the query against every running node, in stable (sorted) node order,
%% so that cursors referencing a node remain meaningful across calls.
list(QueryIn) ->
    {Nodes, NodeQuery} = decode_query(QueryIn, lists:sort(mria_mnesia:running_nodes())),
    list_nodes(NodeQuery, Nodes, #{items => []}).

%% Query nodes one by one via RPC, accumulating items and errors.
%% An RPC-level failure is logged and treated like an empty result for that
%% node; listing then continues with the remaining nodes.
list_nodes(Query, Nodes = [Node | Rest], Acc) ->
    case emqx_ft_storage_exporter_fs_proto_v1:list_exports([Node], Query) of
        [{ok, Result}] ->
            list_accumulate(Result, Query, Nodes, Acc);
        [Failure] ->
            ?SLOG(warning, #{
                msg => "list_remote_exports_failed",
                node => Node,
                query => Query,
                failure => Failure
            }),
            list_next(Query, Rest, Acc)
    end;
list_nodes(_Query, [], Acc) ->
    Acc.
%% Fold one node's listing result into the accumulator.
%% When the query carries a `limit`, stop as soon as it is exhausted and
%% remember a cursor pointing at the last item; otherwise keep going.
list_accumulate({ok, Exports}, Query, [Node | Rest], Acc = #{items := EAcc}) ->
    NExports = length(Exports),
    AccNext = Acc#{items := Exports ++ EAcc},
    case Query of
        #{limit := Limit} when NExports < Limit ->
            list_next(Query#{limit => Limit - NExports}, Rest, AccNext);
        #{limit := _} ->
            AccNext#{cursor => mk_cursor(Node, Exports)};
        #{} ->
            list_next(Query, Rest, AccNext)
    end;
list_accumulate({error, Reason}, Query, [Node | Rest], Acc) ->
    EAcc = maps:get(errors, Acc, []),
    list_next(Query, Rest, Acc#{errors => [{Node, Reason} | EAcc]}).

%% Move on to the next node; the `following` (node-local) cursor only
%% applies to the first node in the sequence, so drop it for the rest.
list_next(Query, Nodes, Acc) ->
    list_nodes(maps:remove(following, Query), Nodes, Acc).
%% Split a global query into the list of nodes still to visit plus the query
%% to run there. A `following` cursor pins the starting node and carries the
%% node-local cursor to resume from.
decode_query(Query = #{following := Cursor}, Nodes) ->
    {Node, NodeCursor} = decode_cursor(Cursor),
    {skip_query_nodes(Node, Nodes), Query#{following => NodeCursor}};
decode_query(Query = #{}, Nodes) ->
    {Nodes, Query}.

%% Drop nodes sorting before the cursor's node; assumes `Nodes` is sorted.
skip_query_nodes(CNode, Nodes) ->
    lists:dropwhile(fun(N) -> N < CNode end, Nodes).

%% A cursor points at the most recent item (head of the reverse-ordered page).
mk_cursor(Node, [_Last = #{transfer := Transfer, name := Name} | _]) ->
    {Node, #{transfer => Transfer, name => Name}}.
%% Serialize a cursor as a compact JSON document, safe to hand to clients.
encode_cursor({Node, #{transfer := {ClientId, FileId}, name := Name}}) ->
    emqx_utils_json:encode(#{
        <<"n">> => Node,
        <<"cid">> => ClientId,
        <<"fid">> => FileId,
        <<"fn">> => unicode:characters_to_binary(Name)
    }).

%% Parse and validate a client-supplied cursor.
%% Any malformed input (bad JSON, wrong field types, unknown node name) is
%% normalized into `error({badarg, cursor})`. The node is resolved with
%% `binary_to_existing_atom`, so arbitrary input cannot grow the atom table.
decode_cursor(Cursor) ->
    try
        #{
            <<"n">> := NodeIn,
            <<"cid">> := ClientId,
            <<"fid">> := FileId,
            <<"fn">> := NameIn
        } = emqx_utils_json:decode(Cursor),
        true = is_binary(ClientId),
        true = is_binary(FileId),
        Node = binary_to_existing_atom(NodeIn),
        Name = unicode:characters_to_list(NameIn),
        true = is_list(Name),
        {Node, #{transfer => {ClientId, FileId}, name => Name}}
    catch
        error:{_, invalid_json} ->
            error({badarg, cursor});
        error:{badmatch, _} ->
            error({badarg, cursor});
        error:badarg ->
            error({badarg, cursor})
    end.
%%
%% Filemeta is persisted as JSON wrapped in a versioned prelude:
%% ["filemeta", Vsn, Meta].
-define(PRELUDE(Vsn, Meta), [<<"filemeta">>, Vsn, Meta]).

encode_filemeta(Meta) ->
    emqx_utils_json:encode(?PRELUDE(_Vsn = 1, emqx_ft:encode_filemeta(Meta))).

%% Decode a stored manifest; raises on schema violations so the caller can
%% treat the file as corrupted.
decode_filemeta(Binary) when is_binary(Binary) ->
    ?PRELUDE(_Vsn = 1, Map) = emqx_utils_json:decode(Binary, [return_maps]),
    case emqx_ft:decode_filemeta(Map) of
        {ok, Meta} ->
            Meta;
        {error, Reason} ->
            error(Reason)
    end.

%% The manifest sits next to the exported file: "<Filename><?MANIFEST>".
mk_manifest_filename(Filename) when is_list(Filename) ->
    Filename ++ ?MANIFEST;
mk_manifest_filename(Filename) when is_binary(Filename) ->
    <<Filename/binary, ?MANIFEST>>.
%% Temporary files get a unique prefix so concurrent exports never clash.
mk_temp_absfilepath(Options, Transfer, Filename) ->
    Unique = erlang:unique_integer([positive]),
    TempFilename = integer_to_list(Unique) ++ "." ++ Filename,
    filename:join(mk_absdir(Options, Transfer, temporary), TempFilename).

mk_absdir(Options, _Transfer, temporary) ->
    filename:join([get_storage_root(Options), ?TEMPDIR]);
mk_absdir(Options, Transfer, result) ->
    filename:join([get_storage_root(Options) | mk_result_reldir(Transfer)]).

mk_absfilepath(Options, Transfer, What, Filename) ->
    filename:join(mk_absdir(Options, Transfer, What), Filename).

%% Result files are bucketed by a hash of the transfer to keep directory
%% fan-out bounded: <b1>/<b2>/<rest-of-hash>/<clientid>/<fileid>.
mk_result_reldir(Transfer = {ClientId, FileId}) ->
    Hash = mk_transfer_hash(Transfer),
    <<
        Bucket1:?BUCKET1_LEN/binary,
        Bucket2:?BUCKET2_LEN/binary,
        BucketRest/binary
    >> = binary:encode_hex(Hash),
    [
        binary_to_list(Bucket1),
        binary_to_list(Bucket2),
        binary_to_list(BucketRest),
        emqx_ft_fs_util:escape_filename(ClientId),
        emqx_ft_fs_util:escape_filename(FileId)
    ].

%% Inverse of the escaping applied when the transfer dirs were created.
dirnames_to_transfer(ClientId, FileId) ->
    {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}.

mk_transfer_hash(Transfer) ->
    crypto:hash(?BUCKET_HASH, term_to_binary(Transfer)).

%% Storage root defaults to "<data_dir>/file_transfer/exports".
get_storage_root(Options) ->
    maps:get(root, Options, filename:join([emqx:data_dir(), file_transfer, exports])).

View File

@ -0,0 +1,182 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_exporter_fs_api).
-behaviour(minirest_api).
-include_lib("typerefl/include/types.hrl").
-include_lib("hocon/include/hoconsc.hrl").
-include_lib("emqx/include/logger.hrl").
%% Swagger specs from hocon schema
-export([
api_spec/0,
paths/0,
schema/1,
namespace/0
]).
-export([
fields/1,
roots/0
]).
%% API callbacks
-export([
'/file_transfer/file'/2
]).
-export([mk_export_uri/2]).
%%
namespace() -> "file_transfer".

%% Swagger spec for this API, with schema checking enabled and guarded by
%% the file-transfer feature flag.
api_spec() ->
    emqx_dashboard_swagger:spec(?MODULE, #{
        check_schema => true, filter => fun emqx_ft_api:check_ft_enabled/2
    }).

paths() ->
    [
        "/file_transfer/file"
    ].
%% OpenAPI schema for the file download endpoint.
schema("/file_transfer/file") ->
    #{
        'operationId' => '/file_transfer/file',
        get => #{
            tags => [<<"file_transfer">>],
            summary => <<"Download a particular file">>,
            description => ?DESC("file_get"),
            parameters => [
                hoconsc:ref(file_node),
                hoconsc:ref(file_ref)
            ],
            responses => #{
                200 => <<"Operation success">>,
                404 => emqx_dashboard_swagger:error_codes(['NOT_FOUND'], <<"Not found">>),
                503 => emqx_dashboard_swagger:error_codes(
                    ['SERVICE_UNAVAILABLE'], <<"Service unavailable">>
                )
            }
        }
    }.

roots() ->
    [
        file_node,
        file_ref
    ].
-spec fields(hocon_schema:name()) -> hocon_schema:fields().
%% Query-string parameter schemas referenced from `schema/1`.
fields(file_ref) ->
    [
        {fileref,
            hoconsc:mk(binary(), #{
                in => query,
                desc => <<"File reference">>,
                example => <<"file1">>,
                required => true
            })}
    ];
fields(file_node) ->
    [
        {node,
            hoconsc:mk(binary(), #{
                in => query,
                desc => <<"Node under which the file is located">>,
                example => atom_to_list(node()),
                required => true
            })}
    ].
%% GET /file_transfer/file: stream an exported file from the node that
%% stores it. The file is read via a remote reader process so the response
%% body can be produced lazily without loading the file into memory.
'/file_transfer/file'(get, #{query_string := Query}) ->
    try
        Node = parse_node(maps:get(<<"node">>, Query)),
        Filepath = parse_filepath(maps:get(<<"fileref">>, Query)),
        case emqx_ft_storage_exporter_fs_proto_v1:read_export_file(Node, Filepath, self()) of
            {ok, ReaderPid} ->
                FileData = emqx_ft_storage_fs_reader:table(ReaderPid),
                {200,
                    #{
                        <<"content-type">> => <<"application/data">>,
                        <<"content-disposition">> => <<"attachment">>
                    },
                    FileData};
            {error, enoent} ->
                {404, error_msg('NOT_FOUND', <<"Not found">>)};
            {error, Error} ->
                ?SLOG(warning, #{msg => "get_ready_transfer_fail", error => Error}),
                {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)}
        end
    catch
        % Thrown by parse_node / parse_filepath on invalid query parameters.
        throw:{invalid, Param} ->
            {404,
                error_msg(
                    'NOT_FOUND',
                    iolist_to_binary(["Invalid query parameter: ", Param])
                )};
        % The target node is down or unreachable.
        error:{erpc, noconnection} ->
            {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)}
    end.
%% Uniform error body for API responses.
error_msg(Code, Msg) ->
    #{code => Code, message => emqx_utils:readable_error_msg(Msg)}.

-spec mk_export_uri(node(), file:name()) ->
    uri_string:uri_string().
%% Build a dashboard-relative download URI for an exported file located on
%% `Node`; the inverse of the query parsing done in the GET handler above.
mk_export_uri(Node, Filepath) ->
    emqx_dashboard_swagger:relative_uri([
        "/file_transfer/file?",
        uri_string:compose_query([
            {"node", atom_to_list(Node)},
            {"fileref", Filepath}
        ])
    ]).
%%
%% Resolve the node name without creating new atoms (atom-table DoS guard).
parse_node(NodeBin) ->
    case emqx_utils:safe_to_existing_atom(NodeBin) of
        {ok, Node} ->
            Node;
        {error, _} ->
            throw({invalid, NodeBin})
    end.

%% Validate a user-supplied file reference: must be relative and must not
%% contain special (dot-prefixed) components, precluding path traversal.
parse_filepath(PathBin) ->
    case filename:pathtype(PathBin) of
        relative ->
            ok;
        absolute ->
            throw({invalid, PathBin})
    end,
    PathComponents = filename:split(PathBin),
    case lists:any(fun is_special_component/1, PathComponents) of
        false ->
            filename:join(PathComponents);
        true ->
            throw({invalid, PathBin})
    end.
%% Predicate: should a path component be rejected as "special"?
%% Any component starting with a dot (".", "..", hidden files) is special,
%% for both binary and charlist representations; everything else is not.
is_special_component(Component) when is_binary(Component) ->
    case Component of
        <<$., _/binary>> -> true;
        _ -> false
    end;
is_special_component([$. | _]) ->
    true;
is_special_component(_Other) ->
    false.

View File

@ -0,0 +1,50 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% This methods are called via rpc by `emqx_ft_storage_exporter_fs`
%% They populate the call with actual storage which may be configured differently
%% on a concrete node.
-module(emqx_ft_storage_exporter_fs_proxy).
-export([
list_exports_local/1,
read_export_file_local/2
]).
%% RPC target: list exports of the locally configured storage.
%% The storage/exporter configuration is resolved on the callee node, since
%% it may differ from the caller's.
list_exports_local(Query) ->
    emqx_ft_storage:with_storage_type(local, fun(Storage) ->
        case emqx_ft_storage_exporter:exporter(Storage) of
            {emqx_ft_storage_exporter_fs, Options} ->
                emqx_ft_storage_exporter_fs:list_local(Options, Query)
            % NOTE
            % This case clause is currently deemed unreachable by dialyzer.
            % InvalidExporter ->
            %     {error, {invalid_exporter, InvalidExporter}}
        end
    end).

%% RPC target: start a reader for a locally exported file, streaming to the
%% (possibly remote) `CallerPid`.
read_export_file_local(Filepath, CallerPid) ->
    emqx_ft_storage:with_storage_type(local, fun(Storage) ->
        case emqx_ft_storage_exporter:exporter(Storage) of
            {emqx_ft_storage_exporter_fs, Options} ->
                emqx_ft_storage_exporter_fs:start_reader(Options, Filepath, CallerPid)
            % NOTE
            % This case clause is currently deemed unreachable by dialyzer.
            % InvalidExporter ->
            %     {error, {invalid_exporter, InvalidExporter}}
        end
    end).

View File

@ -0,0 +1,251 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_exporter_s3).
-include_lib("emqx/include/logger.hrl").
%% Exporter API
-export([start_export/3]).
-export([write/2]).
-export([complete/2]).
-export([discard/1]).
-export([list/2]).
-export([
start/1,
stop/1,
update/2
]).
-type options() :: emqx_s3:profile_config().
-type transfer() :: emqx_ft:transfer().
-type filemeta() :: emqx_ft:filemeta().
-type exportinfo() :: #{
transfer := transfer(),
name := file:name(),
uri := uri_string:uri_string(),
timestamp := emqx_datetime:epoch_second(),
size := _Bytes :: non_neg_integer(),
filemeta => filemeta()
}.
-type query() :: emqx_ft_storage:query(cursor()).
-type page(T) :: emqx_ft_storage:page(T, cursor()).
-type cursor() :: iodata().
-type export_st() :: #{
pid := pid(),
filemeta := filemeta(),
transfer := transfer()
}.
-define(S3_PROFILE_ID, ?MODULE).
-define(FILEMETA_VSN, <<"1">>).
-define(S3_LIST_LIMIT, 500).
%%--------------------------------------------------------------------
%% Exporter behaviour
%%--------------------------------------------------------------------
-spec start_export(options(), transfer(), filemeta()) ->
    {ok, export_st()} | {error, term()}.
%% Begin an S3 multipart upload for a transfer.
%% The uploader process is linked so that an exporter crash aborts the
%% upload (and vice versa) instead of leaking multipart state in S3.
start_export(_Options, Transfer, Filemeta) ->
    UploaderConfig = #{
        key => s3_key(Transfer, Filemeta),
        headers => s3_headers(Transfer, Filemeta)
    },
    case emqx_s3:start_uploader(?S3_PROFILE_ID, UploaderConfig) of
        {ok, Pid} ->
            true = erlang:link(Pid),
            % FIX: include `transfer` so the returned state actually conforms
            % to `export_st()`, which declares `transfer := transfer()` as a
            % mandatory key (the original map omitted it).
            {ok, #{transfer => Transfer, filemeta => Filemeta, pid => Pid}};
        {error, _Reason} = Error ->
            Error
    end.
-spec write(export_st(), iodata()) ->
    {ok, export_st()} | {error, term()}.
%% Append a chunk to the in-flight upload; the state is returned unchanged.
write(#{pid := Pid} = ExportSt, IoData) ->
    case emqx_s3_uploader:write(Pid, IoData) of
        ok ->
            {ok, ExportSt};
        {error, _Reason} = Error ->
            Error
    end.

-spec complete(export_st(), emqx_ft:checksum()) ->
    ok | {error, term()}.
%% Finalize the multipart upload.
%% NOTE(review): the checksum argument is ignored here — presumably verified
%% at an upper layer; confirm before relying on S3-side integrity checks.
complete(#{pid := Pid} = _ExportSt, _Checksum) ->
    emqx_s3_uploader:complete(Pid).

-spec discard(export_st()) ->
    ok.
%% Abort the multipart upload, discarding any already-uploaded parts.
discard(#{pid := Pid} = _ExportSt) ->
    emqx_s3_uploader:abort(Pid).

-spec list(options(), query()) ->
    {ok, page(exportinfo())} | {error, term()}.
%% List exports by querying the S3 bucket directly.
list(Options, Query) ->
    emqx_s3:with_client(?S3_PROFILE_ID, fun(Client) -> list(Client, Options, Query) end).
%%--------------------------------------------------------------------
%% Exporter behaviour (lifecycle)
%%--------------------------------------------------------------------
-spec start(options()) -> ok | {error, term()}.
%% Lifecycle: register/unregister/update the S3 profile this exporter uses.
start(Options) ->
    emqx_s3:start_profile(?S3_PROFILE_ID, Options).

-spec stop(options()) -> ok.
stop(_Options) ->
    ok = emqx_s3:stop_profile(?S3_PROFILE_ID).

-spec update(options(), options()) -> ok.
update(_OldOptions, NewOptions) ->
    emqx_s3:update_profile(?S3_PROFILE_ID, NewOptions).
%%--------------------------------------------------------------------
%% Internal functions
%% -------------------------------------------------------------------
%% Object key layout: <escaped-clientid>/<escaped-fileid>/<filename>.
s3_key(Transfer, #{name := Filename}) ->
    s3_prefix(Transfer) ++ "/" ++ Filename.

s3_prefix({ClientId, FileId} = _Transfer) ->
    emqx_ft_fs_util:escape_filename(ClientId) ++ "/" ++ emqx_ft_fs_util:escape_filename(FileId).

%% Transfer identity and filemeta are attached as S3 object metadata, so a
%% bucket listing alone is enough to reconstruct export info.
s3_headers({ClientId, FileId}, Filemeta) ->
    #{
        %% The ClientID MUST be a UTF-8 Encoded String
        <<"x-amz-meta-clientid">> => ClientId,
        %% It [Topic Name] MUST be a UTF-8 Encoded String
        <<"x-amz-meta-fileid">> => FileId,
        <<"x-amz-meta-filemeta">> => s3_header_filemeta(Filemeta),
        <<"x-amz-meta-filemeta-vsn">> => ?FILEMETA_VSN
    }.

%% Header values must be ASCII-safe: force UTF-8 and \u-escape in the JSON.
s3_header_filemeta(Filemeta) ->
    emqx_utils_json:encode(emqx_ft:encode_filemeta(Filemeta), [force_utf8, uescape]).
%% Transfer-scoped listing: a single prefix-limited page suffices.
list(Client, _Options, #{transfer := Transfer}) ->
    case list_key_info(Client, [{prefix, s3_prefix(Transfer)}, {max_keys, ?S3_LIST_LIMIT}]) of
        {ok, {Exports, _Marker}} ->
            {ok, #{items => Exports}};
        {error, _Reason} = Error ->
            Error
    end;
%% Generic listing: page through the bucket, honoring `limit` and the opaque
%% `following` cursor (an S3 list marker).
list(Client, _Options, Query) ->
    Limit = maps:get(limit, Query, undefined),
    Marker = emqx_maybe:apply(fun decode_cursor/1, maps:get(following, Query, undefined)),
    case list_pages(Client, Marker, Limit, []) of
        {ok, {Exports, undefined}} ->
            {ok, #{items => Exports}};
        {ok, {Exports, NextMarker}} ->
            {ok, #{items => Exports, cursor => encode_cursor(NextMarker)}};
        {error, _Reason} = Error ->
            Error
    end.
%% Fetch pages of at most ?S3_LIST_LIMIT keys until the limit is exhausted
%% or the bucket runs out of keys. Pages are prepended to `Acc`.
list_pages(Client, Marker, Limit, Acc) ->
    % NOTE: when `Limit` is `undefined` this still yields ?S3_LIST_LIMIT,
    % since in Erlang term order any number sorts before any atom.
    MaxKeys = min(?S3_LIST_LIMIT, Limit),
    ListOptions = [{marker, Marker} || Marker =/= undefined],
    case list_key_info(Client, [{max_keys, MaxKeys} | ListOptions]) of
        {ok, {Exports, NextMarker}} ->
            list_accumulate(Client, Limit, NextMarker, [Exports | Acc]);
        {error, _Reason} = Error ->
            Error
    end.

%% Decide whether another page is needed.
list_accumulate(_Client, _Limit, undefined, Acc) ->
    % No more keys in the bucket.
    {ok, {flatten_pages(Acc), undefined}};
list_accumulate(Client, undefined, Marker, Acc) ->
    % Unlimited query: keep paging.
    list_pages(Client, Marker, undefined, Acc);
list_accumulate(Client, Limit, Marker, Acc = [Exports | _]) ->
    case Limit - length(Exports) of
        0 ->
            {ok, {flatten_pages(Acc), Marker}};
        Left ->
            list_pages(Client, Marker, Left, Acc)
    end.
%% Concatenate pages that were accumulated newest-first into a single,
%% properly ordered list of entries.
flatten_pages(PagesRev) ->
    lists:foldl(fun(Page, Tail) -> Page ++ Tail end, [], PagesRev).
%% Run a single S3 LIST request and convert the result into export infos,
%% dropping keys that do not match the expected layout. Returns the next
%% marker when the listing is truncated, `undefined` otherwise.
list_key_info(Client, ListOptions) ->
    case emqx_s3_client:list(Client, ListOptions) of
        {ok, Result} ->
            ?SLOG(debug, #{msg => "list_key_info", result => Result}),
            KeyInfos = proplists:get_value(contents, Result, []),
            Exports = lists:filtermap(
                fun(KeyInfo) -> key_info_to_exportinfo(Client, KeyInfo) end, KeyInfos
            ),
            Marker =
                case proplists:get_value(is_truncated, Result, false) of
                    true ->
                        next_marker(KeyInfos);
                    false ->
                        undefined
                end,
            {ok, {Exports, Marker}};
        {error, _Reason} = Error ->
            Error
    end.
%% The cursor is simply the S3 list marker (last seen key), as a binary.
encode_cursor(Key) ->
    unicode:characters_to_binary(Key).

%% Decode a client-supplied cursor back into a key; malformed input is
%% normalized into `error({badarg, cursor})`.
decode_cursor(Cursor) ->
    case unicode:characters_to_list(Cursor) of
        Key when is_list(Key) ->
            Key;
        _ ->
            error({badarg, cursor})
    end.
%% The continuation marker is the key of the last entry on the page.
next_marker(KeyInfos) ->
    LastEntry = lists:last(KeyInfos),
    proplists:get_value(key, LastEntry).
%% Convert one S3 key entry into an export info map.
%% Keys that don't parse as `<clientid>/<fileid>/<name>` are filtered out.
key_info_to_exportinfo(Client, KeyInfo) ->
    Key = proplists:get_value(key, KeyInfo),
    case parse_transfer_and_name(Key) of
        {ok, {Transfer, Name}} ->
            {true, #{
                transfer => Transfer,
                name => unicode:characters_to_binary(Name),
                uri => emqx_s3_client:uri(Client, Key),
                timestamp => datetime_to_epoch_second(proplists:get_value(last_modified, KeyInfo)),
                size => proplists:get_value(size, KeyInfo)
            }};
        {error, _Reason} ->
            false
    end.
%% Gregorian seconds at the Unix epoch (1970-01-01T00:00:00Z).
-define(EPOCH_START, 62167219200).

%% Convert a calendar datetime into Unix epoch seconds.
datetime_to_epoch_second(DateTime) ->
    GregorianSeconds = calendar:datetime_to_gregorian_seconds(DateTime),
    GregorianSeconds - ?EPOCH_START.
%% Split an S3 key back into a transfer id and filename.
%% Valid keys have exactly three slash-separated segments; anything else
%% (e.g. foreign objects in the bucket) is rejected.
parse_transfer_and_name(Key) ->
    case string:split(Key, "/", all) of
        [ClientId, FileId, Name] ->
            Transfer = {
                emqx_ft_fs_util:unescape_filename(ClientId),
                emqx_ft_fs_util:unescape_filename(FileId)
            },
            {ok, {Transfer, Name}};
        _ ->
            {error, invalid_key}
    end.

View File

@ -0,0 +1,506 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Filesystem storage backend
%%
%% NOTE
%% If you plan to change storage layout please consult `emqx_ft_storage_fs_gc`
%% to see how much it would break or impair GC.
-module(emqx_ft_storage_fs).
-behaviour(emqx_ft_storage).
-include_lib("emqx/include/logger.hrl").
-include_lib("snabbkaffe/include/trace.hrl").
-export([child_spec/1]).
% Segments-related API
-export([store_filemeta/3]).
-export([store_segment/3]).
-export([read_filemeta/2]).
-export([list/3]).
-export([pread/5]).
-export([lookup_local_assembler/1]).
-export([assemble/3]).
-export([transfers/1]).
% GC API
% TODO: This is quickly becomes hairy.
-export([get_root/1]).
-export([get_subdir/2]).
-export([get_subdir/3]).
-export([files/2]).
-export([on_config_update/2]).
-export([start/1]).
-export([stop/1]).
-export_type([storage/0]).
-export_type([filefrag/1]).
-export_type([filefrag/0]).
-export_type([transferinfo/0]).
-export_type([file_error/0]).
-type transfer() :: emqx_ft:transfer().
-type offset() :: emqx_ft:offset().
-type filemeta() :: emqx_ft:filemeta().
-type segment() :: emqx_ft:segment().
-type segmentinfo() :: #{
offset := offset(),
size := _Bytes :: non_neg_integer()
}.
-type transferinfo() :: #{
filemeta => filemeta()
}.
% TODO naming
-type filefrag(T) :: #{
path := file:name(),
timestamp := emqx_datetime:epoch_second(),
size := _Bytes :: non_neg_integer(),
fragment := T
}.
-type filefrag() :: filefrag(
{filemeta, filemeta()}
| {segment, segmentinfo()}
).
-define(FRAGDIR, frags).
-define(TEMPDIR, tmp).
-define(MANIFEST, "MANIFEST.json").
-define(SEGMENT, "SEG").
-type segments() :: #{
root := file:name(),
gc := #{
interval := non_neg_integer(),
maximum_segments_ttl := non_neg_integer(),
minimum_segments_ttl := non_neg_integer()
}
}.
-type storage() :: #{
type := 'local',
segments := segments(),
exporter := emqx_ft_storage_exporter:exporter()
}.
-type file_error() ::
file:posix()
%% Filename is incompatible with the backing filesystem.
| badarg
%% System limit (e.g. number of ports) reached.
| system_limit.
%% Related resources childspecs
-spec child_spec(storage()) ->
    [supervisor:child_spec()].
%% Processes that must run alongside this storage backend (currently only
%% the segments garbage collector).
child_spec(Storage) ->
    [
        #{
            id => emqx_ft_storage_fs_gc,
            start => {emqx_ft_storage_fs_gc, start_link, [Storage]},
            restart => permanent
        }
    ].
%% Store manifest in the backing filesystem.
%% Atomic operation.
-spec store_filemeta(storage(), transfer(), filemeta()) ->
    % Quota? Some lower level errors?
    ok | {error, conflict} | {error, file_error()}.
store_filemeta(Storage, Transfer, Meta) ->
    Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), ?MANIFEST),
    case read_file(Filepath, fun decode_filemeta/1) of
        {ok, Meta} ->
            % Idempotent case: identical meta is already stored; just refresh
            % its timestamps instead of rewriting the file.
            _ = touch_file(Filepath),
            ok;
        {ok, Conflict} ->
            ?SLOG(warning, #{
                msg => "filemeta_conflict", transfer => Transfer, new => Meta, old => Conflict
            }),
            % TODO
            % We won't see conflicts in case of concurrent `store_filemeta`
            % requests. It's rather odd scenario so it's fine not to worry
            % about it too much now.
            {error, conflict};
        {error, Reason} when Reason =:= notfound; Reason =:= corrupted; Reason =:= enoent ->
            write_file_atomic(Storage, Transfer, Filepath, encode_filemeta(Meta));
        {error, _} = Error ->
            Error
    end.
%% Store a segment in the backing filesystem.
%% Atomic operation.
-spec store_segment(storage(), transfer(), segment()) ->
    % Where is the checksum gets verified? Upper level probably.
    % Quota? Some lower level errors?
    ok | {error, file_error()}.
store_segment(Storage, Transfer, Segment = {_Offset, Content}) ->
    % Offset and size are encoded in the filename, see `mk_segment_filename`.
    Filename = mk_segment_filename(Segment),
    Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), Filename),
    write_file_atomic(Storage, Transfer, Filepath, Content).

-spec read_filemeta(storage(), transfer()) ->
    {ok, filemeta()} | {error, corrupted} | {error, file_error()}.
read_filemeta(Storage, Transfer) ->
    Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), ?MANIFEST),
    read_file(Filepath, fun decode_filemeta/1).
-spec list(storage(), transfer(), _What :: fragment) ->
    % Some lower level errors? {error, notfound}?
    % Result will contain zero or only one filemeta.
    {ok, [filefrag({filemeta, filemeta()} | {segment, segmentinfo()})]}
    | {error, file_error()}.
%% Enumerate stored fragments (manifest + segments) of a transfer.
%% A missing directory simply means nothing was stored yet.
list(Storage, Transfer, What = fragment) ->
    Dirname = mk_filedir(Storage, Transfer, get_subdirs_for(What)),
    case file:list_dir(Dirname) of
        {ok, Filenames} ->
            % TODO
            % In case of `What = result` there might be more than one file (though
            % extremely bad luck is needed for that, e.g. concurrent assemblers with
            % different filemetas from different nodes). This might be unexpected for a
            % client given the current protocol, yet might be helpful in the future.
            {ok, filtermap_files(fun mk_filefrag/2, Dirname, Filenames)};
        {error, enoent} ->
            {ok, []};
        {error, _} = Error ->
            Error
    end.
-spec pread(storage(), transfer(), filefrag(), offset(), _Size :: non_neg_integer()) ->
    {ok, _Content :: iodata()} | {error, eof} | {error, file_error()}.
%% Positional read from a fragment file. The device is opened and closed on
%% every call, so concurrent reads need no coordination.
pread(_Storage, _Transfer, Frag, Offset, Size) ->
    Filepath = maps:get(path, Frag),
    case file:open(Filepath, [read, raw, binary]) of
        {ok, IoDevice} ->
            % NOTE
            % Reading empty file is always `eof`.
            Read = file:pread(IoDevice, Offset, Size),
            ok = file:close(IoDevice),
            case Read of
                {ok, Content} ->
                    {ok, Content};
                eof ->
                    {error, eof};
                {error, Reason} ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.
-spec assemble(storage(), transfer(), emqx_ft:bytes()) ->
    {async, _Assembler :: pid()} | ok | {error, _TODO}.
%% Kick off (or attach to) assembly of a transfer into its final export.
%% Sources are consulted in order: a local assembler already running, a
%% remote one, an already-finished export, then a fresh local assembler.
assemble(Storage, Transfer, Size) ->
    LookupSources = [
        fun() -> lookup_local_assembler(Transfer) end,
        fun() -> lookup_remote_assembler(Transfer) end,
        fun() -> check_if_already_exported(Storage, Transfer) end,
        fun() -> ensure_local_assembler(Storage, Transfer, Size) end
    ],
    lookup_assembler(LookupSources).

%%

%% List finished exports, delegated to the configured exporter.
files(Storage, Query) ->
    emqx_ft_storage_exporter:list(Storage, Query).
%%
%% Propagate a configuration change to the GC timer and the exporter.
on_config_update(StorageOld, StorageNew) ->
    % NOTE: this will reset GC timer, frequent changes would postpone GC indefinitely
    ok = emqx_ft_storage_fs_gc:reset(StorageNew),
    emqx_ft_storage_exporter:on_config_update(StorageOld, StorageNew).

%% Start the auxiliary processes under the file-transfer supervisor and
%% bring the exporter up (config transition: undefined -> Storage).
start(Storage) ->
    ok = lists:foreach(
        fun(ChildSpec) ->
            {ok, _Child} = supervisor:start_child(emqx_ft_sup, ChildSpec)
        end,
        child_spec(Storage)
    ),
    ok = emqx_ft_storage_exporter:on_config_update(undefined, Storage),
    ok.

%% Tear down in reverse order: exporter first, then auxiliary processes.
stop(Storage) ->
    ok = emqx_ft_storage_exporter:on_config_update(Storage, undefined),
    ok = lists:foreach(
        fun(#{id := ChildId}) ->
            _ = supervisor:terminate_child(emqx_ft_sup, ChildId),
            ok = supervisor:delete_child(emqx_ft_sup, ChildId)
        end,
        child_spec(Storage)
    ),
    ok.
%%
%% Try each lookup source in order, falling through on `{error, not_found}`.
%% The final source is authoritative: its result is returned as-is.
lookup_assembler([Source]) ->
    Source();
lookup_assembler([Source | Remaining]) ->
    Result = Source(),
    case Result of
        {error, not_found} -> lookup_assembler(Remaining);
        Found -> Found
    end.
%% Treat an existing export as assembly already done.
check_if_already_exported(Storage, Transfer) ->
    case files(Storage, #{transfer => Transfer}) of
        {ok, #{items := [_ | _]}} -> ok;
        _ -> {error, not_found}
    end.

%% Find an assembler process for this transfer on the local node.
lookup_local_assembler(Transfer) ->
    case emqx_ft_assembler:where(Transfer) of
        Pid when is_pid(Pid) -> {async, Pid};
        _ -> {error, not_found}
    end.

%% Ask every other running node for an assembler of this transfer and pick
%% the first one found.
lookup_remote_assembler(Transfer) ->
    Nodes = emqx:running_nodes() -- [node()],
    Assemblers = lists:flatmap(
        fun
            ({ok, {async, Pid}}) -> [Pid];
            (_) -> []
        end,
        emqx_ft_storage_fs_proto_v1:list_assemblers(Nodes, Transfer)
    ),
    case Assemblers of
        [Pid | _] -> {async, Pid};
        _ -> {error, not_found}
    end.
%% Start (or reuse) a supervised local assembler for the transfer.
ensure_local_assembler(Storage, Transfer, Size) ->
    {ok, Pid} = emqx_ft_assembler_sup:ensure_child(Storage, Transfer, Size),
    {async, Pid}.
-spec transfers(storage()) ->
    {ok, #{transfer() => transferinfo()}}.
%% Enumerate all transfers present in the segments storage by walking the
%% two-level <clientid>/<fileid> directory structure.
transfers(Storage) ->
    % TODO `Continuation`
    % There might be millions of transfers on the node, we need a protocol and
    % storage schema to iterate through them effectively.
    ClientIds = try_list_dir(get_root(Storage)),
    {ok,
        lists:foldl(
            fun(ClientId, Acc) -> transfers(Storage, ClientId, Acc) end,
            #{},
            ClientIds
        )}.

%% Fold over the file-id subdirectories of one client-id directory.
%% An unreadable directory is traced and skipped rather than failing the
%% whole enumeration.
transfers(Storage, ClientId, AccIn) ->
    Dirname = filename:join(get_root(Storage), ClientId),
    case file:list_dir(Dirname) of
        {ok, FileIds} ->
            lists:foldl(
                fun(FileId, Acc) ->
                    Transfer = dirnames_to_transfer(ClientId, FileId),
                    read_transferinfo(Storage, Transfer, Acc)
                end,
                AccIn,
                FileIds
            );
        {error, _Reason} ->
            ?tp(warning, "list_dir_failed", #{
                storage => Storage,
                directory => Dirname
            }),
            AccIn
    end.
%% Add one transfer's info to the accumulator. A transfer without a manifest
%% (yet) is still recorded, with empty info; unreadable manifests are traced
%% and the transfer is skipped entirely.
read_transferinfo(Storage, Transfer, Acc) ->
    case read_filemeta(Storage, Transfer) of
        {ok, Filemeta} ->
            Acc#{Transfer => #{filemeta => Filemeta}};
        {error, enoent} ->
            Acc#{Transfer => #{}};
        {error, Reason} ->
            ?tp(warning, "read_transferinfo_failed", #{
                storage => Storage,
                transfer => Transfer,
                reason => Reason
            }),
            Acc
    end.
-spec get_root(storage()) ->
    file:name().
%% Segments root, defaulting to "<data_dir>/file_transfer/segments".
get_root(Storage) ->
    case emqx_utils_maps:deep_find([segments, root], Storage) of
        {ok, Root} ->
            Root;
        {not_found, _, _} ->
            filename:join([emqx:data_dir(), file_transfer, segments])
    end.

-spec get_subdir(storage(), transfer()) ->
    file:name().
%% A transfer's own directory: <root>/<escaped clientid>/<escaped fileid>.
get_subdir(Storage, Transfer) ->
    mk_filedir(Storage, Transfer, []).

-spec get_subdir(storage(), transfer(), fragment | temporary) ->
    file:name().
%% Purpose-specific subdirectory of a transfer's directory.
get_subdir(Storage, Transfer, What) ->
    mk_filedir(Storage, Transfer, get_subdirs_for(What)).

get_subdirs_for(fragment) ->
    [?FRAGDIR];
get_subdirs_for(temporary) ->
    [?TEMPDIR].
%% Filemeta is persisted as JSON wrapped in a versioned prelude:
%% ["filemeta", Vsn, Meta].
-define(PRELUDE(Vsn, Meta), [<<"filemeta">>, Vsn, Meta]).

encode_filemeta(Meta) ->
    emqx_utils_json:encode(?PRELUDE(_Vsn = 1, emqx_ft:encode_filemeta(Meta))).

%% Decode a stored manifest; raises on schema violations so the caller can
%% treat the file as corrupted.
decode_filemeta(Binary) when is_binary(Binary) ->
    ?PRELUDE(_Vsn = 1, Map) = emqx_utils_json:decode(Binary, [return_maps]),
    case emqx_ft:decode_filemeta(Map) of
        {ok, Meta} ->
            Meta;
        {error, Reason} ->
            error(Reason)
    end.
%% Segment filename encodes its offset and size: "<?SEGMENT>.<offset>.<size>".
mk_segment_filename({Offset, Content}) ->
    lists:concat([?SEGMENT, ".", Offset, ".", byte_size(Content)]).

%% Inverse of `mk_segment_filename`: recover offset/size from the filename.
break_segment_filename(Filename) ->
    Regex = "^" ?SEGMENT "[.]([0-9]+)[.]([0-9]+)$",
    Result = re:run(Filename, Regex, [{capture, all_but_first, list}]),
    case Result of
        {match, [Offset, Size]} ->
            {ok, #{offset => list_to_integer(Offset), size => list_to_integer(Size)}};
        nomatch ->
            {error, invalid}
    end.
%% Directory for a transfer: client-id and file-id are escaped so they are
%% always safe filesystem names.
mk_filedir(Storage, {ClientId, FileId}, SubDirs) ->
    filename:join([
        get_root(Storage),
        emqx_ft_fs_util:escape_filename(ClientId),
        emqx_ft_fs_util:escape_filename(FileId)
        | SubDirs
    ]).

%% Inverse of the escaping above.
dirnames_to_transfer(ClientId, FileId) ->
    {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}.

mk_filepath(Storage, Transfer, SubDirs, Filename) ->
    filename:join(mk_filedir(Storage, Transfer, SubDirs), Filename).

%% List a directory, treating any error as "no entries".
try_list_dir(Dirname) ->
    case file:list_dir(Dirname) of
        {ok, List} -> List;
        {error, _} -> []
    end.
-include_lib("kernel/include/file.hrl").
read_file(Filepath, DecodeFun) ->
    emqx_ft_fs_util:read_decode_file(Filepath, DecodeFun).

%% Write `Content` into `Filepath` atomically: write into a temp file in the
%% transfer's temp dir first, then rename it into place. The trailing delete
%% cleans up the temp file on the failure path (after a successful rename
%% it is a harmless no-op, hence the ignored result).
write_file_atomic(Storage, Transfer, Filepath, Content) when is_binary(Content) ->
    TempFilepath = mk_temp_filepath(Storage, Transfer, filename:basename(Filepath)),
    Result = emqx_utils:pipeline(
        [
            fun filelib:ensure_dir/1,
            fun write_contents/2,
            fun(_) -> mv_temp_file(TempFilepath, Filepath) end
        ],
        TempFilepath,
        Content
    ),
    case Result of
        {ok, _, _} ->
            _ = file:delete(TempFilepath),
            ok;
        {error, Reason, _} ->
            {error, Reason}
    end.
%% Unique temp filename to avoid collisions between concurrent writers.
mk_temp_filepath(Storage, Transfer, Filename) ->
    Unique = erlang:unique_integer([positive]),
    filename:join(get_subdir(Storage, Transfer, temporary), mk_filename([Unique, ".", Filename])).
%% Join a list of mixed-type components into a flat filename charlist.
mk_filename(Components) ->
    lists:flatmap(fun mk_filename_component/1, Components).

%% Normalize a single filename component to a charlist.
mk_filename_component(I) when is_integer(I) -> integer_to_list(I);
mk_filename_component(A) when is_atom(A) -> atom_to_list(A);
mk_filename_component(B) when is_binary(B) -> unicode:characters_to_list(B);
mk_filename_component(S) when is_list(S) -> S.
write_contents(Filepath, Content) ->
    file:write_file(Filepath, Content).

%% Rename a temp file into its final place, creating the target dir first.
mv_temp_file(TempFilepath, Filepath) ->
    _ = filelib:ensure_dir(Filepath),
    file:rename(TempFilepath, Filepath).

%% Bump both mtime and atime to "now" (used by `store_filemeta` to refresh
%% an already-stored manifest; presumably relevant for GC TTLs — confirm).
touch_file(Filepath) ->
    Now = erlang:localtime(),
    file:change_time(Filepath, _Mtime = Now, _Atime = Now).

filtermap_files(Fun, Dirname, Filenames) ->
    lists:filtermap(fun(Filename) -> Fun(Dirname, Filename) end, Filenames).
%% Classify a directory entry into a fragment record.
%% The manifest filename and the segment prefix are the only recognized
%% shapes; anything else is traced as a rogue file and filtered out.
mk_filefrag(Dirname, Filename = ?MANIFEST) ->
    mk_filefrag(Dirname, Filename, filemeta, fun read_frag_filemeta/2);
mk_filefrag(Dirname, Filename = ?SEGMENT ++ _) ->
    mk_filefrag(Dirname, Filename, segment, fun read_frag_segmentinfo/2);
mk_filefrag(_Dirname, _Filename) ->
    ?tp(warning, "rogue_file_found", #{
        directory => _Dirname,
        filename => _Filename
    }),
    false.

%% Stat the file and decode its fragment payload with `Fun`; decoding
%% failures are traced and the entry is filtered out.
mk_filefrag(Dirname, Filename, Tag, Fun) ->
    Filepath = filename:join(Dirname, Filename),
    % TODO error handling?
    {ok, Fileinfo} = file:read_file_info(Filepath),
    case Fun(Filename, Filepath) of
        {ok, Frag} ->
            {true, #{
                path => Filepath,
                timestamp => Fileinfo#file_info.mtime,
                size => Fileinfo#file_info.size,
                fragment => {Tag, Frag}
            }};
        {error, _Reason} ->
            ?tp(warning, "mk_filefrag_failed", #{
                directory => Dirname,
                filename => Filename,
                type => Tag,
                reason => _Reason
            }),
            false
    end.
%% Manifest fragment payload lives in the file contents.
read_frag_filemeta(_Filename, Filepath) ->
    read_file(Filepath, fun decode_filemeta/1).

%% Segment fragment payload is encoded entirely in the filename.
read_frag_segmentinfo(Filename, _Filepath) ->
    break_segment_filename(Filename).

View File

@ -0,0 +1,393 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Filesystem storage GC
%%
%% This is conceptually a part of the Filesystem storage backend, even
%% though it's tied to the backend module with somewhat narrow interface.
-module(emqx_ft_storage_fs_gc).
-include_lib("emqx_ft/include/emqx_ft_storage_fs.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("kernel/include/file.hrl").
-include_lib("snabbkaffe/include/trace.hrl").
%% API
-export([start_link/1]).
-export([collect/0]).
-export([collect/3]).
-export([reset/0]).
-export([reset/1]).
-behaviour(gen_server).
%% `gen_server` callbacks
-export([init/1]).
-export([handle_call/3]).
-export([handle_cast/2]).
-export([handle_info/2]).
%% Server state: reference to the pending periodic GC timer (if armed) and
%% the stats of the most recent collection run.
-record(st, {
    next_gc_timer :: maybe(reference()),
    last_gc :: maybe(gcstats())
}).
-type gcstats() :: #gcstats{}.
%% Periodic GC is considered enabled iff the configured interval is a
%% positive integer; 0 / undefined disable it (see `start_timer/2`).
-define(IS_ENABLED(INTERVAL), (is_integer(INTERVAL) andalso INTERVAL > 0)).
%%
start_link(Storage) ->
    gen_server:start_link(mk_server_ref(global), ?MODULE, Storage, []).
%% Run a full garbage collection synchronously, returning its stats.
-spec collect() -> gcstats().
collect() ->
    gen_server:call(mk_server_ref(global), {collect, erlang:system_time()}, infinity).
%% Restart the periodic GC timer from the current local storage config.
-spec reset() -> ok | {error, _}.
reset() ->
    emqx_ft_storage:with_storage_type(local, fun reset/1).
-spec reset(emqx_ft_storage_fs:storage()) -> ok.
reset(Storage) ->
    gen_server:cast(mk_server_ref(global), {reset, gc_interval(Storage)}).
%% Asynchronously collect a single (complete) transfer across `Nodes`,
%% but only when GC is enabled for this storage.
collect(Storage, Transfer, Nodes) ->
    gc_enabled(Storage) andalso cast_collect(mk_server_ref(global), Storage, Transfer, Nodes).
mk_server_ref(Name) ->
    % TODO
    {via, gproc, {n, l, {?MODULE, Name}}}.
%%
init(Storage) ->
    St = #st{},
    {ok, start_timer(gc_interval(Storage), St)}.
%% Synchronous GC request: run a collection unless one already finished
%% after the call was issued, then reply with the latest stats.
handle_call({collect, CalledAt}, _From, St) ->
    StNext = maybe_collect_garbage(CalledAt, St),
    {reply, StNext#st.last_gc, StNext};
handle_call(Call, From, St) ->
    % NOTE(review): unexpected calls are logged but never replied to, so such
    % a caller would block until its call timeout — confirm this is intended.
    ?SLOG(error, #{msg => "unexpected_call", call => Call, from => From}),
    {noreply, St}.
%% Per-transfer collection: handle the head node (locally, if it is us),
%% then forward the remaining node list back to ourselves one hop at a time.
handle_cast({collect, Storage, Transfer, [Node | Rest]}, St) ->
    ok = do_collect_transfer(Storage, Transfer, Node, St),
    case Rest of
        [_ | _] ->
            cast_collect(self(), Storage, Transfer, Rest);
        [] ->
            ok
    end,
    {noreply, St};
handle_cast({reset, Interval}, St) ->
    {noreply, start_timer(Interval, cancel_timer(St))};
handle_cast(Cast, St) ->
    ?SLOG(error, #{msg => "unexpected_cast", cast => Cast}),
    {noreply, St}.
%% Periodic GC tick: run a collection and rearm the timer.
handle_info({timeout, TRef, collect}, St = #st{next_gc_timer = TRef}) ->
    StNext = do_collect_garbage(St),
    {noreply, start_timer(StNext#st{next_gc_timer = undefined})}.
%% Collect a single complete transfer when `Node` is the local node; remote
%% nodes are reached via `collect/3`, which fans the node list out hop by hop.
do_collect_transfer(Storage, Transfer, Node, _St = #st{}) when Node == node() ->
    Stats = try_collect_transfer(Storage, Transfer, complete, init_gcstats()),
    %% NOTE: report against `Storage`, consistently with `do_collect_garbage/1`
    %% (previously the server state record was passed here by mistake, so the
    %% trace/warning carried the gen_server state as "storage").
    ok = maybe_report(Stats, Storage),
    ok;
do_collect_transfer(_Storage, _Transfer, _Node, _St = #st{}) ->
    % TODO
    ok.
cast_collect(Ref, Storage, Transfer, Nodes) ->
    gen_server:cast(Ref, {collect, Storage, Transfer, Nodes}).
%% Run GC for a synchronous `collect/0` request, unless a run already
%% finished after the request was issued (then its stats are fresh enough).
maybe_collect_garbage(_CalledAt, St = #st{last_gc = undefined}) ->
    do_collect_garbage(St);
maybe_collect_garbage(CalledAt, St = #st{last_gc = #gcstats{finished_at = FinishedAt}}) ->
    case FinishedAt > CalledAt of
        true ->
            St;
        false ->
            % Disarm, collect, rearm — so the periodic timer restarts cleanly.
            start_timer(do_collect_garbage(cancel_timer(St)))
    end.
%% Collect garbage in the local storage and remember the resulting stats.
do_collect_garbage(St = #st{}) ->
    emqx_ft_storage:with_storage_type(local, fun(Storage) ->
        Stats = collect_garbage(Storage),
        ok = maybe_report(Stats, Storage),
        St#st{last_gc = Stats}
    end).
%% Emit a warning trace if any errors were accumulated, a regular trace otherwise.
maybe_report(#gcstats{errors = Errors}, Storage) when map_size(Errors) > 0 ->
    ?tp(warning, "garbage_collection_errors", #{errors => Errors, storage => Storage});
maybe_report(#gcstats{} = _Stats, _Storage) ->
    ?tp(garbage_collection, #{stats => _Stats, storage => _Storage}).
start_timer(St) ->
    Interval = emqx_ft_storage:with_storage_type(local, fun gc_interval/1),
    start_timer(Interval, St).
%% Arm the periodic GC timer; a non-positive / undefined interval disables it.
start_timer(Interval, St = #st{next_gc_timer = undefined}) when ?IS_ENABLED(Interval) ->
    St#st{next_gc_timer = emqx_utils:start_timer(Interval, collect)};
start_timer(Interval, St) ->
    ?SLOG(warning, #{msg => "periodic_gc_disabled", interval => Interval}),
    St.
cancel_timer(St = #st{next_gc_timer = undefined}) ->
    St;
cancel_timer(St = #st{next_gc_timer = TRef}) ->
    ok = emqx_utils:cancel_timer(TRef),
    St#st{next_gc_timer = undefined}.
gc_enabled(Storage) ->
    ?IS_ENABLED(gc_interval(Storage)).
gc_interval(Storage) ->
    emqx_ft_conf:gc_interval(Storage).
%%
%% Full GC pass: walk every known transfer and collect whatever is outdated.
collect_garbage(Storage) ->
    Stats = init_gcstats(),
    {ok, Transfers} = emqx_ft_storage_fs:transfers(Storage),
    collect_garbage(Storage, Transfers, Stats).
collect_garbage(Storage, Transfers, Stats) ->
    finish_gcstats(
        maps:fold(
            fun(Transfer, TransferInfo, StatsAcc) ->
                % TODO: throttling?
                try_collect_transfer(Storage, Transfer, TransferInfo, StatsAcc)
            end,
            Stats,
            Transfers
        )
    ).
try_collect_transfer(Storage, Transfer, TransferInfo = #{}, Stats) ->
    % File transfer might still be incomplete.
    % Any outdated fragments and temporary files should be collectable. As a kind of
    % heuristic we delete the transfer directory itself only if it is also outdated
    % _and was empty at the start of GC_, as a precaution against races between
    % writers and GCs.
    Cutoff =
        case get_segments_ttl(Storage, TransferInfo) of
            TTL when is_integer(TTL) ->
                erlang:system_time(second) - TTL;
            undefined ->
                % No TTL known: cutoff 0 means no POSIX-timestamped entry is outdated.
                0
        end,
    {FragCleaned, Stats1} = collect_outdated_fragments(Storage, Transfer, Cutoff, Stats),
    {TempCleaned, Stats2} = collect_outdated_tempfiles(Storage, Transfer, Cutoff, Stats1),
    % TODO: collect empty directories separately
    case FragCleaned and TempCleaned of
        true ->
            collect_transfer_directory(Storage, Transfer, Cutoff, Stats2);
        false ->
            Stats2
    end;
try_collect_transfer(Storage, Transfer, complete, Stats) ->
    % File transfer is complete.
    % We should be good to delete fragments and temporary files with their respective
    % directories altogether.
    {_, Stats1} = collect_fragments(Storage, Transfer, Stats),
    {_, Stats2} = collect_tempfiles(Storage, Transfer, Stats1),
    Stats2.
%% Collect the whole fragment / temporary subdirectory of a complete transfer
%% (filter `true` collects unconditionally).
collect_fragments(Storage, Transfer, Stats) ->
    collect_subdir(Storage, Transfer, fragment, true, Stats).
collect_tempfiles(Storage, Transfer, Stats) ->
    collect_subdir(Storage, Transfer, temporary, true, Stats).
%% Collect only those fragments / temporary files modified at or before `Cutoff`.
collect_outdated_fragments(Storage, Transfer, Cutoff, Stats) ->
    collect_subdir(Storage, Transfer, fragment, filter_older_than(Cutoff), Stats).
collect_outdated_tempfiles(Storage, Transfer, Cutoff, Stats) ->
    collect_subdir(Storage, Transfer, temporary, filter_older_than(Cutoff), Stats).
%% Common helper for the four entry points above: resolve the transfer's
%% subdirectory of kind `What` and collect entries matching `Filter`.
collect_subdir(Storage, Transfer, What, Filter, Stats) ->
    Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, What),
    maybe_collect_directory(Dirname, Filter, Stats).
%% Try to remove the transfer directory itself, then climb and remove any
%% now-empty parents up to the segments root.
collect_transfer_directory(Storage, Transfer, Cutoff, Stats) ->
    Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer),
    Filter =
        case Stats of
            #gcstats{directories = 0} ->
                % Nothing was collected, this is a leftover from a past complete transfer GC.
                filter_older_than(Cutoff);
            #gcstats{} ->
                % Usual incomplete transfer GC, collect directories unconditionally.
                true
        end,
    case collect_empty_directory(Dirname, Filter, Stats) of
        {true, StatsNext} ->
            collect_parents(Dirname, get_segments_root(Storage), StatsNext);
        {false, StatsNext} ->
            StatsNext
    end.
%% Predicate selecting filesystem entries last modified at or before `Cutoff`
%% (POSIX seconds).
filter_older_than(Cutoff) ->
    fun(_Filepath, #file_info{mtime = Mtime}) -> Mtime =< Cutoff end.
%% Walk upwards from `Dirname`, deleting now-empty parent directories, and
%% stop at the storage root `Until` (which itself is never deleted).
collect_parents(Dirname, Until, Stats) ->
    Parent = filename:dirname(Dirname),
    case is_same_filepath(Parent, Until) orelse file:del_dir(Parent) of
        true ->
            % Reached the root: done climbing.
            Stats;
        ok ->
            % NOTE(review): the deleted path is `Parent`, yet the trace reports
            % `Dirname` — confirm whether that is intended.
            ?tp(garbage_collected_directory, #{path => Dirname}),
            collect_parents(Parent, Until, account_gcstat_directory(Stats));
        {error, eexist} ->
            % Parent is not empty (still in use by other transfers): stop.
            Stats;
        {error, Reason} ->
            register_gcstat_error({directory, Parent}, Reason, Stats)
    end.
%% Collect a directory if it exists; an absent directory counts as clean.
maybe_collect_directory(Dirpath, Filter, Stats) ->
    case filelib:is_dir(Dirpath) of
        true ->
            collect_filepath(Dirpath, Filter, Stats);
        false ->
            {true, Stats}
    end.
%% Collect a single filesystem entry (file, or directory recursively).
%% Returns `{WasFullyCollected, Stats}`; an entry that vanished concurrently
%% (`enoent`) counts as collected, since some other actor got rid of it.
-spec collect_filepath(file:name(), Filter, gcstats()) -> {boolean(), gcstats()} when
    Filter :: boolean() | fun((file:name(), file:file_info()) -> boolean()).
collect_filepath(Filepath, Filter, Stats) ->
    case file:read_link_info(Filepath, [{time, posix}, raw]) of
        {ok, Fileinfo} ->
            collect_filepath(Filepath, Fileinfo, Filter, Stats);
        {error, Reason} ->
            {Reason == enoent, register_gcstat_error({path, Filepath}, Reason, Stats)}
    end.
%% Dispatch on the entry type obtained from `read_link_info/2`.
collect_filepath(Filepath, #file_info{type = directory} = Fileinfo, Filter, Stats) ->
    collect_directory(Filepath, Fileinfo, Filter, Stats);
collect_filepath(Filepath, #file_info{type = regular} = Fileinfo, Filter, Stats) ->
    case filter_filepath(Filter, Filepath, Fileinfo) andalso file:delete(Filepath, [raw]) of
        false ->
            % Filtered out: the file is too fresh to collect.
            {false, Stats};
        ok ->
            ?tp(garbage_collected_file, #{path => Filepath}),
            {true, account_gcstat(Fileinfo, Stats)};
        {error, Reason} ->
            {Reason == enoent, register_gcstat_error({file, Filepath}, Reason, Stats)}
    end;
collect_filepath(Filepath, Fileinfo, _Filter, Stats) ->
    % Neither a regular file nor a directory (symlink, device, ...): leave it
    % alone and record the oddity as an error.
    {false, register_gcstat_error({file, Filepath}, {unexpected, Fileinfo}, Stats)}.
%% Collect a directory's contents, then the directory itself — but only if
%% everything inside was successfully collected.
collect_directory(Dirpath, Fileinfo, Filter, Stats) ->
    case file:list_dir(Dirpath) of
        {ok, Filenames} ->
            {Clean, StatsNext} = collect_files(Dirpath, Filenames, Filter, Stats),
            case Clean of
                true ->
                    collect_empty_directory(Dirpath, Fileinfo, Filter, StatsNext);
                false ->
                    {false, StatsNext}
            end;
        {error, Reason} ->
            {false, register_gcstat_error({directory, Dirpath}, Reason, Stats)}
    end.
%% Collect each listed entry in turn; the listing counts as fully collected
%% only when every single entry was.
collect_files(Dirname, Filenames, Filter, Stats) ->
    lists:foldl(
        fun(Filename, {Complete, StatsAcc}) ->
            Filepath = filename:join(Dirname, Filename),
            {Collected, StatsNext} = collect_filepath(Filepath, Filter, StatsAcc),
            {Collected andalso Complete, StatsNext}
        end,
        {true, Stats},
        Filenames
    ).
%% Stat a directory and delete it if the filter lets it through; a vanished
%% directory (`enoent`) counts as collected.
collect_empty_directory(Dirpath, Filter, Stats) ->
    case file:read_link_info(Dirpath, [{time, posix}, raw]) of
        {ok, Dirinfo} ->
            collect_empty_directory(Dirpath, Dirinfo, Filter, Stats);
        {error, Reason} ->
            {Reason == enoent, register_gcstat_error({directory, Dirpath}, Reason, Stats)}
    end.
collect_empty_directory(Dirpath, Dirinfo, Filter, Stats) ->
    case filter_filepath(Filter, Dirpath, Dirinfo) andalso file:del_dir(Dirpath) of
        false ->
            % Filtered out: the directory is too fresh to collect.
            {false, Stats};
        ok ->
            ?tp(garbage_collected_directory, #{path => Dirpath}),
            {true, account_gcstat_directory(Stats)};
        {error, Reason} ->
            % Includes `eexist`: someone wrote into the directory after we
            % listed it — treat it as not collected.
            {false, register_gcstat_error({directory, Dirpath}, Reason, Stats)}
    end.
%% Evaluate a collection filter: either a constant boolean or a binary
%% predicate over the path and its `#file_info{}`.
filter_filepath(Constant, _Filepath, _Fileinfo) when is_boolean(Constant) ->
    Constant;
filter_filepath(Predicate, Filepath, Fileinfo) when is_function(Predicate) ->
    Predicate(Filepath, Fileinfo).
%% Compare two paths for equality after absolutization, coercing mixed
%% list/binary representations to binaries first.
is_same_filepath(P1, P2) when is_binary(P1) andalso is_binary(P2) ->
    filename:absname(P1) == filename:absname(P2);
is_same_filepath(P1, P2) when is_list(P1) andalso is_list(P2) ->
    filename:absname(P1) == filename:absname(P2);
is_same_filepath(P1, P2) when is_binary(P1) ->
    is_same_filepath(P1, filepath_to_binary(P2));
is_same_filepath(P1, P2) when is_binary(P2) ->
    %% NOTE: previously the (list, binary) argument pair had no matching clause
    %% and crashed with `function_clause`; coerce symmetrically instead.
    is_same_filepath(filepath_to_binary(P1), P2).
%% Encode a path charlist as a binary in the filesystem's native encoding.
filepath_to_binary(S) ->
    unicode:characters_to_binary(S, unicode, file:native_name_encoding()).
%% Effective TTL for the segments of a transfer: the value requested in the
%% transfer's filemeta (if any) clamped into the configured min/max bounds.
get_segments_ttl(Storage, TransferInfo) ->
    Bounds = emqx_ft_conf:segments_ttl(Storage),
    clamp(Bounds, try_get_filemeta_ttl(TransferInfo)).
%% Extract the requested segments TTL from the transfer info, if present.
try_get_filemeta_ttl(TransferInfo) ->
    case TransferInfo of
        #{filemeta := Filemeta} -> maps:get(segments_ttl, Filemeta, undefined);
        #{} -> undefined
    end.
%% Clamp `V` into `{Min, Max}`; with no bounds configured, pass it through.
%% (An `undefined` V compares greater than any integer, so it clamps to Max.)
clamp(undefined, V) ->
    V;
clamp({Min, Max}, V) ->
    min(Max, max(Min, V)).
%%
%% Fresh stats record stamped with the run's start time.
init_gcstats() ->
    #gcstats{started_at = erlang:system_time()}.
finish_gcstats(Stats) ->
    Stats#gcstats{finished_at = erlang:system_time()}.
%% Account a collected regular file: bump the counter and reclaimed space.
account_gcstat(Fileinfo, Stats = #gcstats{files = Files, space = Space}) ->
    Stats#gcstats{
        files = Files + 1,
        space = Space + Fileinfo#file_info.size
    }.
account_gcstat_directory(Stats = #gcstats{directories = Directories}) ->
    Stats#gcstats{
        directories = Directories + 1
    }.
%% Remember an error (keyed by subject) without aborting the GC run.
register_gcstat_error(Subject, Error, Stats = #gcstats{errors = Errors}) ->
    Stats#gcstats{errors = Errors#{Subject => Error}}.
%%
get_segments_root(Storage) ->
    emqx_ft_storage_fs:get_root(Storage).

View File

@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% These methods are called via RPC by `emqx_ft_storage_fs`.
%% They populate the call with actual storage which may be configured differently
%% on a concrete node.
-module(emqx_ft_storage_fs_proxy).
-export([
    list_local/2,
    pread_local/4,
    lookup_local_assembler/1
]).
%% Each entry point resolves the node-local `local` storage configuration and
%% forwards the operation to it via `emqx_ft_storage:with_storage_type/3`.
list_local(Transfer, What) ->
    emqx_ft_storage:with_storage_type(local, list, [Transfer, What]).
pread_local(Transfer, Frag, Offset, Size) ->
    emqx_ft_storage:with_storage_type(local, pread, [Transfer, Frag, Offset, Size]).
lookup_local_assembler(Transfer) ->
    emqx_ft_storage:with_storage_type(local, lookup_local_assembler, [Transfer]).

View File

@ -0,0 +1,139 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Reader process over a file on the local filesystem, tied to the lifetime
%% of a caller process: reads are served through a `gen_server` so they can be
%% driven remotely (see `emqx_ft_storage_fs_reader_proto_v1`) and the reader
%% is torn down when the caller dies.
-module(emqx_ft_storage_fs_reader).
-behaviour(gen_server).
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
%% API
-export([
    start_link/2,
    start_supervised/2,
    table/1,
    table/2,
    read/2
]).
%% gen_server callbacks
-export([
    init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    terminate/2,
    code_change/3
]).
-define(DEFAULT_CHUNK_SIZE, 1024).
-define(IS_FILENAME(Filename), (is_list(Filename) or is_binary(Filename))).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
%% Expose a reader as a lazy QLC table of binary chunks (default chunk size).
-spec table(pid()) -> qlc:query_handle().
table(ReaderPid) when is_pid(ReaderPid) ->
    table(ReaderPid, ?DEFAULT_CHUNK_SIZE).
%% Expose a reader as a lazy QLC table yielding chunks of at most `Bytes`.
%% RPC failures are logged and terminate the table early (as an empty tail).
-spec table(pid(), pos_integer()) -> qlc:query_handle().
table(ReaderPid, Bytes) when is_pid(ReaderPid) andalso is_integer(Bytes) andalso Bytes > 0 ->
    NextFun = fun NextFun(Pid) ->
        case emqx_ft_storage_fs_reader_proto_v1:read(node(Pid), Pid, Bytes) of
            eof ->
                [];
            {ok, Data} ->
                % Improper list `[Object | ContinuationFun]` is the contract
                % of the traversal fun expected by `qlc:table/2`.
                [Data] ++ fun() -> NextFun(Pid) end;
            {ErrorKind, Reason} when ErrorKind =:= badrpc; ErrorKind =:= error ->
                ?SLOG(warning, #{msg => "file_read_error", kind => ErrorKind, reason => Reason}),
                []
        end
    end,
    qlc:table(fun() -> NextFun(ReaderPid) end, []).
%% NOTE: these specs previously referenced `filename:filename()`, a type the
%% `filename` module does not define; `file:name_all()` matches the
%% `?IS_FILENAME` guard (charlist or binary).
-spec start_link(pid(), file:name_all()) -> startlink_ret().
start_link(CallerPid, Filename) when
    is_pid(CallerPid) andalso
    ?IS_FILENAME(Filename)
->
    gen_server:start_link(?MODULE, [CallerPid, Filename], []).
-spec start_supervised(pid(), file:name_all()) -> startlink_ret().
start_supervised(CallerPid, Filename) when
    is_pid(CallerPid) andalso
    ?IS_FILENAME(Filename)
->
    emqx_ft_storage_fs_reader_sup:start_child(CallerPid, Filename).
%% Read the next chunk of at most `Bytes` from the reader process.
-spec read(pid(), pos_integer()) -> {ok, binary()} | eof | {error, term()}.
read(Pid, Bytes) when
    is_pid(Pid) andalso
    is_integer(Bytes) andalso
    Bytes > 0
->
    gen_server:call(Pid, {read, Bytes}).
%%--------------------------------------------------------------------
%% gen_server callbacks
%%--------------------------------------------------------------------
%% Open the file and monitor the caller so the reader never outlives it.
init([CallerPid, Filename]) ->
    MRef = erlang:monitor(process, CallerPid),
    case file:open(Filename, [read, raw, binary]) of
        {ok, File} ->
            {ok, #{
                filename => Filename,
                file => File,
                caller_pid => CallerPid,
                mref => MRef
            }};
        {error, Reason} ->
            {stop, Reason}
    end.
%% Sequential reads; the server stops normally at EOF and abnormally on a
%% read error (the open handle is released when the process terminates).
handle_call({read, Bytes}, _From, #{file := File} = State) ->
    case file:read(File, Bytes) of
        {ok, Data} ->
            ?SLOG(debug, #{msg => "read", bytes => byte_size(Data)}),
            {reply, {ok, Data}, State};
        eof ->
            ?SLOG(debug, #{msg => "read", eof => true}),
            {stop, normal, eof, State};
        {error, Reason} = Error ->
            {stop, Reason, Error, State}
    end;
handle_call(Msg, _From, State) ->
    {reply, {error, {bad_call, Msg}}, State}.
%% Stop as soon as the monitored caller goes down.
handle_info(
    {'DOWN', MRef, process, CallerPid, _Reason}, #{mref := MRef, caller_pid := CallerPid} = State
) ->
    {stop, {caller_down, CallerPid}, State};
handle_info(Msg, State) ->
    ?SLOG(warning, #{msg => "unexpected_message", info_msg => Msg}),
    {noreply, State}.
handle_cast(Msg, State) ->
    %% NOTE: log metadata key fixed from the misspelled `case_msg`, for
    %% consistency with `info_msg` above.
    ?SLOG(warning, #{msg => "unexpected_message", cast_msg => Msg}),
    {noreply, State}.
terminate(_Reason, _State) ->
    ok.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

View File

@ -0,0 +1,49 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Supervisor for on-demand, temporary file reader processes
%% (`emqx_ft_storage_fs_reader`).
-module(emqx_ft_storage_fs_reader_sup).
-behaviour(supervisor).
-export([
    init/1,
    start_link/0,
    start_child/2
]).
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).
%% Start a reader for `Filename` whose lifetime is bound to `CallerPid`.
%% Supervisor error shapes are flattened to a plain `{error, Reason}`.
start_child(CallerPid, Filename) ->
    Childspec = #{
        id => {CallerPid, Filename},
        start => {emqx_ft_storage_fs_reader, start_link, [CallerPid, Filename]},
        restart => temporary
    },
    case supervisor:start_child(?MODULE, Childspec) of
        {ok, Pid} ->
            {ok, Pid};
        {error, {Reason, _Child}} ->
            {error, Reason};
        {error, Reason} ->
            %% e.g. `already_present`, which `supervisor:start_child/2` returns
            %% without a wrapping tuple — previously a `case_clause` crash.
            {error, Reason}
    end.
%% No static children; readers are added dynamically via `start_child/2`.
init(_) ->
    SupFlags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 1000
    },
    {ok, {SupFlags, []}}.

View File

@ -0,0 +1,65 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Top-level supervisor of the file-transfer application.
-module(emqx_ft_sup).
-behaviour(supervisor).
-export([start_link/0]).
-export([init/1]).
-define(SERVER, ?MODULE).
start_link() ->
    supervisor:start_link({local, ?SERVER}, ?MODULE, []).
%% One-for-one supervisor over the responder, assembler and reader subtrees.
%% NOTE(review): the responder child is declared `worker` yet has
%% `shutdown => infinity` — confirm whether it is actually a supervisor.
init([]) ->
    Children = [
        child_spec(emqx_ft_responder_sup, worker),
        child_spec(emqx_ft_assembler_sup, supervisor),
        child_spec(emqx_ft_storage_fs_reader_sup, supervisor)
    ],
    Flags = #{
        strategy => one_for_one,
        intensity => 100,
        period => 10
    },
    {ok, {Flags, Children}}.
%% Uniform permanent child spec: every child starts via `Mod:start_link/0`.
child_spec(Mod, Type) ->
    #{
        id => Mod,
        start => {Mod, start_link, []},
        restart => permanent,
        shutdown => infinity,
        type => Type,
        modules => [Mod]
    }.

View File

@ -0,0 +1,54 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% BPAPI v1 for the filesystem exporter: cluster-wide listing and reading of
%% exported files, routed through the per-node storage proxy.
-module(emqx_ft_storage_exporter_fs_proto_v1).
-behaviour(emqx_bpapi).
-export([introduced_in/0]).
-export([list_exports/2]).
-export([read_export_file/3]).
-include_lib("emqx/include/bpapi.hrl").
introduced_in() ->
    "5.0.17".
%% List exported files on every node in `Nodes`.
-spec list_exports([node()], emqx_ft_storage:query(_LocalCursor)) ->
    emqx_rpc:erpc_multicall(
        {ok, [emqx_ft_storage:file_info()]}
        | {error, file:posix() | disabled | {invalid_storage_type, _}}
    ).
list_exports(Nodes, Query) ->
    erpc:multicall(
        Nodes,
        emqx_ft_storage_exporter_fs_proxy,
        list_exports_local,
        [Query]
    ).
%% Obtain a reader for an exported file residing on `Node`; the reader's
%% lifetime is bound to `CallerPid`.
-spec read_export_file(node(), file:name(), pid()) ->
    {ok, emqx_ft_storage:reader()}
    | {error, term()}
    | no_return().
read_export_file(Node, Filepath, CallerPid) ->
    erpc:call(
        Node,
        emqx_ft_storage_exporter_fs_proxy,
        read_export_file_local,
        [Filepath, CallerPid]
    ).

View File

@ -0,0 +1,49 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% BPAPI v1 for the filesystem storage backend: cluster-wide access to
%% transfer fragments and assemblers, routed through the per-node proxy.
-module(emqx_ft_storage_fs_proto_v1).
-behaviour(emqx_bpapi).
-export([introduced_in/0]).
-export([multilist/3]).
-export([pread/5]).
-export([list_assemblers/2]).
-type offset() :: emqx_ft:offset().
-type transfer() :: emqx_ft:transfer().
-type filefrag() :: emqx_ft_storage_fs:filefrag().
-include_lib("emqx/include/bpapi.hrl").
introduced_in() ->
    "5.0.17".
%% List fragments or results of `Transfer` on every node in `Nodes`.
-spec multilist([node()], transfer(), fragment | result) ->
    emqx_rpc:erpc_multicall({ok, [filefrag()]} | {error, term()}).
multilist(Nodes, Transfer, What) ->
    erpc:multicall(Nodes, emqx_ft_storage_fs_proxy, list_local, [Transfer, What]).
%% Read `Size` bytes of fragment `Frag` at `Offset` from a single node.
-spec pread(node(), transfer(), filefrag(), offset(), _Size :: non_neg_integer()) ->
    {ok, [filefrag()]} | {error, term()} | no_return().
pread(Node, Transfer, Frag, Offset, Size) ->
    erpc:call(Node, emqx_ft_storage_fs_proxy, pread_local, [Transfer, Frag, Offset, Size]).
%% Locate assembler processes for `Transfer` across `Nodes`.
-spec list_assemblers([node()], transfer()) ->
    emqx_rpc:erpc_multicall([pid()]).
list_assemblers(Nodes, Transfer) ->
    erpc:multicall(Nodes, emqx_ft_storage_fs_proxy, lookup_local_assembler, [Transfer]).

View File

@ -0,0 +1,35 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% BPAPI v1 for remote file readers: chunked reads from a reader process
%% living on another node.
-module(emqx_ft_storage_fs_reader_proto_v1).
-behaviour(emqx_bpapi).
-export([introduced_in/0]).
-export([read/3]).
-include_lib("emqx/include/bpapi.hrl").
introduced_in() ->
    "5.0.17".
%% Read at most `Bytes` from the reader process `Pid` on `Node`.
-spec read(node(), pid(), pos_integer()) ->
    {ok, binary()} | eof | {error, term()} | no_return().
read(Node, Pid, Bytes) when
    is_atom(Node) andalso is_pid(Pid) andalso is_integer(Bytes) andalso Bytes > 0
->
    emqx_rpc:call(Node, emqx_ft_storage_fs_reader, read, [Pid, Bytes]).

View File

@ -0,0 +1,782 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% End-to-end tests for file transfer over MQTT, on a single node and on a
%% small cluster.
-module(emqx_ft_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
%% Assert that a publish yielded a PUBACK with the given reason code name.
-define(assertRCName(RCName, PublishRes),
    ?assertMatch(
        {ok, #{reason_code_name := RCName}},
        PublishRes
    )
).
all() ->
    [
        {group, single_node},
        {group, cluster}
    ].
groups() ->
    [
        {single_node, [parallel], [
            t_assemble_crash,
            t_corrupted_segment_retry,
            t_invalid_checksum,
            t_invalid_fileid,
            t_invalid_filename,
            t_invalid_meta,
            t_invalid_topic_format,
            t_meta_conflict,
            t_nasty_clientids_fileids,
            t_no_meta,
            t_no_segment,
            t_simple_transfer
        ]},
        {cluster, [], [
            t_switch_node,
            t_unreliable_migrating_client,
            {g_concurrent_fins, [{repeat_until_any_fail, 8}], [
                t_concurrent_fins
            ]}
        ]}
    ].
init_per_suite(Config) ->
    ok = emqx_common_test_helpers:start_apps([emqx_ft], set_special_configs(Config)),
    Config.
end_per_suite(_Config) ->
    ok = emqx_common_test_helpers:stop_apps([emqx_ft]),
    ok.
%% Per-application config hook, reused both for the CT node and for slave
%% cluster nodes.
set_special_configs(Config) ->
    fun
        (emqx_ft) ->
            % NOTE
            % Inhibit local fs GC to simulate it isn't fast enough to collect
            % complete transfers.
            Storage = emqx_utils_maps:deep_merge(
                emqx_ft_test_helpers:local_storage(Config),
                #{<<"local">> => #{<<"segments">> => #{<<"gc">> => #{<<"interval">> => 0}}}}
            ),
            emqx_ft_test_helpers:load_config(#{
                <<"enable">> => true,
                <<"storage">> => Storage
            });
        (_) ->
            ok
    end.
%% Single-node cases get a connected MQTT v5 client in the config; cluster
%% cases are expected to connect their own clients.
init_per_testcase(Case, Config) ->
    ClientId = atom_to_binary(Case),
    case ?config(group, Config) of
        cluster ->
            [{clientid, ClientId} | Config];
        _ ->
            {ok, C} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}]),
            {ok, _} = emqtt:connect(C),
            [{client, C}, {clientid, ClientId} | Config]
    end.
end_per_testcase(_Case, Config) ->
    _ = [ok = emqtt:stop(C) || {client, C} <- Config],
    ok.
%% The `cluster` group runs against two extra core nodes joined to this one.
init_per_group(Group = cluster, Config) ->
    Cluster = mk_cluster_specs(Config),
    ct:pal("Starting ~p", [Cluster]),
    Nodes = [
        emqx_common_test_helpers:start_slave(Name, Opts#{join_to => node()})
        || {Name, Opts} <- Cluster
    ],
    [{group, Group}, {cluster_nodes, Nodes} | Config];
init_per_group(Group, Config) ->
    [{group, Group} | Config].
end_per_group(cluster, Config) ->
    ok = lists:foreach(
        fun emqx_ft_test_helpers:stop_additional_node/1,
        ?config(cluster_nodes, Config)
    );
end_per_group(_Group, _Config) ->
    ok.
%% Two core nodes, MQTT over TCP on ports 2883 / 3883, other listeners off.
mk_cluster_specs(Config) ->
    Specs = [
        {core, emqx_ft_SUITE1, #{listener_ports => [{tcp, 2883}]}},
        {core, emqx_ft_SUITE2, #{listener_ports => [{tcp, 3883}]}}
    ],
    CommOpts = [
        {env, [{emqx, boot_modules, [broker, listeners]}]},
        {apps, [emqx_ft]},
        {conf, [{[listeners, Proto, default, enabled], false} || Proto <- [ssl, ws, wss]]},
        {env_handler, set_special_configs(Config)}
    ],
    emqx_common_test_helpers:emqx_cluster(
        Specs,
        CommOpts
    ).
%%--------------------------------------------------------------------
%% Tests
%%--------------------------------------------------------------------
t_invalid_topic_format(Config) ->
C = ?config(client, Config),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/fileid">>, <<>>, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/fileid/">>, <<>>, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/fileid/offset">>, <<>>, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/fileid/fin/offset">>, <<>>, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/">>, <<>>, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/X/Y/Z">>, <<>>, 1)
),
%% should not be handled by `emqx_ft`
?assertRCName(
no_matching_subscribers,
emqtt:publish(C, <<"$file">>, <<>>, 1)
).
t_invalid_fileid(Config) ->
C = ?config(client, Config),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file//init">>, <<>>, 1)
).
t_invalid_filename(Config) ->
C = ?config(client, Config),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(<<"f1">>), encode_meta(meta(".", <<>>)), 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(<<"f2">>), encode_meta(meta("..", <<>>)), 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(<<"f2">>), encode_meta(meta("../nice", <<>>)), 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(<<"f3">>), encode_meta(meta("/etc/passwd", <<>>)), 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(
C,
mk_init_topic(<<"f4">>),
encode_meta(meta(lists:duplicate(1000, $A), <<>>)),
1
)
),
?assertRCName(
success,
emqtt:publish(C, mk_init_topic(<<"f5">>), encode_meta(meta("146%", <<>>)), 1)
).
%% Happy path: init with filemeta, publish all segments in order,
%% finalize with `fin`, then verify the exported file content matches
%% the concatenation of the segments.
t_simple_transfer(Config) ->
C = ?config(client, Config),
Filename = "topsecret.pdf",
FileId = <<"f1">>,
Data = [<<"first">>, <<"second">>, <<"third">>],
Meta = #{size := Filesize} = meta(Filename, Data),
?assertRCName(
success,
emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)
),
%% Publish each chunk at its byte offset within the file.
lists:foreach(
fun({Chunk, Offset}) ->
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1)
)
end,
with_offsets(Data)
),
?assertRCName(
success,
emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
),
[Export] = list_files(?config(clientid, Config)),
?assertEqual(
{ok, iolist_to_binary(Data)},
read_export(Export)
).
%% Client ids and file ids containing path traversal, separators or
%% non-ASCII characters must not be able to escape the storage root;
%% the transfers must still complete and be readable back.
t_nasty_clientids_fileids(_Config) ->
Transfers = [
{<<".">>, <<".">>},
{<<"🌚"/utf8>>, <<"🌝"/utf8>>},
{<<"../..">>, <<"😤"/utf8>>},
{<<"/etc/passwd">>, <<"whitehat">>},
{<<"; rm -rf / ;">>, <<"whitehat">>}
],
ok = lists:foreach(
fun({ClientId, FileId}) ->
%% The file content is the client id itself, so reading it back
%% verifies the export ended up under the right transfer.
ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "justfile", ClientId),
[Export] = list_files(ClientId),
?assertEqual({ok, ClientId}, read_export(Export))
end,
Transfers
).
%% Re-initializing the same file id with a different filemeta must be
%% rejected: metadata is immutable for the lifetime of a transfer.
t_meta_conflict(Config) ->
C = ?config(client, Config),
Filename = "topsecret.pdf",
FileId = <<"f1">>,
Meta = meta(Filename, [<<"x">>]),
?assertRCName(
success,
emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)
),
ConflictMeta = Meta#{name => "conflict.pdf"},
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(FileId), encode_meta(ConflictMeta), 1)
).
%% Segments may arrive before the filemeta, but finalizing a transfer
%% that never received a filemeta must fail.
t_no_meta(Config) ->
C = ?config(client, Config),
FileId = <<"f1">>,
Data = <<"first">>,
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, 0), Data, 1)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_fin_topic(FileId, 42), <<>>, 1)
).
%% Finalizing a transfer with a missing segment (a hole in the byte
%% range) must fail.
t_no_segment(Config) ->
C = ?config(client, Config),
Filename = "topsecret.pdf",
FileId = <<"f1">>,
Data = [<<"first">>, <<"second">>, <<"third">>],
Meta = #{size := Filesize} = meta(Filename, Data),
?assertRCName(
success,
emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)
),
lists:foreach(
fun({Chunk, Offset}) ->
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1)
)
end,
%% Skip the first segment
tl(with_offsets(Data))
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
).
%% Malformed filemeta payloads are rejected at init time.
t_invalid_meta(Config) ->
C = ?config(client, Config),
FileId = <<"f1">>,
%% Valid JSON, but does not conform to the filemeta schema.
Meta = #{foo => <<"bar">>},
MetaPayload = emqx_utils_json:encode(Meta),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(FileId), MetaPayload, 1)
),
%% Not even valid JSON.
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_init_topic(FileId), <<"{oops;">>, 1)
).
%% A whole-file checksum mismatch is detected at finalization: all
%% segments are accepted, but `fin` must fail.
t_invalid_checksum(Config) ->
C = ?config(client, Config),
Filename = "topsecret.pdf",
FileId = <<"f1">>,
Data = [<<"first">>, <<"second">>, <<"third">>],
Meta = #{size := Filesize} = meta(Filename, Data),
%% Deliberately advertise the checksum of different content.
MetaPayload = encode_meta(Meta#{checksum => {sha256, sha256(<<"invalid">>)}}),
?assertRCName(
success,
emqtt:publish(C, mk_init_topic(FileId), MetaPayload, 1)
),
lists:foreach(
fun({Chunk, Offset}) ->
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1)
)
end,
with_offsets(Data)
),
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
).
%% Per-segment checksums: a segment whose payload does not match its
%% advertised checksum is rejected, and retrying the same offset with
%% the correct payload succeeds; the transfer then finalizes cleanly.
t_corrupted_segment_retry(Config) ->
C = ?config(client, Config),
Filename = "corruption.pdf",
FileId = <<"4242-4242">>,
Data = [<<"first">>, <<"second">>, <<"third">>],
[
{Seg1, Offset1},
{Seg2, Offset2},
{Seg3, Offset3}
] = with_offsets(Data),
%% Hex-encoded sha256 of each segment, carried in the segment topic.
[
Checksum1,
Checksum2,
Checksum3
] = [binary:encode_hex(sha256(S)) || S <- Data],
Meta = #{size := Filesize} = meta(Filename, Data),
?assertRCName(success, emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)),
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset1, Checksum1), Seg1, 1)
),
% segment is corrupted: payload has an extra trailing byte
?assertRCName(
unspecified_error,
emqtt:publish(C, mk_segment_topic(FileId, Offset2, Checksum2), <<Seg2/binary, 42>>, 1)
),
% retry the same offset with the intact payload
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset2, Checksum2), Seg2, 1)
),
?assertRCName(
success,
emqtt:publish(C, mk_segment_topic(FileId, Offset3, Checksum3), Seg3, 1)
),
?assertRCName(
success,
emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
).
%% A transfer started on one cluster node can be continued and
%% finalized on another node under the same client id; the assembled
%% file must contain all segments regardless of where they landed.
t_switch_node(Config) ->
[Node | _] = ?config(cluster_nodes, Config),
AdditionalNodePort = emqx_ft_test_helpers:tcp_port(Node),
ClientId = <<"t_switch_node-migrating_client">>,
{ok, C1} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, AdditionalNodePort}]),
{ok, _} = emqtt:connect(C1),
Filename = "multinode_upload.txt",
FileId = <<"f1">>,
Data = [<<"first">>, <<"second">>, <<"third">>],
[{Data0, Offset0}, {Data1, Offset1}, {Data2, Offset2}] = with_offsets(Data),
%% First, publish metadata and the first segment to the additional node
Meta = #{size := Filesize} = meta(Filename, Data),
?assertRCName(
success,
emqtt:publish(C1, mk_init_topic(FileId), encode_meta(Meta), 1)
),
?assertRCName(
success,
emqtt:publish(C1, mk_segment_topic(FileId, Offset0), Data0, 1)
),
%% Then, switch the client to the main node
%% and publish the rest of the segments
ok = emqtt:stop(C1),
{ok, C2} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}]),
{ok, _} = emqtt:connect(C2),
?assertRCName(
success,
emqtt:publish(C2, mk_segment_topic(FileId, Offset1), Data1, 1)
),
?assertRCName(
success,
emqtt:publish(C2, mk_segment_topic(FileId, Offset2), Data2, 1)
),
?assertRCName(
success,
emqtt:publish(C2, mk_fin_topic(FileId, Filesize), <<>>, 1)
),
ok = emqtt:stop(C2),
%% Now check consistency of the file
[Export] = list_files(ClientId),
?assertEqual(
{ok, iolist_to_binary(Data)},
read_export(Export)
).
%% A crash inside the storage assembler must surface to the publisher
%% as an error reason code rather than hanging or succeeding.
%% NOTE(review): the meck mock is not unloaded in this testcase body —
%% presumably end_per_testcase (not visible here) does the cleanup;
%% confirm, otherwise the mock leaks into subsequent cases.
t_assemble_crash(Config) ->
C = ?config(client, Config),
meck:new(emqx_ft_storage_fs),
meck:expect(emqx_ft_storage_fs, assemble, fun(_, _, _) -> meck:exception(error, oops) end),
?assertRCName(
unspecified_error,
emqtt:publish(C, <<"$file/someid/fin">>, <<>>, 1)
).
%% Chaos-style scenario: a client restarts the same transfer several
%% times, hops between 3 nodes, changes its segment size mid-way and
%% resends overlapping segments and duplicate `fin`s. The transfer must
%% still converge to exactly one consistent exported file.
t_unreliable_migrating_client(Config) ->
NodeSelf = node(),
[Node1, Node2] = ?config(cluster_nodes, Config),
ClientId = ?config(clientid, Config),
FileId = emqx_guid:to_hexstr(emqx_guid:gen()),
Filename = "migratory-birds-in-southern-hemisphere-2013.pdf",
Filesize = 1000,
%% Deterministic pseudo-random payload seeded by {ClientId, FileId}.
Gen = emqx_ft_content_gen:new({{ClientId, FileId}, Filesize}, 16),
Payload = iolist_to_binary(emqx_ft_content_gen:consume(Gen, fun({Chunk, _, _}) -> Chunk end)),
Meta = meta(Filename, Payload),
Context = #{
clientid => ClientId,
fileid => FileId,
filesize => Filesize,
payload => Payload
},
Commands = [
% Connect to the broker on the current node
{fun connect_mqtt_client/2, [NodeSelf]},
% Send filemeta and 3 initial segments
% (assuming client chose 100 bytes as a desired segment size)
{fun send_filemeta/2, [Meta]},
{fun send_segment/3, [0, 100]},
{fun send_segment/3, [100, 100]},
{fun send_segment/3, [200, 100]},
% Disconnect the client cleanly
{fun stop_mqtt_client/1, []},
% Connect to the broker on `Node1`
{fun connect_mqtt_client/2, [Node1]},
% Connect to the broker on `Node2` without first disconnecting from `Node1`
% Client forgot the state for some reason and started the transfer again.
% (assuming this is usual for a client on a device that was rebooted)
{fun connect_mqtt_client/2, [Node2]},
{fun send_filemeta/2, [Meta]},
% This time it chose 200 bytes as a segment size
{fun send_segment/3, [0, 200]},
{fun send_segment/3, [200, 200]},
% But now it downscaled back to 100 bytes segments
{fun send_segment/3, [400, 100]},
% Client lost connectivity and reconnected
% (also had last few segments unacked and decided to resend them)
{fun connect_mqtt_client/2, [Node2]},
{fun send_segment/3, [200, 200]},
{fun send_segment/3, [400, 200]},
% Client lost connectivity and reconnected, this time to another node
% (also had last segment unacked and decided to resend it)
{fun connect_mqtt_client/2, [Node1]},
{fun send_segment/3, [400, 200]},
{fun send_segment/3, [600, eof]},
{fun send_finish/1, []},
% Client lost connectivity and reconnected, this time to the current node
% (client had `fin` unacked and decided to resend it)
{fun connect_mqtt_client/2, [NodeSelf]},
{fun send_finish/1, []}
],
_Context = run_commands(Commands, Context),
Exports = list_files(?config(clientid, Config)),
Node1Str = atom_to_list(Node1),
% TODO: this testcase is specific to local fs storage backend
%% The export should be attributed to Node1, where the last `fin` that
%% completed assembly was processed.
?assertMatch(
[#{"node" := Node1Str}],
fs_exported_file_attributes(Exports)
),
[
?assertEqual({ok, Payload}, read_export(Export))
|| Export <- Exports
].
%% Race `fin` commands for the same transfer from 3 nodes at once:
%% each sender retries until its `fin` succeeds, and ideally exactly
%% one node ends up owning the exported file.
t_concurrent_fins(Config) ->
ct:timetrap({seconds, 10}),
NodeSelf = node(),
[Node1, Node2] = ?config(cluster_nodes, Config),
%% Unique client id so leftovers from other cases don't interfere.
ClientId = iolist_to_binary([
?config(clientid, Config),
integer_to_list(erlang:unique_integer())
]),
FileId = emqx_guid:to_hexstr(emqx_guid:gen()),
Filename = "migratory-birds-in-southern-hemisphere-2013.pdf",
Filesize = 100,
Gen = emqx_ft_content_gen:new({{ClientId, FileId}, Filesize}, 16),
Payload = iolist_to_binary(emqx_ft_content_gen:consume(Gen, fun({Chunk, _, _}) -> Chunk end)),
Meta = meta(Filename, Payload),
%% Send filemeta and segments to Node1
Context0 = #{
clientid => ClientId,
fileid => FileId,
filesize => Filesize,
payload => Payload
},
Context1 = run_commands(
[
{fun connect_mqtt_client/2, [Node1]},
{fun send_filemeta/2, [Meta]},
{fun send_segment/3, [0, 100]},
{fun stop_mqtt_client/1, []}
],
Context0
),
%% Now send fins concurrently to the 3 nodes
Nodes = [Node1, Node2, NodeSelf],
SendFin = fun(Node) ->
run_commands(
[
{fun connect_mqtt_client/2, [Node]},
{fun send_finish/1, []}
],
Context1
)
end,
%% One monitored process per node, each retrying its `fin` until it
%% goes through (assert failures inside SendFin raise and are caught).
PidMons = lists:map(
fun(Node) ->
erlang:spawn_monitor(fun F() ->
_ = erlang:process_flag(trap_exit, true),
try
SendFin(Node)
catch
C:E ->
% NOTE: random delay to avoid livelock conditions
ct:pal("Node ~p did not send finish successfully: ~p:~p", [Node, C, E]),
ok = timer:sleep(rand:uniform(10)),
F()
end
end)
end,
Nodes
),
%% Wait for all senders to finish normally.
ok = lists:foreach(
fun({Pid, MRef}) ->
receive
{'DOWN', MRef, process, Pid, normal} -> ok
end
end,
PidMons
),
%% Only one node should have the file
Exports = list_files(ClientId),
case fs_exported_file_attributes(Exports) of
[#{"node" := _Node}] ->
ok;
[#{"node" := _Node} | _] = Files ->
% ...But we can't really guarantee that
ct:comment({multiple_files_on_different_nodes, Files})
end.
%%------------------------------------------------------------------------------
%% Command helpers
%%------------------------------------------------------------------------------
%% Command runners
%% Thread a context map through a list of {Fun, Args} commands; each
%% command returns the next context.
run_commands(Commands, Context) ->
lists:foldl(fun run_command/2, Context, Commands).
%% Apply one command: the context is always appended as the last
%% argument, so `{fun send_segment/3, [0, 100]}` becomes
%% `send_segment(0, 100, Context)`.
run_command({Command, Args}, Context) ->
ct:pal("COMMAND ~p ~p", [erlang:fun_info(Command, name), Args]),
erlang:apply(Command, Args ++ [Context]).
%% Commands
%% Connect a fresh MQTT v5 client to `Node` under the context's client
%% id. Any previously connected client is first unlinked (but not
%% stopped), emulating a client that vanished without a clean
%% disconnect.
connect_mqtt_client(Node, ContextIn) ->
Context = #{clientid := ClientId} = disown_mqtt_client(ContextIn),
NodePort = emqx_ft_test_helpers:tcp_port(Node),
{ok, Client} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, NodePort}]),
{ok, _} = emqtt:connect(Client),
Context#{client => Client}.
%% Cleanly stop the current client and drop it from the context.
stop_mqtt_client(Context = #{client := Client}) ->
_ = emqtt:stop(Client),
maps:remove(client, Context).
%% Unlink the current client (if any) so its eventual death does not
%% kill the test process, and drop it from the context.
disown_mqtt_client(Context = #{client := Client}) ->
_ = erlang:unlink(Client),
maps:remove(client, Context);
disown_mqtt_client(Context = #{}) ->
Context.
%% Publish the transfer's filemeta to the init topic; must succeed.
send_filemeta(Meta, Context = #{client := Client, fileid := FileId}) ->
?assertRCName(
success,
emqtt:publish(Client, mk_init_topic(FileId), encode_meta(Meta), 1)
),
Context.
%% Publish one segment of the context's payload starting at `Offset`.
%% `Size` is a byte count, or the atom `eof` for "rest of the payload".
send_segment(Offset, Size, Context = #{client := Client, fileid := FileId, payload := Payload}) ->
Data =
case Size of
eof ->
binary:part(Payload, Offset, byte_size(Payload) - Offset);
N ->
binary:part(Payload, Offset, N)
end,
?assertRCName(
success,
emqtt:publish(Client, mk_segment_topic(FileId, Offset), Data, 1)
),
Context.
%% Publish the `fin` command carrying the total file size; must succeed.
send_finish(Context = #{client := Client, fileid := FileId, filesize := Filesize}) ->
?assertRCName(
success,
emqtt:publish(Client, mk_fin_topic(FileId, Filesize), <<>>, 1)
),
Context.
%%------------------------------------------------------------------------------
%% Helpers
%%------------------------------------------------------------------------------
%% Extract the query-string attributes of each export's `uri` field as
%% a map, in sorted export order. The local-FS exporter encodes
%% attributes (e.g. the node name) in the URI query.
fs_exported_file_attributes(Exports) ->
    [
        begin
            #{query := Query} = uri_string:parse(URI),
            maps:from_list(uri_string:dissect_query(Query))
        end
     || #{uri := URI} <- lists:sort(Exports)
    ].
%% Topic on which the filemeta of transfer `FileId` is published.
mk_init_topic(FileId) ->
    iolist_to_binary(["$file/", FileId, "/init"]).
%% Topic for a segment of `FileId` at byte `Offset` (integer or
%% already-rendered binary).
mk_segment_topic(FileId, Offset) when is_integer(Offset) ->
    mk_segment_topic(FileId, integer_to_binary(Offset));
mk_segment_topic(FileId, Offset) when is_binary(Offset) ->
    iolist_to_binary(["$file/", FileId, "/", Offset]).
%% Same, with a per-segment checksum appended as the last topic level.
mk_segment_topic(FileId, Offset, Checksum) when is_integer(Offset) ->
    mk_segment_topic(FileId, integer_to_binary(Offset), Checksum);
mk_segment_topic(FileId, Offset, Checksum) when is_binary(Offset) ->
    iolist_to_binary(["$file/", FileId, "/", Offset, "/", Checksum]).
%% Topic that finalizes transfer `FileId`, carrying the total file
%% size (integer or already-rendered binary) as the last level.
mk_fin_topic(FileId, Size) when is_integer(Size) ->
    mk_fin_topic(FileId, integer_to_binary(Size));
mk_fin_topic(FileId, Size) when is_binary(Size) ->
    iolist_to_binary(["$file/", FileId, "/fin/", Size]).
%% Pair each chunk with its byte offset (rendered as a binary) within
%% the concatenation of all chunks, preserving order.
with_offsets(Chunks) ->
    with_offsets(Chunks, 0, []).

with_offsets([Chunk | Rest], Offset, Acc) ->
    Entry = {Chunk, integer_to_binary(Offset)},
    with_offsets(Rest, Offset + byte_size(Chunk), [Entry | Acc]);
with_offsets([], _Offset, Acc) ->
    lists:reverse(Acc).
%% SHA-256 digest of `Bytes` as a raw 32-byte binary.
sha256(Bytes) ->
    crypto:hash(sha256, Bytes).
%% Build a filemeta map for iolist `Data`: name, sha256 checksum,
%% total size, and an expiry timestamp one hour from now.
meta(FileName, Data) ->
    Payload = iolist_to_binary(Data),
    #{
        name => FileName,
        checksum => {sha256, crypto:hash(sha256, Payload)},
        expire_at => erlang:system_time(second) + 3600,
        size => byte_size(Payload)
    }.
%% Serialize a filemeta map to the JSON wire format expected on the
%% `$file/.../init` topic.
encode_meta(Meta) ->
emqx_utils_json:encode(emqx_ft:encode_filemeta(Meta)).
%% List all exported files whose transfer belongs to `ClientId`.
%% Uses exact equality (`=:=`): client ids are binaries, and `=:=`
%% avoids the numeric-coercion semantics of `==`.
list_files(ClientId) ->
    {ok, #{items := Files}} = emqx_ft_storage:files(),
    [File || File = #{transfer := {CId, _}} <- Files, CId =:= ClientId].
%% Read back the content of an exported file from its absolute path.
read_export(#{path := AbsFilepath}) ->
% TODO: only works for the local filesystem exporter right now
file:read_file(AbsFilepath).

View File

@ -0,0 +1,304 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_api_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-import(emqx_dashboard_api_test_helpers, [host/0, uri/1]).
%% Run every testcase both against a single node and a cluster.
all() ->
[
{group, single},
{group, cluster}
].
groups() ->
[
{single, [], emqx_common_test_helpers:all(?MODULE)},
{cluster, [], emqx_common_test_helpers:all(?MODULE)}
].
%% Start the management API plus the file-transfer app for the whole
%% suite; manual port discovery keeps additional nodes joinable.
init_per_suite(Config) ->
ok = emqx_mgmt_api_test_util:init_suite(
[emqx_conf, emqx_ft], emqx_ft_test_helpers:env_handler(Config)
),
{ok, _} = emqx:update_config([rpc, port_discovery], manual),
Config.
end_per_suite(_Config) ->
ok = emqx_mgmt_api_test_util:end_suite([emqx_ft, emqx_conf]),
ok.
%% For the `cluster` group, start the additional nodes and record them
%% in the config; other groups only record their name.
init_per_group(Group = cluster, Config) ->
Cluster = mk_cluster_specs(Config),
ct:pal("Starting ~p", [Cluster]),
Nodes = [
emqx_common_test_helpers:start_slave(Name, Opts#{join_to => node()})
|| {Name, Opts} <- Cluster
],
[{group, Group}, {cluster_nodes, Nodes} | Config];
init_per_group(Group, Config) ->
[{group, Group} | Config].
end_per_group(cluster, Config) ->
ok = lists:foreach(
fun emqx_ft_test_helpers:stop_additional_node/1,
?config(cluster_nodes, Config)
);
end_per_group(_Group, _Config) ->
ok.
%% Specs for two extra core nodes running emqx_ft with only TCP
%% listeners enabled (ssl/ws/wss are turned off to avoid port clashes).
mk_cluster_specs(Config) ->
Specs = [
{core, emqx_ft_api_SUITE1, #{listener_ports => [{tcp, 2883}]}},
{core, emqx_ft_api_SUITE2, #{listener_ports => [{tcp, 3883}]}}
],
CommOpts = [
{env, [{emqx, boot_modules, [broker, listeners]}]},
{apps, [emqx_ft]},
{conf, [{[listeners, Proto, default, enabled], false} || Proto <- [ssl, ws, wss]]},
{env_handler, emqx_ft_test_helpers:env_handler(Config)}
],
emqx_common_test_helpers:emqx_cluster(
Specs,
CommOpts
).
%% Remember the testcase name; client_id/1 derives client ids from it.
init_per_testcase(Case, Config) ->
[{tc, Case} | Config].
%% t_ft_disabled turns file transfer off; restore the flag afterwards.
end_per_testcase(t_ft_disabled, _Config) ->
emqx_config:put([file_transfer, enable], true);
end_per_testcase(_Case, _Config) ->
ok.
%%--------------------------------------------------------------------
%% Tests
%%--------------------------------------------------------------------
%% Files uploaded on any cluster node are visible via the management
%% API, both in the global listing and filtered per transfer; an
%% unknown file id yields 404 FILES_NOT_FOUND.
t_list_files(Config) ->
ClientId = client_id(Config),
FileId = <<"f1">>,
%% Upload on the last node to exercise cross-node listing.
Node = lists:last(cluster(Config)),
ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "f1", <<"data">>, Node),
{ok, 200, #{<<"files">> := Files}} =
request_json(get, uri(["file_transfer", "files"])),
?assertMatch(
[#{<<"clientid">> := ClientId, <<"fileid">> := <<"f1">>}],
[File || File = #{<<"clientid">> := CId} <- Files, CId == ClientId]
),
{ok, 200, #{<<"files">> := FilesTransfer}} =
request_json(get, uri(["file_transfer", "files", ClientId, FileId])),
?assertMatch(
[#{<<"clientid">> := ClientId, <<"fileid">> := <<"f1">>}],
FilesTransfer
),
?assertMatch(
{ok, 404, #{<<"code">> := <<"FILES_NOT_FOUND">>}},
request_json(get, uri(["file_transfer", "files", ClientId, <<"no-such-file">>]))
).
%% Download endpoint: a fileref without a node is a 400, an unknown
%% node is a 503, an unknown fileref is a 404, and the URI returned by
%% the listing endpoint serves the uploaded content.
t_download_transfer(Config) ->
ClientId = client_id(Config),
FileId = <<"f1">>,
Node = lists:last(cluster(Config)),
ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "f1", <<"data">>, Node),
?assertMatch(
{ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}},
request_json(
get,
uri(["file_transfer", "file"]) ++ query(#{fileref => FileId})
)
),
?assertMatch(
{ok, 503, _},
request(
get,
uri(["file_transfer", "file"]) ++
query(#{
fileref => FileId,
node => <<"nonode@nohost">>
})
)
),
?assertMatch(
{ok, 404, _},
request(
get,
uri(["file_transfer", "file"]) ++
query(#{
fileref => <<"unknown_file">>,
node => node()
})
)
),
%% Follow the download URI advertised by the listing.
{ok, 200, #{<<"files">> := [File]}} =
request_json(get, uri(["file_transfer", "files", ClientId, FileId])),
{ok, 200, Response} = request(get, host() ++ maps:get(<<"uri">>, File)),
?assertEqual(
<<"data">>,
Response
).
%% Pagination of the files listing: a `limit` smaller than the total
%% yields a cursor, invalid `limit`/`following` values are 400s, and
%% paging through with any page size reproduces the full listing.
t_list_files_paging(Config) ->
ClientId = client_id(Config),
NFiles = 20,
Nodes = cluster(Config),
%% Spread the uploads round-robin across all nodes.
Uploads = [
{mk_file_id("file:", N), mk_file_name(N), pick(N, Nodes)}
|| N <- lists:seq(1, NFiles)
],
ok = lists:foreach(
fun({FileId, Name, Node}) ->
ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, <<"data">>, Node)
end,
Uploads
),
?assertMatch(
{ok, 200, #{<<"files">> := [_, _, _], <<"cursor">> := _}},
request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 3}))
),
{ok, 200, #{<<"files">> := Files}} =
request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 100})),
?assert(length(Files) >= NFiles),
%% A page covering everything must not return a cursor.
?assertNotMatch(
{ok, 200, #{<<"cursor">> := _}},
request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 100}))
),
?assertMatch(
{ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}},
request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 0}))
),
?assertMatch(
{ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}},
request_json(
get,
uri(["file_transfer", "files"]) ++ query(#{following => <<"whatsthat!?">>})
)
),
%% Follow cursors until the last page, accumulating all entries.
PageThrough = fun PageThrough(Query, Acc) ->
case request_json(get, uri(["file_transfer", "files"]) ++ query(Query)) of
{ok, 200, #{<<"files">> := FilesPage, <<"cursor">> := Cursor}} ->
PageThrough(Query#{following => Cursor}, Acc ++ FilesPage);
{ok, 200, #{<<"files">> := FilesPage}} ->
Acc ++ FilesPage
end
end,
?assertEqual(Files, PageThrough(#{limit => 1}, [])),
?assertEqual(Files, PageThrough(#{limit => 8}, [])),
?assertEqual(Files, PageThrough(#{limit => NFiles}, [])).
%% With file transfer disabled in the config, both API endpoints must
%% answer 503 instead of serving data.
t_ft_disabled(_Config) ->
?assertMatch(
{ok, 200, _},
request_json(get, uri(["file_transfer", "files"]))
),
?assertMatch(
{ok, 400, _},
request_json(
get,
uri(["file_transfer", "file"]) ++ query(#{fileref => <<"f1">>})
)
),
%% Flag is restored by end_per_testcase/2.
ok = emqx_config:put([file_transfer, enable], false),
?assertMatch(
{ok, 503, _},
request_json(get, uri(["file_transfer", "files"]))
),
?assertMatch(
{ok, 503, _},
request_json(
get,
uri(["file_transfer", "file"]) ++ query(#{fileref => <<"f1">>, node => node()})
)
).
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% All nodes participating in the test: the CT node itself first,
%% followed by any additional nodes started in init_per_group.
cluster(Config) ->
    Extra = proplists:get_value(cluster_nodes, Config, []),
    [node() | Extra].
%% Derive a per-testcase client id "<group>.<testcase>" so uploads of
%% different cases never collide.
client_id(Config) ->
iolist_to_binary(io_lib:format("~s.~s", [?config(group, Config), ?config(tc, Config)])).
%% Binary file id "<Prefix><N>".
mk_file_id(Prefix, N) ->
    iolist_to_binary([Prefix, integer_to_binary(N)]).
%% String file name "file.<N>".
mk_file_name(N) ->
    lists:concat(["file.", N]).
%% Raw management-API request; body is returned as-is.
request(Method, Url) ->
emqx_mgmt_api_test_util:request(Method, Url, []).
%% Same as request/2 but decodes a successful response body as JSON.
request_json(Method, Url) ->
case emqx_mgmt_api_test_util:request(Method, Url, []) of
{ok, Code, Body} ->
{ok, Code, json(Body)};
Otherwise ->
Otherwise
end.
%% Decode a JSON binary into maps.
json(Body) when is_binary(Body) ->
emqx_utils_json:decode(Body, [return_maps]).
%% Render a params map as a "?k=v&..." query string, URI-encoding both
%% keys and values.
query(Params) ->
KVs = lists:map(fun({K, V}) -> uri_encode(K) ++ "=" ++ uri_encode(V) end, maps:to_list(Params)),
"?" ++ string:join(KVs, "&").
%% URI-encode any atom/integer/binary/list term.
uri_encode(T) ->
emqx_http_lib:uri_encode(to_list(T)).
%% Coerce an atom, integer, binary or string to a string (list).
to_list(X) when is_atom(X) -> atom_to_list(X);
to_list(X) when is_integer(X) -> integer_to_list(X);
to_list(X) when is_binary(X) -> binary_to_list(X);
to_list(X) when is_list(X) -> X.
%% Pick an element from `List` round-robin by index `N` (1-based
%% wrap-around via `N rem length(List)`).
pick(N, List) ->
    Index = (N rem length(List)) + 1,
    lists:nth(Index, List).

View File

@ -0,0 +1,265 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_assembler_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("kernel/include/file.hrl").
%% Explicit testcase order: t_list_transfers asserts on exports
%% produced by the preceding cases, so order matters here.
all() ->
[
t_assemble_empty_transfer,
t_assemble_complete_local_transfer,
t_assemble_incomplete_transfer,
t_assemble_no_meta,
% NOTE
% It depends on the side effects of all previous testcases.
t_list_transfers
].
%% Start `gproc` (required by the assembler supervisor) for the whole
%% suite and record the applications that were started.
%% NOTE: the original bound the whole `{ok, Apps}` result tuple to
%% `Apps`, silently storing a tuple and ignoring startup failures;
%% assert success and keep the actual app list instead.
init_per_suite(Config) ->
    {ok, Apps} = application:ensure_all_started(gproc),
    [{suite_apps, Apps} | Config].
end_per_suite(_Config) ->
ok.
%% Fresh snabbkaffe trace and a dedicated assembler supervisor per
%% testcase; the testcase name doubles as the file id.
init_per_testcase(TC, Config) ->
ok = snabbkaffe:start_trace(),
{ok, Pid} = emqx_ft_assembler_sup:start_link(),
[
{storage_root, <<"file_transfer_root">>},
{exports_root, <<"file_transfer_exports">>},
{file_id, atom_to_binary(TC)},
{assembler_sup, Pid}
| Config
].
%% Dump the storage tree for debugging, then tear down the supervisor
%% and the trace.
end_per_testcase(_TC, Config) ->
ok = inspect_storage_root(Config),
ok = gen:stop(?config(assembler_sup, Config)),
ok = snabbkaffe:stop(),
ok.
%%
-define(CLIENTID1, <<"thatsme">>).
-define(CLIENTID2, <<"thatsnotme">>).
%% A zero-byte transfer: storing just the filemeta is enough for
%% assembly to succeed and produce an empty export.
t_assemble_empty_transfer(Config) ->
Storage = storage(Config),
Transfer = {?CLIENTID1, ?config(file_id, Config)},
Filename = "important.pdf",
Meta = #{
name => Filename,
size => 0,
expire_at => 42
},
ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta),
%% The stored filemeta is listed as the transfer's only fragment.
?assertMatch(
{ok, [
#{
path := _,
timestamp := {{_, _, _}, {_, _, _}},
fragment := {filemeta, Meta}
}
]},
emqx_ft_storage_fs:list(Storage, Transfer, fragment)
),
Status = complete_assemble(Storage, Transfer, 0),
?assertEqual({shutdown, ok}, Status),
{ok, [_Result = #{size := _Size = 0}]} = list_exports(Config, Transfer),
% ?assertEqual(
% {error, eof},
% emqx_ft_storage_fs:pread(Storage, Transfer, Result, 0, Size)
% ),
ok.
%% Full local transfer: store filemeta plus every generated segment,
%% assemble, and verify the export's size, on-disk file info, and
%% byte-for-byte content consistency.
t_assemble_complete_local_transfer(Config) ->
Storage = storage(Config),
Transfer = {?CLIENTID2, ?config(file_id, Config)},
Filename = "topsecret.pdf",
%% Randomized size in [10001, 60000] so the last segment is partial.
TransferSize = 10000 + rand:uniform(50000),
SegmentSize = 4096,
Gen = emqx_ft_content_gen:new({Transfer, TransferSize}, SegmentSize),
Hash = emqx_ft_content_gen:hash(Gen, crypto:hash_init(sha256)),
Meta = #{
name => Filename,
checksum => {sha256, Hash},
expire_at => 42
},
ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta),
_ = emqx_ft_content_gen:consume(
Gen,
fun({Content, SegmentNum, _Meta}) ->
Offset = (SegmentNum - 1) * SegmentSize,
?assertEqual(
ok,
emqx_ft_storage_fs:store_segment(Storage, Transfer, {Offset, Content})
)
end
),
{ok, Fragments} = emqx_ft_storage_fs:list(Storage, Transfer, fragment),
%% Full segments + the trailing partial segment + the filemeta.
?assertEqual((TransferSize div SegmentSize) + 1 + 1, length(Fragments)),
?assertEqual(
[Meta],
[FM || #{fragment := {filemeta, FM}} <- Fragments],
Fragments
),
Status = complete_assemble(Storage, Transfer, TransferSize),
?assertEqual({shutdown, ok}, Status),
?assertMatch(
{ok, [
#{
size := TransferSize,
meta := #{}
}
]},
list_exports(Config, Transfer)
),
{ok, [#{path := AssemblyFilename}]} = list_exports(Config, Transfer),
?assertMatch(
{ok, #file_info{type = regular, size = TransferSize}},
file:read_file_info(AssemblyFilename)
),
%% Re-derive the generator's content and compare against the file.
ok = emqx_ft_content_gen:check_file_consistency(
{Transfer, TransferSize},
100,
AssemblyFilename
).
%% Filemeta without any stored segments: assembly must terminate with
%% an error rather than produce a partial export.
t_assemble_incomplete_transfer(Config) ->
Storage = storage(Config),
Transfer = {?CLIENTID2, ?config(file_id, Config)},
Filename = "incomplete.pdf",
TransferSize = 10000 + rand:uniform(50000),
SegmentSize = 4096,
Gen = emqx_ft_content_gen:new({Transfer, TransferSize}, SegmentSize),
Hash = emqx_ft_content_gen:hash(Gen, crypto:hash_init(sha256)),
Meta = #{
name => Filename,
checksum => {sha256, Hash},
size => TransferSize,
expire_at => 42
},
ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta),
Status = complete_assemble(Storage, Transfer, TransferSize),
?assertMatch({shutdown, {error, _}}, Status).
%% Assembly of a transfer that has no fragments at all fails with an
%% `{incomplete, _}` error.
t_assemble_no_meta(Config) ->
Storage = storage(Config),
Transfer = {?CLIENTID2, ?config(file_id, Config)},
Status = complete_assemble(Storage, Transfer, 42),
?assertMatch({shutdown, {error, {incomplete, _}}}, Status).
%% Kick off assembly and wait for the assembler process to terminate,
%% returning its exit reason (default timeout: 1s).
complete_assemble(Storage, Transfer, Size) ->
complete_assemble(Storage, Transfer, Size, 1000).
complete_assemble(Storage, Transfer, Size, Timeout) ->
{async, Pid} = emqx_ft_storage_fs:assemble(Storage, Transfer, Size),
MRef = erlang:monitor(process, Pid),
%% The assembler waits for an explicit kickoff message before running.
Pid ! kickoff,
receive
{'DOWN', MRef, process, Pid, Result} ->
Result
after Timeout ->
ct:fail("Assembler did not finish in time")
end.
%%
%% Depends on side effects of earlier testcases: the exports created by
%% t_assemble_empty_transfer and t_assemble_complete_local_transfer
%% must both show up in the exporter's listing.
t_list_transfers(Config) ->
{ok, Exports} = list_exports(Config),
?assertMatch(
[
#{
transfer := {?CLIENTID2, <<"t_assemble_complete_local_transfer">>},
path := _,
size := Size,
meta := #{name := "topsecret.pdf"}
},
#{
transfer := {?CLIENTID1, <<"t_assemble_empty_transfer">>},
path := _,
size := 0,
meta := #{name := "important.pdf"}
}
] when Size > 0,
lists:sort(Exports)
).
%%
-include_lib("kernel/include/file.hrl").
%% Debug helper: log the whole storage tree at the end of a testcase.
inspect_storage_root(Config) ->
inspect_dir(?config(storage_root, Config)).
%% Recursively collect {type, size, mtime} for every file under `Dir`
%% and print the result to the CT log.
inspect_dir(Dir) ->
FileInfos = filelib:fold_files(
Dir,
".*",
true,
fun(Filename, Acc) -> orddict:store(Filename, inspect_file(Filename), Acc) end,
orddict:new()
),
ct:pal("inspect '~s': ~p", [Dir, FileInfos]).
%% Summarize a file as {Type, Size, Mtime}.
inspect_file(Filename) ->
    {ok, Info} = file:read_file_info(Filename),
    #file_info{type = Type, size = Size, mtime = MTime} = Info,
    {Type, Size, MTime}.
%% Millisecond timestamp rendered as a binary; unique enough for a
%% fresh file id per call site.
mk_fileid() ->
    Now = erlang:system_time(millisecond),
    integer_to_binary(Now).
%% List every export known to the local-FS exporter.
list_exports(Config) ->
{emqx_ft_storage_exporter_fs, Options} = exporter(Config),
emqx_ft_storage_exporter_fs:list_local(Options).
%% List exports belonging to a single transfer.
list_exports(Config, Transfer) ->
{emqx_ft_storage_exporter_fs, Options} = exporter(Config),
emqx_ft_storage_exporter_fs:list_local_transfer(Options, Transfer).
%% Resolve the {ExporterMod, Options} pair from the storage config.
exporter(Config) ->
emqx_ft_storage_exporter:exporter(storage(Config)).
%% Build a validated local-storage config from the per-testcase roots
%% by running the raw map through the schema translation.
storage(Config) ->
emqx_utils_maps:deep_get(
[storage, local],
emqx_ft_schema:translate(#{
<<"storage">> => #{
<<"local">> => #{
<<"segments">> => #{
<<"root">> => ?config(storage_root, Config)
},
<<"exporter">> => #{
<<"local">> => #{
<<"root">> => ?config(exports_root, Config)
}
}
}
}
})
).

View File

@ -0,0 +1,249 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_conf_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("snabbkaffe/include/test_macros.hrl").
all() -> emqx_common_test_helpers:all(?MODULE).
init_per_suite(Config) ->
Config.
end_per_suite(_Config) ->
ok.
%% Apps are (re)started per testcase because each case mutates the
%% file_transfer config; the schema module must be registered first so
%% emqx_conf:update/3 can validate against it.
init_per_testcase(_Case, Config) ->
_ = emqx_config:save_schema_mod_and_names(emqx_ft_schema),
ok = emqx_common_test_helpers:start_apps(
[emqx_conf, emqx_ft], fun
(emqx_ft) ->
emqx_ft_test_helpers:load_config(#{});
(_) ->
ok
end
),
{ok, _} = emqx:update_config([rpc, port_discovery], manual),
Config.
%% Stop the apps and wipe the file_transfer config between cases.
end_per_testcase(_Case, _Config) ->
ok = emqx_common_test_helpers:stop_apps([emqx_ft, emqx_conf]),
ok = emqx_config:erase(file_transfer).
%%--------------------------------------------------------------------
%% Tests
%%--------------------------------------------------------------------
%% Config updates: unknown storage backends are rejected with a
%% validation error; a valid local-storage update is accepted and the
%% effective values (root, GC interval, segment TTLs) are observable.
t_update_config(_Config) ->
?assertMatch(
{error, #{kind := validation_error}},
emqx_conf:update(
[file_transfer],
#{<<"storage">> => #{<<"unknown">> => #{<<"foo">> => 42}}},
#{}
)
),
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer],
#{
<<"enable">> => true,
<<"storage">> => #{
<<"local">> => #{
<<"segments">> => #{
<<"root">> => <<"/tmp/path">>,
<<"gc">> => #{
<<"interval">> => <<"5m">>
}
},
<<"exporter">> => #{
<<"local">> => #{
<<"root">> => <<"/tmp/exports">>
}
}
}
}
},
#{}
)
),
?assertEqual(
<<"/tmp/path">>,
emqx_config:get([file_transfer, storage, local, segments, root])
),
%% "5m" parsed into milliseconds.
?assertEqual(
5 * 60 * 1000,
emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:gc_interval/1)
),
%% Default TTL bounds: {5 minutes, 24 hours} in seconds.
?assertEqual(
{5 * 60, 24 * 60 * 60},
emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:segments_ttl/1)
).
%% Disabling file transfer reverts storage settings to defaults and
%% makes transfers fail cleanly (no subscribers, no init traces);
%% re-enabling with a fresh root restores transfers and GC.
t_disable_restore_config(Config) ->
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer],
#{<<"enable">> => true, <<"storage">> => #{<<"local">> => #{}}},
#{}
)
),
%% Default GC interval: 1 hour in milliseconds.
?assertEqual(
60 * 60 * 1000,
emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:gc_interval/1)
),
% Verify that transfers work
ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<?MODULE_STRING>>),
% Verify that clearing storage settings reverts config to defaults
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer],
#{<<"enable">> => false, <<"storage">> => undefined},
#{}
)
),
?assertEqual(
false,
emqx_ft_conf:enabled()
),
?assertMatch(
#{local := #{exporter := #{local := _}}},
emqx_ft_conf:storage()
),
ClientId = gen_clientid(),
Client = emqx_ft_test_helpers:start_client(ClientId),
% Verify that transfers fail cleanly when storage is disabled
?check_trace(
?assertMatch(
{ok, #{reason_code_name := no_matching_subscribers}},
emqtt:publish(
Client,
<<"$file/f2/init">>,
emqx_utils_json:encode(emqx_ft:encode_filemeta(#{name => "f2", size => 42})),
1
)
),
fun(Trace) ->
?assertMatch([], ?of_kind("file_transfer_init", Trace))
end
),
ok = emqtt:stop(Client),
% Restore local storage backend
Root = iolist_to_binary(emqx_ft_test_helpers:root(Config, node(), [segments])),
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer],
#{
<<"enable">> => true,
<<"storage">> => #{
<<"local">> => #{
<<"segments">> => #{
<<"root">> => Root,
<<"gc">> => #{<<"interval">> => <<"1s">>}
}
}
}
},
#{}
)
),
% Verify that GC is getting triggered eventually
?check_trace(
?block_until(#{?snk_kind := garbage_collection}, 5000, 0),
fun(Trace) ->
?assertMatch(
[
#{
?snk_kind := garbage_collection,
storage := #{segments := #{root := Root}}
}
],
?of_kind(garbage_collection, Trace)
)
end
),
% Verify that transfers work again
ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<?MODULE_STRING>>).
%% Switching the exporter backend at runtime: local -> s3 -> back to
%% local (via remove + update), with transfers still working after.
t_switch_exporter(_Config) ->
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer],
#{<<"enable">> => true},
#{}
)
),
%% Default exporter is local.
?assertMatch(
#{local := #{exporter := #{local := _}}},
emqx_ft_conf:storage()
),
% Verify that switching to a different exporter works
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer, storage, local, exporter],
#{
<<"s3">> => #{
<<"bucket">> => <<"emqx">>,
<<"host">> => <<"https://localhost">>,
<<"port">> => 9000,
<<"transport_options">> => #{
<<"ipv6_probe">> => false
}
}
},
#{}
)
),
?assertMatch(
#{local := #{exporter := #{s3 := _}}},
emqx_ft_conf:storage()
),
% Verify that switching back to local exporter works
?assertMatch(
{ok, _},
emqx_conf:remove(
[file_transfer, storage, local, exporter],
#{}
)
),
?assertMatch(
{ok, _},
emqx_conf:update(
[file_transfer, storage, local, exporter],
#{<<"local">> => #{}},
#{}
)
),
?assertMatch(
#{local := #{exporter := #{local := #{}}}},
emqx_ft_conf:storage()
),
% Verify that transfers work
ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<?MODULE_STRING>>).
%% Globally unique, URL-safe client id (base62-encoded GUID).
gen_clientid() ->
emqx_base62:encode(emqx_guid:gen()).

View File

@ -0,0 +1,232 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% Inspired by
%% https://github.com/kafka4beam/kflow/blob/master/src/testbed/payload_gen.erl
-module(emqx_ft_content_gen).
-include_lib("eunit/include/eunit.hrl").
-dialyzer(no_improper_lists).
-export([new/2]).
-export([generate/3]).
-export([next/1]).
-export([consume/1]).
-export([consume/2]).
-export([fold/3]).
-export([hash/2]).
-export([check_file_consistency/3]).
-export_type([cont/1]).
-export_type([stream/1]).
-export_type([binary_payload/0]).
-define(hash_size, 16).
-type payload() :: {Seed :: term(), Size :: integer()}.
-type binary_payload() :: {
binary(), _ChunkNum :: non_neg_integer(), _Meta :: #{}
}.
-type cont(Data) ::
fun(() -> stream(Data))
| stream(Data).
-type stream(Data) ::
maybe_improper_list(Data, cont(Data))
| eos.
-record(chunk_state, {
seed :: term(),
payload_size :: non_neg_integer(),
offset :: non_neg_integer(),
chunk_size :: non_neg_integer()
}).
-type chunk_state() :: #chunk_state{}.
%% -----------------------------------------------------------------------------
%% Generic streams
%% -----------------------------------------------------------------------------

%% @doc Force a continuation: if the stream is suspended behind a nullary
%% fun, evaluate it; otherwise it is already materialized (a list or `eos').
-spec next(cont(A)) -> stream(A).
next(Cont) when is_function(Cont, 0) ->
    Cont();
next(Stream) ->
    Stream.

%% @doc Run the whole stream through `Callback', collecting results
%% in order (e.g. to feed each chunk into brod:produce).
-spec consume(cont(A), fun((A) -> Ret)) -> [Ret].
consume(eos, _Callback) ->
    [];
consume(Cont, Callback) when is_function(Cont, 0) ->
    consume(Cont(), Callback);
consume([Data | Rest], Callback) ->
    [Callback(Data) | consume(next(Rest), Callback)].

%% @equiv consume(Stream, fun(A) -> A end)
-spec consume(cont(A)) -> [A].
consume(Stream) ->
    Identity = fun(Elem) -> Elem end,
    consume(Stream, Identity).

%% @doc Left fold over a (possibly suspended) stream.
-spec fold(fun((A, Acc) -> Acc), Acc, cont(A)) -> Acc.
fold(_Fun, Acc, eos) ->
    Acc;
fold(Fun, Acc, Cont) when is_function(Cont, 0) ->
    fold(Fun, Acc, Cont());
fold(Fun, Acc, [Data | Rest]) ->
    fold(Fun, Fun(Data, Acc), next(Rest)).
%% -----------------------------------------------------------------------------
%% Binary streams
%% -----------------------------------------------------------------------------
%% @doc Stream of binary chunks.
%% Limitation: `ChunkSize' should be dividable by `?hash_size'
-spec new(payload(), integer()) -> cont(binary_payload()).
new({Seed, Size}, ChunkSize) when ChunkSize rem ?hash_size =:= 0 ->
    %% Capture the initial generator state; the stream stays lazy until forced.
    InitialState = #chunk_state{
        seed = Seed,
        payload_size = Size,
        chunk_size = ChunkSize,
        offset = 0
    },
    fun() -> generate_next_chunk(InitialState) end.
%% @doc Generate chunks of data and feed them into
%% `Callback'
-spec generate(payload(), integer(), fun((binary_payload()) -> A)) -> [A].
generate(Payload, ChunkSize, Callback) ->
    Stream = new(Payload, ChunkSize),
    consume(Stream, Callback).
%% @doc Fold the stream's chunk contents into an in-progress crypto hash
%% context and finalize it to a digest.
-spec hash(cont(binary_payload()), crypto:hash_state()) -> binary().
hash(Stream, HashCtxIn) ->
    Update = fun({Chunk, _ChunkNum, _Meta}, HashCtx) ->
        crypto:hash_update(HashCtx, Chunk)
    end,
    crypto:hash_final(fold(Update, HashCtxIn, Stream)).
%% Verify that bytes read back via `Callback' match the deterministic
%% pseudorandom payload for `Seed'. Samples `SampleSize' random offsets,
%% plus the first byte, the last byte, and one offset past the end
%% (which must read as `undefined').
-spec check_consistency(
    payload(),
    integer(),
    fun((integer()) -> {ok, binary()} | undefined)
) -> ok.
check_consistency({Seed, Size}, SampleSize, Callback) ->
    SeedHash = seed_hash(Seed),
    RandomOffsets = [rand:uniform(Size) - 1 || _ <- lists:seq(1, SampleSize)],
    %% Always check first and last bytes, and one that should not exist:
    Offsets = [0, Size - 1, Size | RandomOffsets],
    lists:foreach(
        fun(N) ->
            case N < Size of
                true ->
                    Expected = do_get_byte(N, SeedHash),
                    %% Tag both sides with N so a failure reports the offset.
                    ?assertEqual(
                        {N, {ok, Expected}},
                        {N, Callback(N)}
                    );
                false ->
                    ?assertMatch(undefined, Callback(N))
            end
        end,
        Offsets
    ).
%% Like check_consistency/3, but reads the bytes from a file on disk.
%% The file descriptor is always closed, even if an assertion throws.
-spec check_file_consistency(
    payload(),
    integer(),
    file:filename()
) -> ok.
check_file_consistency(Payload, SampleSize, FileName) ->
    {ok, FD} = file:open(FileName, [read, raw]),
    %% In raw (non-binary) mode a successful 1-byte pread yields a
    %% one-element char list; `eof' marks reads past the end.
    ReadByte = fun(Offset) ->
        case file:pread(FD, [{Offset, 1}]) of
            {ok, [[Byte]]} -> {ok, Byte};
            {ok, [eof]} -> undefined
        end
    end,
    try
        check_consistency(Payload, SampleSize, ReadByte)
    after
        file:close(FD)
    end.
%% =============================================================================
%% Internal functions
%% =============================================================================
%% @doc Continue generating chunks.
%% Yields the next stream element and a continuation, or `eos' once the
%% offset has run past the configured payload size.
%% NOTE: each element is a `{Chunk, ChunkNum, Meta}' triple produced by
%% generate_chunk/4, i.e. a `binary_payload()' — the previous spec
%% wrongly declared `stream(binary())'.
-spec generate_next_chunk(chunk_state()) -> stream(binary_payload()).
generate_next_chunk(#chunk_state{offset = Offset, payload_size = Size}) when Offset >= Size ->
    eos;
generate_next_chunk(State0 = #chunk_state{offset = Offset, chunk_size = ChunkSize}) ->
    % Advance the cursor first so the continuation resumes past this chunk.
    State = State0#chunk_state{offset = Offset + ChunkSize},
    Payload = generate_chunk(
        State#chunk_state.seed,
        Offset,
        ChunkSize,
        State#chunk_state.payload_size
    ),
    [Payload | fun() -> generate_next_chunk(State) end].
%% Produce the chunk starting at `Offset' together with its 1-based
%% number and metadata. The payload is assembled from 16-byte
%% (MD5-digest-sized) deterministic pieces; the final chunk is cut down
%% so the whole stream is exactly `Size' bytes.
generate_chunk(Seed, Offset, ChunkSize, Size) ->
    SeedHash = seed_hash(Seed),
    LastByte = min(Offset + ChunkSize, Size) - 1,
    Pieces = [
        generator_fun(I, SeedHash)
     || I <- lists:seq(Offset div 16, LastByte div 16)
    ],
    Payload = iolist_to_binary(Pieces),
    Meta = #{
        chunk_size => ChunkSize,
        chunk_count => ceil(Size / ChunkSize)
    },
    Chunk =
        case Offset + ChunkSize > Size of
            true ->
                %% Final, possibly partial chunk: truncate to fit `Size'.
                binary:part(Payload, 0, Size rem ChunkSize);
            false ->
                Payload
        end,
    {Chunk, Offset div ChunkSize + 1, Meta}.

%% @doc First argument is a chunk number, the second one is a seed.
%% This implementation is hardly efficient, but it was chosen for
%% clarity reasons
-spec generator_fun(integer(), binary()) -> binary().
generator_fun(N, Seed) ->
    crypto:hash(md5, <<N:32, Seed/binary>>).

%% @doc Hash any term
-spec seed_hash(term()) -> binary().
seed_hash(Seed) ->
    crypto:hash(md5, term_to_binary(Seed)).
%% @private Byte value at absolute offset `N' of the pseudorandom payload:
%% regenerate the enclosing 16-byte piece and index into it.
-spec do_get_byte(integer(), binary()) -> byte().
do_get_byte(N, Seed) ->
    binary:at(generator_fun(N div ?hash_size, Seed), N rem ?hash_size).

View File

@ -0,0 +1,250 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_fs_util_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("kernel/include/file.hrl").
%% Run every t_* function in this module as a CT case.
all() ->
    emqx_common_test_helpers:all(?MODULE).
%% A single '*' glob lists only the first level of the fixture tree:
%% the directories "a", "c" and "d".
t_fold_single_level(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {"a", #file_info{type = directory}, ["a"]},
            {"c", #file_info{type = directory}, ["c"]},
            {"d", #file_info{type = directory}, ["d"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*']))
    ).
%% Globs with several '*' segments descend that many levels; the stack
%% (third tuple element) carries path fragments innermost-first.
t_fold_multi_level(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {"a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]},
            {"a/b/foo/Я", #file_info{type = regular}, ["Я", "foo", "b", "a"]},
            {"d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*']))
    ),
    ?assertMatch(
        [
            {"a/b/foo", #file_info{type = directory}, ["foo", "b", "a"]},
            {"c/bar/中文", #file_info{type = regular}, ["中文", "bar", "c"]},
            {"d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*']))
    ).
%% An empty glob matches just the root itself.
t_fold_no_glob(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [{"", #file_info{type = directory}, []}],
        sort(fold(fun cons/4, [], Root, []))
    ).
%% A glob deeper than the actual tree yields nothing.
t_fold_glob_too_deep(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*', '*']))
    ).
%% Non-directory roots (a symlink and a plain file in the fixtures)
%% are not traversable: folding over them produces no entries.
t_fold_invalid_root(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [],
        sort(fold(fun cons/4, [], filename:join([Root, "a", "link"]), ['*']))
    ),
    ?assertMatch(
        [],
        sort(fold(fun cons/4, [], filename:join([Root, "d", "haystack"]), ['*']))
    ).
%% Fun filters in the last glob position select leaves by name: split
%% entries into latin1-named and non-latin1-named files.
t_fold_filter_unicode(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {"a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]},
            {"d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*', fun is_latin1/1]))
    ),
    ?assertMatch(
        [
            {"a/b/foo/Я", #file_info{type = regular}, ["Я", "foo", "b", "a"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*', is_not(fun is_latin1/1)]))
    ).
%% Fun filters may appear at intermediate levels too: restrict the
%% first two levels to single-letter names.
t_fold_filter_levels(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {"a/b/foo", #file_info{type = directory}, ["foo", "b", "a"]},
            {"d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]}
        ],
        sort(fold(fun cons/4, [], Root, [fun is_letter/1, fun is_letter/1, '*']))
    ).
%% Filesystem errors surface as {error, Reason} in place of #file_info{}
%% and traversal continues past them; read_info/1 is mocked to fail for
%% selected names.
%% NOTE(review): the meck mock is never unloaded here — confirm a later
%% hook (or suite teardown) takes care of it.
t_fold_errors(Config) ->
    Root = ?config(data_dir, Config),
    ok = meck:new(emqx_ft_fs_util, [passthrough]),
    ok = meck:expect(emqx_ft_fs_util, read_info, fun(AbsFilepath) ->
        ct:pal("read_info(~p)", [AbsFilepath]),
        Filename = filename:basename(AbsFilepath),
        case Filename of
            "b" -> {error, eacces};
            "link" -> {error, enotsup};
            "bar" -> {error, enotdir};
            "needle" -> {error, ebusy};
            _ -> meck:passthrough([AbsFilepath])
        end
    end),
    ?assertMatch(
        [
            {"a/b", {error, eacces}, ["b", "a"]},
            {"a/link", {error, enotsup}, ["link", "a"]},
            {"c/link", {error, enotsup}, ["link", "c"]},
            {"d/e/baz/needle", {error, ebusy}, ["needle", "baz", "e", "d"]}
        ],
        sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*']))
    ).
%% seek/3 positions an iterator just past the given path; a subsequent
%% fold yields only entries that sort after it.
t_seek_fold(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {leaf, "a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]},
            {leaf, "a/b/foo/Я", #file_info{type = regular}, ["Я", "foo", "b", "a"]},
            {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]}
            | _Nodes
        ],
        sort(
            emqx_ft_fs_iterator:fold(
                fun cons/2,
                [],
                emqx_ft_fs_iterator:seek(["a", "a"], Root, ['*', '*', '*', '*'])
            )
        )
    ),
    %% Seeking exactly at an existing leaf excludes that leaf itself.
    ?assertMatch(
        [
            {leaf, "a/b/foo/Я", #file_info{type = regular}, ["Я", "foo", "b", "a"]},
            {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]}
            | _Nodes
        ],
        sort(
            emqx_ft_fs_iterator:fold(
                fun cons/2,
                [],
                emqx_ft_fs_iterator:seek(["a", "b", "foo", "42"], Root, ['*', '*', '*', '*'])
            )
        )
    ),
    %% Seeking to a nonexistent path resumes at the next entry in order.
    ?assertMatch(
        [
            {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]}
            | _Nodes
        ],
        sort(
            emqx_ft_fs_iterator:fold(
                fun cons/2,
                [],
                emqx_ft_fs_iterator:seek(["c", "d", "e", "f"], Root, ['*', '*', '*', '*'])
            )
        )
    ).
%% Seeking to the empty path is equivalent to a fresh iterator.
t_seek_empty(Config) ->
    Root = ?config(data_dir, Config),
    ?assertEqual(
        emqx_ft_fs_iterator:fold(
            fun cons/2,
            [],
            emqx_ft_fs_iterator:new(Root, ['*', '*', '*', '*'])
        ),
        emqx_ft_fs_iterator:fold(
            fun cons/2,
            [],
            emqx_ft_fs_iterator:seek([], Root, ['*', '*', '*', '*'])
        )
    ).
%% Seeking beyond the last entry leaves an exhausted iterator.
t_seek_past_end(Config) ->
    Root = ?config(data_dir, Config),
    ?assertEqual(
        none,
        emqx_ft_fs_iterator:next(
            emqx_ft_fs_iterator:seek(["g", "h"], Root, ['*', '*', '*', '*'])
        )
    ).
%% seek/3 honors fun filters in the glob while positioning.
t_seek_with_filter(Config) ->
    Root = ?config(data_dir, Config),
    ?assertMatch(
        [
            {leaf, "d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]}
            | _Nodes
        ],
        sort(
            emqx_ft_fs_iterator:fold(
                fun cons/2,
                [],
                emqx_ft_fs_iterator:seek(["a", "link"], Root, ['*', fun is_letter/1, '*'])
            )
        )
    ).
%%
%% Thin wrapper over the function under test, to keep testcases short.
fold(FoldFun, Acc, Root, Glob) ->
    emqx_ft_fs_util:fold(FoldFun, Acc, Root, Glob).
%% Complement of a predicate fun.
is_not(Pred) ->
    fun(X) -> not Pred(X) end.
%% True when the (possibly Unicode) filename converts cleanly to Latin-1.
is_latin1(Filename) ->
    case unicode:characters_to_binary(Filename, unicode, latin1) of
        {error, _Converted, _Rest} ->
            false;
        _Success ->
            true
    end.
%% True only for single-character filenames.
is_letter([_Char]) ->
    true;
is_letter(_Other) ->
    false.
%% Fold callbacks: accumulate results as lists.
cons(Path, Info, Stack, Acc) ->
    [{Path, Info, Stack} | Acc].
cons(Entry, Acc) ->
    [Entry | Acc].
sort(Items) when is_list(Items) ->
    lists:sort(Items).

View File

@ -0,0 +1 @@
Ты

View File

@ -0,0 +1 @@
../c

View File

@ -0,0 +1 @@
Zhōngwén

View File

@ -0,0 +1 @@
../a

View File

@ -0,0 +1 @@
haystack

View File

@ -0,0 +1 @@
needle

View File

@ -0,0 +1,65 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_fs_util_tests).
-include_lib("eunit/include/eunit.hrl").
%% Names made of printable characters (including multibyte UTF-8) are
%% accepted as safe.
filename_safe_test_() ->
    [
        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe("im.safe")),
        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"im.safe">>)),
        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<".safe.100%">>)),
        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"safe.as.🦺"/utf8>>))
    ].
%% Rejections are tagged by reason: empty names, the special "." / ".."
%% entries, names with path separators or drive prefixes, and
%% non-printable characters.
filename_unsafe_test_() ->
    [
        ?_assertEqual({error, empty}, emqx_ft_fs_util:is_filename_safe("")),
        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(".")),
        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe("..")),
        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(<<"..">>)),
        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe(<<".././..">>)),
        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("/etc/passwd")),
        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("../cookie")),
        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("C:$cookie")),
        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([1, 2, 3])),
        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe(<<4, 5, 6>>)),
        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([$a, 16#7F, $z]))
    ].
%% {EscapedFilename, RawInput} pairs; escaping must be reversible, so
%% the same table drives both the escape and unescape tests below.
-define(NAMES, [
    {"just.file", <<"just.file">>},
    {".hidden", <<".hidden">>},
    {".~what", <<".~what">>},
    {"100%25.file", <<"100%.file">>},
    {"%2E%2E", <<"..">>},
    {"...", <<"...">>},
    {"%2Fetc%2Fpasswd", <<"/etc/passwd">>},
    {"%01%02%0A ", <<1, 2, 10, 32>>}
]).
%% Escaping percent-encodes dangerous bytes and '%' itself.
escape_filename_test_() ->
    [
        ?_assertEqual(Filename, emqx_ft_fs_util:escape_filename(Input))
     || {Filename, Input} <- ?NAMES
    ].
%% Unescaping restores the original raw name.
unescape_filename_test_() ->
    [
        ?_assertEqual(Input, emqx_ft_fs_util:unescape_filename(Filename))
     || {Filename, Input} <- ?NAMES
    ].

View File

@ -0,0 +1,84 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_responder_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("stdlib/include/assert.hrl").
%% Run every t_* function in this module as a CT case.
all() -> emqx_common_test_helpers:all(?MODULE).
%% Start the emqx_ft application once for the whole suite.
init_per_suite(Config) ->
    ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)),
    Config.
end_per_suite(_Config) ->
    ok = emqx_common_test_helpers:stop_apps([emqx_ft]),
    ok.
init_per_testcase(_Case, Config) ->
    Config.
end_per_testcase(_Case, _Config) ->
    ok.
%% A responder can be registered once per key; a second start reports
%% {error, already_started}. `ack' runs the action with {ack, Ref} and
%% stops the responder, so a second ack exits with `noproc'.
t_start_ack(_Config) ->
    Key = <<"test">>,
    DefaultAction = fun({ack, Ref}) -> Ref end,
    ?assertMatch(
        {ok, _Pid},
        emqx_ft_responder:start(Key, DefaultAction, 1000)
    ),
    ?assertMatch(
        {error, {already_started, _Pid}},
        emqx_ft_responder:start(Key, DefaultAction, 1000)
    ),
    Ref = make_ref(),
    ?assertEqual(
        Ref,
        emqx_ft_responder:ack(Key, Ref)
    ),
    ?assertExit(
        {noproc, _},
        emqx_ft_responder:ack(Key, Ref)
    ).
%% Without an ack, the responder fires the action with `timeout' after
%% the given TTL (20 ms here) and terminates.
t_timeout(_Config) ->
    Key = <<"test">>,
    Self = self(),
    DefaultAction = fun(timeout) -> Self ! {timeout, Key} end,
    {ok, _Pid} = emqx_ft_responder:start(Key, DefaultAction, 20),
    receive
        {timeout, Key} ->
            ok
    after 100 ->
        ct:fail("emqx_ft_responder not called")
    end,
    ?assertExit(
        {noproc, _},
        emqx_ft_responder:ack(Key, oops)
    ).
%% Stray messages and casts must not crash the responder; unknown calls
%% get a tagged error reply.
t_unknown_msgs(_Config) ->
    {ok, Pid} = emqx_ft_responder:start(make_ref(), fun(_) -> ok end, 100),
    Pid ! {unknown_msg, <<"test">>},
    ok = gen_server:cast(Pid, {unknown_msg, <<"test">>}),
    ?assertEqual(
        {error, unknown_call},
        gen_server:call(Pid, {unknown_call, <<"test">>})
    ).

View File

@ -0,0 +1,199 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_exporter_s3_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-define(assertS3Data(Data, Url),
case httpc:request(Url) of
{ok, {{_StatusLine, 200, "OK"}, _Headers, Body}} ->
?assertEqual(Data, list_to_binary(Body), "S3 data mismatch");
OtherResponse ->
ct:fail("Unexpected response: ~p", [OtherResponse])
end
).
%% Run every t_* function in this module as a CT case.
all() -> emqx_common_test_helpers:all(?MODULE).
init_per_suite(Config) ->
    Config.
end_per_suite(_Config) ->
    ok.
%% Per-app config hook: point file-transfer storage at the S3 exporter
%% using the bucket created for the current testcase.
set_special_configs(Config) ->
    fun
        (emqx_ft) ->
            Storage = emqx_ft_test_helpers:local_storage(Config, #{
                exporter => s3, bucket_name => ?config(bucket_name, Config)
            }),
            emqx_ft_test_helpers:load_config(#{<<"enable">> => true, <<"storage">> => Storage});
        (_) ->
            ok
    end.
%% Each testcase gets a fresh bucket and a clientid derived from the
%% testcase name; apps are restarted with the matching config.
init_per_testcase(Case, Config0) ->
    ClientId = atom_to_binary(Case),
    BucketName = create_bucket(),
    Config1 = [{bucket_name, BucketName}, {clientid, ClientId} | Config0],
    ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx_ft], set_special_configs(Config1)),
    Config1.
end_per_testcase(_Case, _Config) ->
    ok = emqx_common_test_helpers:stop_apps([emqx_ft, emqx_conf]),
    ok.
%%--------------------------------------------------------------------
%% Test Cases
%%-------------------------------------------------------------------
%% Upload a file and verify both the exported object content (fetched
%% over HTTP via the listed URI) and the S3 object metadata fields set
%% by the exporter (clientid, fileid, filemeta JSON).
t_happy_path(Config) ->
    ClientId = ?config(clientid, Config),
    FileId = <<"🌚"/utf8>>,
    Name = "cool_name",
    Data = <<"data"/utf8>>,
    ?assertEqual(
        ok,
        emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, Data)
    ),
    {ok, #{items := [#{uri := Uri}]}} = emqx_ft_storage:files(),
    ?assertS3Data(Data, Uri),
    % Objects are keyed as <clientid>/<fileid>/<name>.
    Key = binary_to_list(ClientId) ++ "/" ++ binary_to_list(FileId) ++ "/" ++ Name,
    Meta = erlcloud_s3:get_object_metadata(
        ?config(bucket_name, Config), Key, emqx_ft_test_helpers:aws_config()
    ),
    ?assertEqual(
        ClientId,
        metadata_field("clientid", Meta)
    ),
    ?assertEqual(
        FileId,
        metadata_field("fileid", Meta)
    ),
    NameBin = list_to_binary(Name),
    ?assertMatch(
        #{
            <<"name">> := NameBin,
            <<"size">> := 4
        },
        emqx_utils_json:decode(metadata_field("filemeta", Meta), [return_maps])
    ).
%% Uploading against a nonexistent bucket must fail with an error
%% rather than silently succeed.
t_upload_error(Config) ->
    ClientId = ?config(clientid, Config),
    FileId = <<"🌚"/utf8>>,
    Name = "cool_name",
    Data = <<"data"/utf8>>,
    {ok, _} = emqx_conf:update(
        [file_transfer, storage, local, exporter, s3, bucket], <<"invalid-bucket">>, #{}
    ),
    ?assertEqual(
        {error, unspecified_error},
        emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, Data)
    ).
%% Upload more files than a single S3 listing can return (1050 > 1000)
%% and verify that cursor-based pagination yields every file exactly
%% once, for page sizes both below and above the S3 listing limit.
t_paging(Config) ->
    ClientId = ?config(clientid, Config),
    N = 1050,
    FileId = fun integer_to_binary/1,
    Name = "cool_name",
    Data = fun integer_to_binary/1,
    ok = lists:foreach(
        fun(I) ->
            ok = emqx_ft_test_helpers:upload_file(ClientId, FileId(I), Name, Data(I))
        end,
        lists:seq(1, N)
    ),
    % Point query for a single transfer still works amid many files.
    {ok, #{items := [#{uri := Uri}]}} = emqx_ft_storage:files(#{transfer => {ClientId, FileId(123)}}),
    ?assertS3Data(Data(123), Uri),
    lists:foreach(
        fun(PageSize) ->
            Pages = file_pages(#{limit => PageSize}),
            ?assertEqual(
                expected_page_count(PageSize, N),
                length(Pages)
            ),
            % FIX: flatten the pages with lists:append/1 — lists:concat/1
            % is meant for building strings, not joining lists of terms.
            FileIds = [
                FId
             || #{transfer := {_, FId}} <- lists:append(Pages)
            ],
            ?assertEqual(
                lists:sort([FileId(I) || I <- lists:seq(1, N)]),
                lists:sort(FileIds)
            )
        end,
        %% less than S3 limit, greater than S3 limit
        [20, 550]
    ).
%% Cursors are opaque but must be valid UTF-8; anything else is
%% rejected with a badarg error.
t_invalid_cursor(_Config) ->
    InvalidUtf8 = <<16#80>>,
    ?assertError(
        {badarg, cursor},
        emqx_ft_storage:files(#{following => InvalidUtf8})
    ).
%%--------------------------------------------------------------------
%% Helper Functions
%%--------------------------------------------------------------------
%% Number of pages needed to list `Total' items at `PageSize' per page
%% (i.e. ceiling division).
expected_page_count(PageSize, Total) when Total rem PageSize =:= 0 ->
    Total div PageSize;
expected_page_count(PageSize, Total) ->
    Total div PageSize + 1.
%% Page through emqx_ft_storage:files/1 until the cursor is exhausted,
%% returning the list of pages (each page is a list of file items).
%% Fails the testcase on a storage error.
file_pages(Query) ->
    case emqx_ft_storage:files(Query) of
        {ok, #{items := Items, cursor := NewCursor}} ->
            % More pages follow: cons this page (was `[Items] ++ ...`,
            % an unnecessary O(n) append to prepend a single element).
            [Items | file_pages(Query#{following => NewCursor})];
        {ok, #{items := Items}} ->
            % No cursor in the result: this is the last page.
            [Items];
        {error, Error} ->
            ct:fail("Failed to download files: ~p", [Error])
    end.
%% Fetch a user-defined S3 metadata header ("x-amz-meta-<Field>") from
%% an erlcloud metadata proplist; `false' when the header is absent.
metadata_field(Field, Meta) ->
    HeaderName = "x-amz-meta-" ++ Field,
    case lists:keyfind(HeaderName, 1, Meta) of
        {_HeaderName, Value} -> list_to_binary(Value);
        false -> false
    end.
%% Create a uniquely-named bucket in the test S3 endpoint and return
%% its name. lhttpc is erlcloud's HTTP client and must be running.
create_bucket() ->
    BucketName = emqx_s3_test_helpers:unique_bucket(),
    _ = application:ensure_all_started(lhttpc),
    ok = erlcloud_s3:create_bucket(BucketName, emqx_ft_test_helpers:aws_config()),
    BucketName.

View File

@ -0,0 +1,93 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_fs_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
%% Only the clustered group runs; cases are listed in ?CLUSTER_CASES.
all() ->
    [
        {group, cluster}
    ].
-define(CLUSTER_CASES, [t_multinode_exports]).
groups() ->
    [
        {cluster, [sequence], ?CLUSTER_CASES}
    ].
init_per_suite(Config) ->
    ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)),
    Config.
end_per_suite(_Config) ->
    ok = emqx_common_test_helpers:stop_apps([emqx_ft]),
    ok.
%% Remember the current testcase name for client_id/1.
init_per_testcase(Case, Config) ->
    [{tc, Case} | Config].
end_per_testcase(_Case, _Config) ->
    ok.
%% Cluster group: boot one extra node so exports can span two nodes.
init_per_group(cluster, Config) ->
    Node = emqx_ft_test_helpers:start_additional_node(Config, emqx_ft_storage_fs1),
    [{additional_node, Node} | Config];
init_per_group(_Group, Config) ->
    Config.
end_per_group(cluster, Config) ->
    ok = emqx_ft_test_helpers:stop_additional_node(?config(additional_node, Config));
end_per_group(_Group, _Config) ->
    ok.
%%--------------------------------------------------------------------
%% Tests
%%--------------------------------------------------------------------
%% Files uploaded on different nodes must all be visible through a
%% single files/2 listing.
t_multinode_exports(Config) ->
    Node1 = ?config(additional_node, Config),
    ok = emqx_ft_test_helpers:upload_file(<<"c/1">>, <<"f:1">>, "fn1", <<"data">>, Node1),
    Node2 = node(),
    ok = emqx_ft_test_helpers:upload_file(<<"c/2">>, <<"f:2">>, "fn2", <<"data">>, Node2),
    ?assertMatch(
        [
            #{transfer := {<<"c/1">>, <<"f:1">>}, name := "fn1"},
            #{transfer := {<<"c/2">>, <<"f:2">>}, name := "fn2"}
        ],
        lists:sort(list_files(Config))
    ).
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% Client id derived from the current testcase name (set in
%% init_per_testcase).
client_id(Config) ->
    atom_to_binary(?config(tc, Config), utf8).
%% Translate the raw storage config into the runtime `local' storage
%% term expected by emqx_ft_storage_fs.
storage(Config) ->
    RawConfig = #{<<"storage">> => emqx_ft_test_helpers:local_storage(Config)},
    #{storage := #{local := Storage}} = emqx_ft_schema:translate(RawConfig),
    Storage.
%% List all exported files known to the local storage backend.
list_files(Config) ->
    {ok, #{items := Files}} = emqx_ft_storage_fs:files(storage(Config), #{}),
    Files.

View File

@ -0,0 +1,363 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_fs_gc_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("emqx_ft/include/emqx_ft_storage_fs.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("snabbkaffe/include/test_macros.hrl").
%% Run every t_* function in this module as a CT case.
all() ->
    emqx_common_test_helpers:all(?MODULE).
init_per_suite(Config) ->
    _ = application:load(emqx_ft),
    ok = emqx_common_test_helpers:start_apps([]),
    Config.
end_per_suite(_Config) ->
    ok = emqx_common_test_helpers:stop_apps([]),
    ok.
%% Give each testcase its own segments/exports roots so GC activity in
%% one case cannot leak into another; trace events via snabbkaffe.
init_per_testcase(TC, Config) ->
    SegmentsRoot = emqx_ft_test_helpers:root(Config, node(), [TC, segments]),
    ExportsRoot = emqx_ft_test_helpers:root(Config, node(), [TC, exports]),
    ok = emqx_common_test_helpers:start_app(
        emqx_ft,
        fun(emqx_ft) ->
            emqx_ft_test_helpers:load_config(#{
                <<"enable">> => true,
                <<"storage">> => #{
                    <<"local">> => #{
                        <<"segments">> => #{<<"root">> => SegmentsRoot},
                        <<"exporter">> => #{
                            <<"local">> => #{<<"root">> => ExportsRoot}
                        }
                    }
                }
            })
        end
    ),
    ok = snabbkaffe:start_trace(),
    Config.
end_per_testcase(_TC, _Config) ->
    ok = snabbkaffe:stop(),
    ok = application:stop(emqx_ft),
    ok.
%%
-define(NSEGS(Filesize, SegmentSize), (ceil(Filesize / SegmentSize) + 1)).
%% With a short GC interval and no transfers, periodic GC runs must
%% fire repeatedly and collect nothing (zero files/dirs/space, no
%% errors).
t_gc_triggers_periodically(_Config) ->
    Interval = 500,
    ok = set_gc_config(interval, Interval),
    ok = emqx_ft_storage_fs_gc:reset(),
    ?check_trace(
        % Sleep long enough for at least two GC runs to be traced.
        timer:sleep(Interval * 3),
        fun(Trace) ->
            [Event, _ | _] = ?of_kind(garbage_collection, Trace),
            ?assertMatch(
                #{
                    stats := #gcstats{
                        files = 0,
                        directories = 0,
                        space = 0,
                        errors = #{} = Errors
                    }
                } when map_size(Errors) == 0,
                Event
            )
        end
    ).
%% collect/0 runs a GC pass synchronously and also emits exactly one
%% garbage_collection trace event.
t_gc_triggers_manually(_Config) ->
    ?check_trace(
        ?assertMatch(
            #gcstats{files = 0, directories = 0, space = 0, errors = #{} = Errors} when
                map_size(Errors) == 0,
            emqx_ft_storage_fs_gc:collect()
        ),
        fun(Trace) ->
            [Event] = ?of_kind(garbage_collection, Trace),
            ?assertMatch(
                #{stats := #gcstats{}},
                Event
            )
        end
    ).
%% End-to-end GC for completed transfers: once a transfer finishes its
%% segments are collected on the next GC run, and the emptied transfer
%% directories are eventually reaped too.
t_gc_complete_transfers(_Config) ->
    {local, Storage} = emqx_ft_storage:backend(),
    ok = set_gc_config(minimum_segments_ttl, 0),
    ok = set_gc_config(maximum_segments_ttl, 3),
    ok = set_gc_config(interval, 500),
    ok = emqx_ft_storage_fs_gc:reset(),
    Transfers = [
        {
            T1 = {<<"client1">>, mk_file_id()},
            #{name => "cat.cur", segments_ttl => 10},
            emqx_ft_content_gen:new({?LINE, S1 = 42}, SS1 = 16)
        },
        {
            T2 = {<<"client2">>, mk_file_id()},
            #{name => "cat.ico", segments_ttl => 10},
            emqx_ft_content_gen:new({?LINE, S2 = 420}, SS2 = 64)
        },
        {
            T3 = {<<"client42">>, mk_file_id()},
            #{name => "cat.jpg", segments_ttl => 10},
            emqx_ft_content_gen:new({?LINE, S3 = 42000}, SS3 = 1024)
        }
    ],
    % 1. Start all transfers
    TransferSizes = emqx_utils:pmap(
        fun(Transfer) -> start_transfer(Storage, Transfer) end,
        Transfers
    ),
    ?assertEqual([S1, S2, S3], TransferSizes),
    % Nothing is complete yet, so nothing may be collected.
    ?assertMatch(
        #gcstats{files = 0, directories = 0, errors = #{} = Es} when map_size(Es) == 0,
        emqx_ft_storage_fs_gc:collect()
    ),
    % 2. Complete just the first transfer
    {ok, {ok, Event}} = ?wait_async_action(
        ?assertEqual(ok, complete_transfer(Storage, T1, S1)),
        #{?snk_kind := garbage_collection},
        1000
    ),
    ?assertMatch(
        #{
            stats := #gcstats{
                files = Files,
                directories = 2,
                space = Space,
                errors = #{} = Es
            }
        } when Files == ?NSEGS(S1, SS1) andalso Space > S1 andalso map_size(Es) == 0,
        Event
    ),
    % 3. Complete rest of transfers
    {ok, Sub} = snabbkaffe_collector:subscribe(
        ?match_event(#{?snk_kind := garbage_collection}),
        2,
        1000,
        0
    ),
    ?assertEqual(
        [ok, ok],
        emqx_utils:pmap(
            fun({Transfer, Size}) -> complete_transfer(Storage, Transfer, Size) end,
            [{T2, S2}, {T3, S3}]
        )
    ),
    {ok, Events} = snabbkaffe_collector:receive_events(Sub),
    % Aggregate the stats across both GC runs before asserting totals.
    CFiles = lists:sum([Stats#gcstats.files || #{stats := Stats} <- Events]),
    CDirectories = lists:sum([Stats#gcstats.directories || #{stats := Stats} <- Events]),
    CSpace = lists:sum([Stats#gcstats.space || #{stats := Stats} <- Events]),
    CErrors = lists:foldl(
        fun maps:merge/2,
        #{},
        [Stats#gcstats.errors || #{stats := Stats} <- Events]
    ),
    ?assertEqual(?NSEGS(S2, SS2) + ?NSEGS(S3, SS3), CFiles),
    ?assertEqual(2 + 2, CDirectories),
    ?assertMatch(Space when Space > S2 + S3, CSpace),
    ?assertMatch(Errors when map_size(Errors) == 0, CErrors),
    % 4. Ensure that empty transfer directories will be eventually collected
    {ok, _} = ?block_until(
        #{
            ?snk_kind := garbage_collection,
            stats := #gcstats{
                files = 0,
                directories = 6,
                space = 0
            }
        },
        5000,
        0
    ).
%% GC of unfinished transfers honors per-transfer `segments_ttl',
%% clamped into [minimum_segments_ttl, maximum_segments_ttl]; transfers
%% without a TTL fall back to the (clamped) default. Segments should
%% thus expire in TTL order: 1s, 2s, then the clamped 3000s / default.
t_gc_incomplete_transfers(_Config) ->
    ok = set_gc_config(minimum_segments_ttl, 0),
    ok = set_gc_config(maximum_segments_ttl, 4),
    {local, Storage} = emqx_ft_storage:backend(),
    Transfers = [
        {
            {<<"client43"/utf8>>, <<"file-🦕"/utf8>>},
            #{name => "dog.cur", segments_ttl => 1},
            emqx_ft_content_gen:new({?LINE, S1 = 123}, SS1 = 32)
        },
        {
            {<<"client44">>, <<"file-🦖"/utf8>>},
            #{name => "dog.ico", segments_ttl => 2},
            emqx_ft_content_gen:new({?LINE, S2 = 456}, SS2 = 64)
        },
        {
            {<<"client1337">>, <<"file-🦀"/utf8>>},
            #{name => "dog.jpg", segments_ttl => 3000},
            emqx_ft_content_gen:new({?LINE, S3 = 7890}, SS3 = 128)
        },
        {
            {<<"client31337">>, <<"file-⏳"/utf8>>},
            #{name => "dog.jpg"},
            emqx_ft_content_gen:new({?LINE, S4 = 1230}, SS4 = 256)
        }
    ],
    % 1. Start transfers, send all the segments but don't trigger completion.
    _ = emqx_utils:pmap(fun(Transfer) -> start_transfer(Storage, Transfer) end, Transfers),
    % 2. Enable periodic GC every 0.5 seconds.
    ok = set_gc_config(interval, 500),
    ok = emqx_ft_storage_fs_gc:reset(),
    % 3. First we need the first transfer to be collected.
    {ok, _} = ?block_until(
        #{
            ?snk_kind := garbage_collection,
            stats := #gcstats{
                files = Files,
                directories = 4,
                space = Space
            }
        } when Files == (?NSEGS(S1, SS1)) andalso Space > S1,
        5000,
        0
    ),
    % 4. Then the second one.
    {ok, _} = ?block_until(
        #{
            ?snk_kind := garbage_collection,
            stats := #gcstats{
                files = Files,
                directories = 4,
                space = Space
            }
        } when Files == (?NSEGS(S2, SS2)) andalso Space > S2,
        5000,
        0
    ),
    % 5. Then transfers 3 and 4 because 3rd has too big TTL and 4th has no specific TTL.
    {ok, _} = ?block_until(
        #{
            ?snk_kind := garbage_collection,
            stats := #gcstats{
                files = Files,
                directories = 4 * 2,
                space = Space
            }
        } when Files == (?NSEGS(S3, SS3) + ?NSEGS(S4, SS4)) andalso Space > S3 + S4,
        5000,
        0
    ).
%% GC must survive filesystem chaos (dangling symlinks, stray files
%% where directories are expected), report each failure in the
%% `errors' map, and still collect everything else.
t_gc_handling_errors(_Config) ->
    ok = set_gc_config(minimum_segments_ttl, 0),
    ok = set_gc_config(maximum_segments_ttl, 0),
    {local, Storage} = emqx_ft_storage:backend(),
    Transfer1 = {<<"client1">>, mk_file_id()},
    Transfer2 = {<<"client2">>, mk_file_id()},
    Filemeta = #{name => "oops.pdf"},
    Size = 420,
    SegSize = 16,
    _ = start_transfer(
        Storage,
        {Transfer1, Filemeta, emqx_ft_content_gen:new({?LINE, Size}, SegSize)}
    ),
    _ = start_transfer(
        Storage,
        {Transfer2, Filemeta, emqx_ft_content_gen:new({?LINE, Size}, SegSize)}
    ),
    % 1. Throw some chaos in the transfer directory.
    DirFragment1 = emqx_ft_storage_fs:get_subdir(Storage, Transfer1, fragment),
    DirTemporary1 = emqx_ft_storage_fs:get_subdir(Storage, Transfer1, temporary),
    PathShadyLink = filename:join(DirTemporary1, "linked-here"),
    ok = file:make_symlink(DirFragment1, PathShadyLink),
    DirTransfer2 = emqx_ft_storage_fs:get_subdir(Storage, Transfer2),
    PathTripUp = filename:join(DirTransfer2, "trip-up-here"),
    ok = file:write_file(PathTripUp, <<"HAHA">>),
    % Let the segments outlive their (zero) TTL before collecting.
    ok = timer:sleep(timer:seconds(1)),
    % 2. Observe the errors are reported consistently.
    ?check_trace(
        ?assertMatch(
            #gcstats{
                files = Files,
                directories = 3,
                space = Space,
                errors = #{
                    % NOTE: dangling symlink looks like `enoent` for some reason
                    {file, PathShadyLink} := {unexpected, _},
                    {directory, DirTransfer2} := eexist
                }
            } when Files == ?NSEGS(Size, SegSize) * 2 andalso Space > Size * 2,
            emqx_ft_storage_fs_gc:collect()
        ),
        fun(Trace) ->
            ?assertMatch(
                [
                    #{
                        errors := #{
                            {file, PathShadyLink} := {unexpected, _},
                            {directory, DirTransfer2} := eexist
                        }
                    }
                ],
                ?of_kind("garbage_collection_errors", Trace)
            )
        end
    ).
%%
%% Set a single GC-related configuration value of the local storage backend.
set_gc_config(Name, Value) ->
    emqx_config:put([file_transfer, storage, local, segments, gc, Name], Value).

%% Store the transfer's file metadata, then feed every generated segment
%% into the storage. Returns the total number of bytes transferred.
start_transfer(Storage, {Transfer, Meta, Gen}) ->
    ?assertEqual(
        ok,
        emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta)
    ),
    emqx_ft_content_gen:fold(
        fun({Content, SegmentNum, #{chunk_size := SegmentSize}}, _Transferred) ->
            %% Segment numbers are 1-based, byte offsets are 0-based.
            Offset = (SegmentNum - 1) * SegmentSize,
            ?assertEqual(
                ok,
                emqx_ft_storage_fs:store_segment(Storage, Transfer, {Offset, Content})
            ),
            Offset + byte_size(Content)
        end,
        0,
        Gen
    ).

%% Kick off assembly of a finished transfer and wait for the assembler
%% process to terminate, returning its result.
complete_transfer(Storage, Transfer, Size) ->
    complete_transfer(Storage, Transfer, Size, 100).

complete_transfer(Storage, Transfer, Size, Timeout) ->
    {async, Pid} = emqx_ft_storage_fs:assemble(Storage, Transfer, Size),
    MRef = erlang:monitor(process, Pid),
    Pid ! kickoff,
    receive
        {'DOWN', MRef, process, Pid, {shutdown, Result}} ->
            Result
    after Timeout ->
        ct:fail("Assembler did not finish in time")
    end.

%% Fresh globally-unique file id, as a hex string.
mk_file_id() ->
    emqx_guid:to_hexstr(emqx_guid:gen()).

View File

@ -0,0 +1,153 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_fs_reader_SUITE).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
all() -> emqx_common_test_helpers:all(?MODULE).

init_per_suite(Config) ->
    ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)),
    Config.

end_per_suite(_Config) ->
    ok = emqx_common_test_helpers:stop_apps([emqx_ft]),
    ok.

%% Prepare a small fixture file for each testcase and expose its path
%% via the `path` config key.
init_per_testcase(_Case, Config) ->
    %% NOTE: result deliberately ignored — the directory may already exist.
    file:make_dir(?config(data_dir, Config)),
    Data = <<"hello world">>,
    Path = expand_path(Config, "test_file"),
    ok = mk_test_file(Path, Data),
    [{path, Path} | Config].

end_per_testcase(_Case, _Config) ->
    ok.
%% Sequential reads yield the file content in order, then `eof`;
%% afterwards the reader process terminates on its own.
t_successful_read(Config) ->
    Path = ?config(path, Config),
    {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(self(), Path),
    ?assertEqual(
        {ok, <<"hello ">>},
        emqx_ft_storage_fs_reader:read(ReaderPid, 6)
    ),
    %% Last chunk may be shorter than the requested size.
    ?assertEqual(
        {ok, <<"world">>},
        emqx_ft_storage_fs_reader:read(ReaderPid, 6)
    ),
    ?assertEqual(
        eof,
        emqx_ft_storage_fs_reader:read(ReaderPid, 6)
    ),
    %% Reader is expected to stop itself once the file is exhausted.
    ?assertNot(is_process_alive(ReaderPid)).
%% The reader must not outlive the process it serves: once the caller
%% terminates, the reader should go down as well.
t_caller_dead(Config) ->
    %% Trap exits so the linked caller's death doesn't kill the testcase.
    erlang:process_flag(trap_exit, true),
    Path = ?config(path, Config),
    CallerPid = spawn_link(
        fun() ->
            receive
                stop -> ok
            end
        end
    ),
    {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(CallerPid, Path),
    _ = erlang:monitor(process, ReaderPid),
    ?assertEqual(
        {ok, <<"hello ">>},
        emqx_ft_storage_fs_reader:read(ReaderPid, 6)
    ),
    CallerPid ! stop,
    receive
        {'DOWN', _, process, ReaderPid, _} -> ok
    after 1000 ->
        ct:fail("Reader process did not die")
    end.
%% QLC tables over a reader stream the file content in chunks; with no
%% explicit chunk size the whole file comes back as a single binary.
t_tables(Config) ->
    Path = ?config(path, Config),
    {ok, ReaderPid0} = emqx_ft_storage_fs_reader:start_link(self(), Path),
    ReaderQH0 = emqx_ft_storage_fs_reader:table(ReaderPid0, 6),
    ?assertEqual(
        [<<"hello ">>, <<"world">>],
        qlc:eval(ReaderQH0)
    ),
    {ok, ReaderPid1} = emqx_ft_storage_fs_reader:start_link(self(), Path),
    ReaderQH1 = emqx_ft_storage_fs_reader:table(ReaderPid1),
    ?assertEqual(
        [<<"hello world">>],
        qlc:eval(ReaderQH1)
    ).
%% Unknown infos and casts are tolerated (the reader keeps serving),
%% while unknown calls are rejected with an explicit error tuple.
t_bad_messages(Config) ->
    Path = ?config(path, Config),
    {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(self(), Path),
    ReaderPid ! {bad, message},
    gen_server:cast(ReaderPid, {bad, message}),
    ?assertEqual(
        {error, {bad_call, {bad, message}}},
        gen_server:call(ReaderPid, {bad, message})
    ).
%% Starting a reader on a missing file fails upfront with `enoent`.
t_nonexistent_file(_Config) ->
    ?assertEqual(
        {error, enoent},
        emqx_ft_storage_fs_reader:start_link(self(), "/a/b/c/bar")
    ).

%% Readers can also be started under the application's supervisor.
t_start_supervised(Config) ->
    Path = ?config(path, Config),
    {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_supervised(self(), Path),
    ?assertEqual(
        {ok, <<"hello ">>},
        emqx_ft_storage_fs_reader:read(ReaderPid, 6)
    ).

%% A table backed by a reader pid on an unreachable remote node
%% evaluates to an empty list instead of raising.
t_rpc_error(_Config) ->
    ReaderQH = emqx_ft_storage_fs_reader:table(fake_remote_pid('dummy@127.0.0.1'), 6),
    ?assertEqual(
        [],
        qlc:eval(ReaderQH)
    ).
%% Create a file at Path with the given content.
mk_test_file(Path, Data) ->
    ok = file:write_file(Path, Data).

%% Resolve Filename relative to the testcase data directory.
expand_path(Config, Filename) ->
    filename:join([?config(data_dir, Config), Filename]).
%% Forge a pid that appears to originate from a remote node, without
%% that node actually existing. Hand-encodes a NEW_PID_EXT external
%% term and decodes it back:
%% https://www.erlang.org/doc/apps/erts/erl_ext_dist.html#new_pid_ext
fake_remote_pid(Node) ->
    %% Reuse the runtime's own encoding of the node atom (131 = version tag).
    <<131, EncodedNode/binary>> = term_to_binary(Node),
    Id = 1,
    Serial = 1,
    Creation = 1,
    PidExt = <<131, 88, EncodedNode/binary, Id:32/big, Serial:32/big, Creation:32/big>>,
    binary_to_term(PidExt).

View File

@ -0,0 +1,128 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_test_helpers).
-compile(export_all).
-compile(nowarn_export_all).
-include_lib("common_test/include/ct.hrl").
-define(S3_HOST, <<"minio">>).
-define(S3_PORT, 9000).
%% Start a peer (slave) node running emqx_ft and join it to this
%% node's cluster.
start_additional_node(Config, Name) ->
    emqx_common_test_helpers:start_slave(
        Name,
        [
            {apps, [emqx_ft]},
            {join_to, node()},
            {configure_gen_rpc, true},
            {env_handler, env_handler(Config)}
        ]
    ).

%% Gracefully detach the peer node from the cluster, stop its
%% applications, and shut it down.
stop_additional_node(Node) ->
    ok = rpc:call(Node, ekka, leave, []),
    ok = rpc:call(Node, emqx_common_test_helpers, stop_apps, [[emqx_ft]]),
    ok = emqx_common_test_helpers:stop_slave(Node),
    ok.
%% Application environment hook for `start_apps/2`: configures emqx_ft
%% with node-local storage and leaves every other application untouched.
env_handler(Config) ->
    fun
        (emqx_ft) ->
            load_config(#{<<"enable">> => true, <<"storage">> => local_storage(Config)});
        (_) ->
            ok
    end.

local_storage(Config) ->
    local_storage(Config, #{exporter => local}).

%% Raw (binary-keyed) local storage configuration rooted in the
%% testcase private directory of the current node.
local_storage(Config, Opts) ->
    #{
        <<"local">> => #{
            <<"segments">> => #{<<"root">> => root(Config, node(), [segments])},
            <<"exporter">> => exporter(Config, Opts)
        }
    }.

%% Exporter section: either a local directory or an S3 bucket served by
%% the minio container from the CT docker setup.
exporter(Config, #{exporter := local}) ->
    #{<<"local">> => #{<<"root">> => root(Config, node(), [exports])}};
exporter(_Config, #{exporter := s3, bucket_name := BucketName}) ->
    BaseConfig = emqx_s3_test_helpers:base_raw_config(tcp),
    #{
        <<"s3">> => BaseConfig#{
            <<"bucket">> => list_to_binary(BucketName),
            <<"host">> => ?S3_HOST,
            <<"port">> => ?S3_PORT
        }
    }.

%% Load the given raw config under the `file_transfer` root.
load_config(Config) ->
    emqx_common_test_helpers:load_config(emqx_ft_schema, #{<<"file_transfer">> => Config}).
%% MQTT TCP listener port of the given node, resolved via RPC.
tcp_port(Node) ->
    {_, Port} = rpc:call(Node, emqx_config, get, [[listeners, tcp, default, bind]]),
    Port.
%% Binary path `<priv_dir>/file_transfer/<Node>/<Tail...>` rooted in the
%% testcase private directory.
root(Config, Node, Tail) ->
    PrivDir = proplists:get_value(priv_dir, Config),
    Components = [PrivDir, "file_transfer", Node | Tail],
    iolist_to_binary(filename:join(Components)).
start_client(ClientId) ->
    start_client(ClientId, node()).

%% Connect an MQTT v5 client to the given node's TCP listener and
%% return the connected client pid.
start_client(ClientId, Node) ->
    Port = tcp_port(Node),
    {ok, Client} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, Port}]),
    {ok, _} = emqtt:connect(Client),
    Client.
upload_file(ClientId, FileId, Name, Data) ->
    upload_file(ClientId, FileId, Name, Data, node()).

%% Perform a complete file transfer over MQTT: publish the metadata to
%% the `init` topic, the whole content as one segment at offset 0, then
%% finalize via the `fin` topic. Returns `ok` or `{error, ReasonCodeName}`
%% with the broker's PUBACK reason for the final message.
upload_file(ClientId, FileId, Name, Data, Node) ->
    C1 = start_client(ClientId, Node),
    Size = byte_size(Data),
    Meta = #{
        name => Name,
        expire_at => erlang:system_time(_Unit = second) + 3600,
        size => Size
    },
    MetaPayload = emqx_utils_json:encode(emqx_ft:encode_filemeta(Meta)),
    ct:pal("MetaPayload = ~ts", [MetaPayload]),
    MetaTopic = <<"$file/", FileId/binary, "/init">>,
    {ok, #{reason_code_name := success}} = emqtt:publish(C1, MetaTopic, MetaPayload, 1),
    %% Single segment at offset 0 covers the whole file.
    {ok, #{reason_code_name := success}} = emqtt:publish(
        C1, <<"$file/", FileId/binary, "/0">>, Data, 1
    ),
    %% The `fin` topic carries the expected total size in its last level.
    FinTopic = <<"$file/", FileId/binary, "/fin/", (integer_to_binary(Size))/binary>>,
    FinResult =
        case emqtt:publish(C1, FinTopic, <<>>, 1) of
            {ok, #{reason_code_name := success}} ->
                ok;
            {ok, #{reason_code_name := Error}} ->
                {error, Error}
        end,
    ok = emqtt:stop(C1),
    FinResult.

%% erlcloud AWS config pointing at the test minio instance.
aws_config() ->
    emqx_s3_test_helpers:aws_config(tcp, binary_to_list(?S3_HOST), ?S3_PORT).

View File

@ -0,0 +1,221 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(prop_emqx_ft_assembly).
-include_lib("proper/include/proper.hrl").
-import(emqx_proper_types, [scaled/2]).
-define(COVERAGE_TIMEOUT, 5000).
%% Property: whenever the updated assembly reports `complete`, the
%% coverage it yields is a gapless chain of segments; otherwise a
%% specific missing segment must be reported.
prop_coverage() ->
    ?FORALL(
        {Filesize, Segsizes},
        {filesize_t(), segsizes_t()},
        ?FORALL(
            Fragments,
            noshrink(segments_t(Filesize, Segsizes)),
            ?TIMEOUT(
                ?COVERAGE_TIMEOUT,
                begin
                    ASM1 = append_segments(mk_assembly(Filesize), Fragments),
                    {Time, ASM2} = timer:tc(emqx_ft_assembly, update, [ASM1]),
                    measure(
                        #{"Fragments" => length(Fragments), "Time" => Time},
                        case emqx_ft_assembly:status(ASM2) of
                            complete ->
                                Coverage = emqx_ft_assembly:coverage(ASM2),
                                measure(
                                    #{"CoverageLength" => length(Coverage)},
                                    is_coverage_complete(Coverage)
                                );
                            {incomplete, {missing, {segment, _, _}}} ->
                                measure("CoverageLength", 0, true)
                        end
                    )
                end
            )
        )
    ).
%% Property: with a deliberate hole punched into the generated segment
%% set, the assembly is (almost always) reported incomplete; in the rare
%% complete case the coverage is still verified to be gapless.
prop_coverage_likely_incomplete() ->
    ?FORALL(
        {Filesize, Segsizes, Hole},
        {filesize_t(), segsizes_t(), filesize_t()},
        ?FORALL(
            Fragments,
            noshrink(segments_t(Filesize, Segsizes, Hole)),
            ?TIMEOUT(
                ?COVERAGE_TIMEOUT,
                begin
                    ASM1 = append_segments(mk_assembly(Filesize), Fragments),
                    {Time, ASM2} = timer:tc(emqx_ft_assembly, update, [ASM1]),
                    measure(
                        #{"Fragments" => length(Fragments), "Time" => Time},
                        case emqx_ft_assembly:status(ASM2) of
                            complete ->
                                % NOTE: this is still possible due to the nature of `SUCHTHATMAYBE`
                                IsComplete = emqx_ft_assembly:coverage(ASM2),
                                collect(complete, is_coverage_complete(IsComplete));
                            {incomplete, {missing, {segment, _, _}}} ->
                                collect(incomplete, true)
                        end
                    )
                end
            )
        )
    ).
%% Property: when one (remote) node contributes a full set of base-sized
%% segments, the assembly must always be complete with gapless coverage,
%% regardless of what other random fragments are appended.
prop_coverage_complete() ->
    ?FORALL(
        {Filesize, Segsizes},
        {filesize_t(), ?SUCHTHAT([BaseSegsize | _], segsizes_t(), BaseSegsize > 0)},
        ?FORALL(
            {Fragments, RemoteNode},
            noshrink({segments_t(Filesize, Segsizes), remote_node_t()}),
            begin
                % Ensure that we have complete coverage
                ASM1 = mk_assembly(Filesize),
                ASM2 = append_coverage(ASM1, RemoteNode, Filesize, Segsizes),
                ASM3 = append_segments(ASM2, Fragments),
                {Time, ASM4} = timer:tc(emqx_ft_assembly, update, [ASM3]),
                measure(
                    #{"CoverageMax" => nsegs(Filesize, Segsizes), "Time" => Time},
                    case emqx_ft_assembly:status(ASM4) of
                        complete ->
                            Coverage = emqx_ft_assembly:coverage(ASM4),
                            measure(
                                #{"Coverage" => length(Coverage)},
                                is_coverage_complete(Coverage)
                            );
                        {incomplete, _} ->
                            false
                    end
                )
            end
        )
    ).
%% Wrap a test result in several named PropEr measurements at once
%% (delegates to PropEr's measure/3 for each entry).
measure(NamedSamples, Test) ->
    maps:fold(fun(Name, Sample, Acc) -> measure(Name, Sample, Acc) end, Test, NamedSamples).

%% A coverage is complete when each segment starts exactly where the
%% previous one ended, checked pairwise over consecutive entries.
is_coverage_complete([]) ->
    true;
is_coverage_complete(Coverage = [_ | Tail]) ->
    is_coverage_complete(Coverage, Tail).

is_coverage_complete([_], []) ->
    true;
is_coverage_complete(
    [{_Node1, #{fragment := {segment, #{offset := O1, size := S1}}}} | Rest],
    [{_Node2, #{fragment := {segment, #{offset := O2}}}} | Tail]
) ->
    (O1 + S1 == O2) andalso is_coverage_complete(Rest, Tail).
%% Fresh assembly seeded with the file metadata on the local node.
mk_assembly(Filesize) ->
    emqx_ft_assembly:append(emqx_ft_assembly:new(Filesize), node(), mk_filemeta(Filesize)).

%% Append generated `{Node, {Offset, Size}}` fragments to the assembly.
append_segments(ASMIn, Fragments) ->
    lists:foldl(
        fun({Node, {Offset, Size}}, ASM) ->
            emqx_ft_assembly:append(ASM, Node, mk_segment(Offset, Size))
        end,
        ASMIn,
        Fragments
    ).

%% Append a full, gapless run of base-sized segments from Node,
%% guaranteeing complete coverage of the file.
append_coverage(ASM, Node, Filesize, Segsizes = [BaseSegsize | _]) ->
    append_coverage(ASM, Node, Filesize, BaseSegsize, 0, nsegs(Filesize, Segsizes)).

append_coverage(ASM, Node, Filesize, Segsize, I, NSegs) when I < NSegs ->
    Offset = I * Segsize,
    %% The last segment may be shorter than the base segment size.
    Size = min(Segsize, Filesize - Offset),
    ASMNext = emqx_ft_assembly:append(ASM, Node, mk_segment(Offset, Size)),
    append_coverage(ASMNext, Node, Filesize, Segsize, I + 1, NSegs);
append_coverage(ASM, _Node, _Filesize, _Segsize, _, _NSegs) ->
    ASM.
%% Filemeta fragment as appended to the assembly, named after this module.
mk_filemeta(Filesize) ->
    Meta = #{name => ?MODULE_STRING, size => Filesize},
    #{path => "MANIFEST.json", fragment => {filemeta, Meta}}.
%% Segment fragment as appended to the assembly.
%%
%% The path encodes both offset and size *with a separator*: the
%% original concatenation ("SEG" ++ Offset ++ Size) was ambiguous, so
%% distinct segments could collide on the same path, e.g. {1, 23} and
%% {12, 3} both mapped to "SEG123". Unique paths keep distinct segments
%% distinguishable within the assembly.
mk_segment(Offset, Size) ->
    Path = "SEG" ++ integer_to_list(Offset) ++ "." ++ integer_to_list(Size),
    #{
        path => Path,
        fragment => {segment, #{offset => Offset, size => Size}}
    }.
%% Upper bound on the number of base-sized segments needed to cover the
%% file; the `max(1, _)` guards against division by zero when the base
%% segment size shrinks to 0.
nsegs(Filesize, [BaseSegsize | _]) ->
    Divisor = max(1, BaseSegsize),
    1 + Filesize div Divisor.
%% Random list of `{Node, Segment}` fragments, scaled to the expected
%% number of segments for this file size.
segments_t(Filesize, Segsizes) ->
    scaled(nsegs(Filesize, Segsizes), list({node_t(), segment_t(Filesize, Segsizes)})).

segments_t(Filesize, Segsizes, Hole) ->
    scaled(nsegs(Filesize, Segsizes), list({node_t(), segment_t(Filesize, Segsizes, Hole)})).

%% Segment that likely avoids covering the byte at `Hole rem Filesize`,
%% making complete coverage improbable (but not impossible, since
%% ?SUCHTHATMAYBE gives up after a bounded number of tries).
segment_t(Filesize, Segsizes, Hole) ->
    ?SUCHTHATMAYBE(
        {Offset, Size},
        segment_t(Filesize, Segsizes),
        (Hole rem Filesize) =< Offset orelse (Hole rem Filesize) > (Offset + Size)
    ).

%% Aligned segment: a random multiple of one of the segment sizes,
%% clipped to the end of the file.
segment_t(Filesize, Segsizes) ->
    ?LET(
        Segsize,
        oneof(Segsizes),
        ?LET(
            Index,
            range(0, Filesize div max(1, Segsize)),
            {Index * Segsize, min(Segsize, Filesize - (Index * Segsize))}
        )
    ).
filesize_t() ->
    scaled(4000, non_neg_integer()).

%% Lists of segment sizes; the head is the base size and the rest are
%% multiples of it.
segsizes_t() ->
    ?LET(
        BaseSize,
        segsize_t(),
        oneof([
            [BaseSize, BaseSize * 2],
            [BaseSize, BaseSize * 2, BaseSize * 3],
            [BaseSize, BaseSize * 2, BaseSize * 5]
        ])
    ).

segsize_t() ->
    scaled(50, non_neg_integer()).

%% A node name other than the local node's.
remote_node_t() ->
    oneof([
        'emqx42@emqx.local',
        'emqx43@emqx.local',
        'emqx44@emqx.local'
    ]).

%% Any node: local or one of the fixed fake remotes.
node_t() ->
    oneof([
        node(),
        'emqx42@emqx.local',
        'emqx43@emqx.local',
        'emqx44@emqx.local'
    ]).

View File

@ -3,7 +3,7 @@
{id, "emqx_machine"},
{description, "The EMQX Machine"},
% strict semver, bump manually!
{vsn, "0.2.3"},
{vsn, "0.2.4"},
{modules, []},
{registered, []},
{applications, [kernel, stdlib, emqx_ctl]},

View File

@ -154,6 +154,8 @@ basic_reboot_apps() ->
ee ->
CE ++
[
emqx_s3,
emqx_ft,
emqx_eviction_agent,
emqx_node_rebalance
]

94
apps/emqx_s3/BSL.txt Normal file
View File

@ -0,0 +1,94 @@
Business Source License 1.1
Licensor: Hangzhou EMQ Technologies Co., Ltd.
Licensed Work: EMQX Enterprise Edition
The Licensed Work is (c) 2023
Hangzhou EMQ Technologies Co., Ltd.
Additional Use Grant: Students and educators are granted right to copy,
modify, and create derivative work for research
or education.
Change Date: 2027-02-01
Change License: Apache License, Version 2.0
For information about alternative licensing arrangements for the Software,
please contact Licensor: https://www.emqx.com/en/contact
Notice
The Business Source License (this document, or the “License”) is not an Open
Source license. However, the Licensed Work will eventually be made available
under an Open Source License, as stated in this License.
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
“Business Source License” is a trademark of MariaDB Corporation Ab.
-----------------------------------------------------------------------------
Business Source License 1.1
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited
production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.
MariaDB hereby grants you permission to use this Licenses text to license
your works, and to refer to it using the trademark “Business Source License”,
as long as you comply with the Covenants of Licensor below.
Covenants of Licensor
In consideration of the right to use this Licenses text and the “Business
Source License” name and trademark, Licensor covenants to MariaDB, and to all
other recipients of the licensed work to be provided by Licensor:
1. To specify as the Change License the GPL Version 2.0 or any later version,
or a license that is compatible with GPL Version 2.0 or a later version,
where “compatible” means that software provided under the Change License can
be included in a program with software provided under GPL Version 2.0 or a
later version. Licensor may specify additional Change Licenses without
limitation.
2. To either: (a) specify an additional grant of rights to use that does not
impose any additional restriction on the right granted in this License, as
the Additional Use Grant; or (b) insert the text “None”.
3. To specify a Change Date.
4. Not to modify this License in any other way.

135
apps/emqx_s3/README.md Normal file
View File

@ -0,0 +1,135 @@
# emqx_s3
EMQX S3 Application
## Description
This application provides functionality for uploading files to S3.
## Usage
The steps to integrate this application are:
* Integrate S3 configuration schema where needed.
* On _client_ application start:
* Call `emqx_s3:start_profile(ProfileName, ProfileConfig)` with configuration.
* Add `emqx_config_handler` hook to call `emqx_s3:start_profile(ProfileName, ProfileConfig)` when configuration is updated.
* On _client_ application stop, call `emqx_s3:stop_profile(ProfileName)`.
`ProfileName` is a unique name used to distinguish different sets of S3 settings. Each profile has its own connection pool and configuration.
To use S3 from a _client_ application:
* Create an uploader process with `{ok, Pid} = emqx_s3:start_uploader(ProfileName, #{key => MyKey})`.
* Write data with `emqx_s3_uploader:write(Pid, <<"data">>)`.
* Finish the uploader with `emqx_s3_uploader:complete(Pid)` or `emqx_s3_uploader:abort(Pid)`.
### Configuration
Example of integrating S3 configuration schema into a _client_ application `emqx_someapp`.
```erlang
-module(emqx_someapp_schema).
...
roots() -> [someapp]
...
fields(someapp) ->
[
{other_setting, ...},
{s3_settings,
mk(
hoconsc:ref(emqx_s3_schema, s3),
#{
desc => ?DESC("s3_settings"),
required => true
}
)}
];
...
```
### Application start and config hooks
```erlang
-module(emqx_someapp_app).
-behaviour(application).
-export([start/2, stop/1]).
-export([
pre_config_update/3,
post_config_update/5
]).
start(_StartType, _StartArgs) ->
ProfileConfig = emqx_config:get([someapp, s3_settings]),
ProfileName = someapp,
ok = emqx_s3:start_profile(ProfileName, ProfileConfig),
ok = emqx_config_handler:add_handler([someapp], ?MODULE).
stop(_State) ->
ok = emqx_conf:remove_handler([someapp]),
ProfileName = someapp,
ok = emqx_s3:stop_profile(ProfileName).
pre_config_update(_Path, NewConfig, _OldConfig) ->
{ok, NewConfig}.
post_config_update(Path, _Req, NewConfig, _OldConfig, _AppEnvs) ->
NewProfileConfig = maps:get(s3_settings, NewConfig),
ProfileName = someapp,
%% more graceful error handling may be needed
ok = emqx_s3:update_profile(ProfileName, NewProfileConfig).
```
### Uploader usage
```erlang
-module(emqx_someapp_logic).
...
-spec do_upload_data(Key :: string(), Data :: binary()) -> ok.
do_upload_data(Key, Data) ->
ProfileName = someapp,
{ok, Pid} = emqx_s3:start_uploader(ProfileName, #{key => Key}),
ok = emqx_s3_uploader:write(Pid, Data),
ok = emqx_s3_uploader:complete(Pid).
```
## Design
![Design](./docs/s3_app.png)
* Each profile has its own supervisor `emqx_s3_profile_sup`.
* Under each profile supervisor, there is a
* `emqx_s3_profile_uploader_sup` supervisor for uploader processes.
* `emqx_s3_profile_conf` server for managing profile configuration.
When an uploader process is started, it checks out the current S3 configuration for the profile from the `emqx_s3_profile_conf` server. It uses the obtained configuration and connection pool to upload data to S3 until it terminates, even if the configuration is updated in the meantime.
Other processes (`emqx_XXX`) can also check out the current S3 configuration for the profile from the `emqx_s3_profile_conf` server.
`emqx_s3_profile_conf`:
* Keeps actual S3 configuration for the profile and creates a connection pool for the actual configuration.
* Creates a new connection pool when the configuration is updated.
* Keeps track of uploaders using connection pools.
* Drops connection pools when no uploaders are using them, or after a timeout.
The code is designed to allow a painless transition from `ehttpc` pool to any other HTTP pool/client.
## Possible performance improvements
One of the downsides of the current implementation is that there is a lot of message passing between the uploader client and the actual sockets.
A possible improvement could be:
* Use a process-less HTTP client, like [Mint](https://github.com/elixir-mint/mint).
* Use a resource pool, like [NimblePool](https://github.com/dashbitco/nimble_pool) to manage the HTTP connections. It temporarily grants sockets to its clients.
* Do the buffering logic locally in the uploader client.
* Use `emqx_s3_client` directly from the uploader client.
In this case, the data will be directly sent to the socket, without being sent to any intermediate processes.

2
apps/emqx_s3/docker-ct Normal file
View File

@ -0,0 +1,2 @@
minio
toxiproxy

Binary file not shown.

After

Width:  |  Height:  |  Size: 250 KiB

View File

@ -0,0 +1,6 @@
{deps, [
{emqx, {path, "../../apps/emqx"}},
{erlcloud, {git, "https://github.com/emqx/erlcloud", {tag, "3.6.8-emqx-1"}}}
]}.
{project_plugins, [erlfmt]}.

View File

@ -0,0 +1,14 @@
{application, emqx_s3, [
{description, "EMQX S3"},
{vsn, "5.0.6"},
{modules, []},
{registered, [emqx_s3_sup]},
{applications, [
kernel,
stdlib,
gproc,
erlcloud,
ehttpc
]},
{mod, {emqx_s3_app, []}}
]}.

View File

@ -0,0 +1,96 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3).
-include_lib("emqx/include/types.hrl").
-export([
start_profile/2,
stop_profile/1,
update_profile/2,
start_uploader/2,
with_client/2
]).
-export_type([
profile_id/0,
profile_config/0,
acl/0
]).
-type profile_id() :: atom() | binary().
-type acl() ::
private
| public_read
| public_read_write
| authenticated_read
| bucket_owner_read
| bucket_owner_full_control.
-type transport_options() :: #{
headers => map(),
connect_timeout => pos_integer(),
enable_pipelining => pos_integer(),
max_retries => pos_integer(),
pool_size => pos_integer(),
pool_type => atom(),
ipv6_probe => boolean(),
ssl => map()
}.
-type profile_config() :: #{
bucket := string(),
access_key_id => string(),
secret_access_key => string(),
host := string(),
port := pos_integer(),
url_expire_time := pos_integer(),
acl => acl(),
min_part_size => pos_integer(),
transport_options => transport_options()
}.
-define(IS_PROFILE_ID(ProfileId), (is_atom(ProfileId) orelse is_binary(ProfileId))).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
-spec start_profile(profile_id(), profile_config()) -> ok_or_error(term()).
%% @doc Start a new S3 profile: its supervisor subtree and connection pool.
start_profile(ProfileId, ProfileConfig) when ?IS_PROFILE_ID(ProfileId) ->
    case emqx_s3_sup:start_profile(ProfileId, ProfileConfig) of
        {ok, _} ->
            ok;
        {error, _} = Error ->
            Error
    end.

-spec stop_profile(profile_id()) -> ok_or_error(term()).
%% @doc Stop the profile and tear down its supervisor subtree.
stop_profile(ProfileId) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_sup:stop_profile(ProfileId).

-spec update_profile(profile_id(), profile_config()) -> ok_or_error(term()).
%% @doc Apply a new configuration to a running profile.
update_profile(ProfileId, ProfileConfig) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_profile_conf:update_config(ProfileId, ProfileConfig).

-spec start_uploader(profile_id(), emqx_s3_uploader:opts()) ->
    supervisor:start_ret() | {error, profile_not_found}.
%% @doc Start an uploader process under the profile's uploader supervisor.
start_uploader(ProfileId, Opts) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_profile_uploader_sup:start_uploader(ProfileId, Opts).

-spec with_client(profile_id(), fun((emqx_s3_client:client()) -> Result)) ->
    {error, profile_not_found} | Result.
%% @doc Run `Fun` with an S3 client built from the profile's current
%% configuration. The config is checked out for the duration of the call
%% and checked back in afterwards, even if `Fun` raises.
with_client(ProfileId, Fun) when is_function(Fun, 1) andalso ?IS_PROFILE_ID(ProfileId) ->
    case emqx_s3_profile_conf:checkout_config(ProfileId) of
        {ok, ClientConfig, _UploadConfig} ->
            try
                Fun(emqx_s3_client:create(ClientConfig))
            after
                emqx_s3_profile_conf:checkin_config(ProfileId)
            end;
        {error, _} = Error ->
            Error
    end.

View File

@ -0,0 +1,13 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-define(VIA_GPROC(Id), {via, gproc, {n, l, Id}}).
-define(SAFE_CALL_VIA_GPROC(Id, Message, Timeout, NoProcError),
try gen_server:call(?VIA_GPROC(Id), Message, Timeout) of
Result -> Result
catch
exit:{noproc, _} -> {error, NoProcError}
end
).

View File

@ -0,0 +1,16 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_app).
-behaviour(application).
-export([start/2, stop/1]).
%% @doc Application start callback: boot the top-level S3 supervisor.
%% The explicit match keeps a crash (rather than an error return) on
%% supervisor startup failure, same as the original.
start(_Type, _Args) ->
    {ok, SupPid} = emqx_s3_sup:start_link(),
    {ok, SupPid}.

%% @doc Application stop callback: nothing to clean up explicitly.
stop(_State) ->
    ok.

View File

@ -0,0 +1,428 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_client).
-include_lib("emqx/include/types.hrl").
-include_lib("emqx/include/logger.hrl").
-include_lib("erlcloud/include/erlcloud_aws.hrl").
-export([
create/1,
put_object/3,
put_object/4,
start_multipart/2,
start_multipart/3,
upload_part/5,
complete_multipart/4,
abort_multipart/3,
list/2,
uri/2,
format/1,
format_request/1
]).
-export_type([
client/0,
headers/0
]).
-type headers() :: #{binary() | string() => iodata()}.
-type erlcloud_headers() :: list({string(), iodata()}).
-type key() :: string().
-type part_number() :: non_neg_integer().
-type upload_id() :: string().
-type etag() :: string().
-type http_pool() :: ehttpc:pool_name().
-type pool_type() :: random | hash.
-type upload_options() :: list({acl, emqx_s3:acl()}).
-opaque client() :: #{
aws_config := aws_config(),
upload_options := upload_options(),
bucket := string(),
headers := erlcloud_headers(),
url_expire_time := non_neg_integer(),
pool_type := pool_type()
}.
-type config() :: #{
scheme := string(),
host := string(),
port := part_number(),
bucket := string(),
headers := headers(),
acl := emqx_s3:acl() | undefined,
url_expire_time := pos_integer(),
access_key_id := string() | undefined,
secret_access_key := string() | undefined,
http_pool := http_pool(),
pool_type := pool_type(),
request_timeout := timeout() | undefined,
max_retries := non_neg_integer() | undefined
}.
-type s3_options() :: proplists:proplist().
-define(DEFAULT_REQUEST_TIMEOUT, 30000).
-define(DEFAULT_MAX_RETRIES, 2).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------
-spec create(config()) -> client().
%% @doc Build an immutable client term from the checked configuration.
%% The term carries a ready-made erlcloud `#aws_config{}` plus derived
%% upload options and default headers.
create(Config) ->
    #{
        aws_config => aws_config(Config),
        upload_options => upload_options(Config),
        bucket => maps:get(bucket, Config),
        url_expire_time => maps:get(url_expire_time, Config),
        headers => headers(Config),
        pool_type => maps:get(pool_type, Config)
    }.
-spec put_object(client(), key(), iodata()) -> ok_or_error(term()).
put_object(Client, Key, Value) ->
    put_object(Client, #{}, Key, Value).

-spec put_object(client(), headers(), key(), iodata()) -> ok_or_error(term()).
%% @doc Upload an object in a single request, merging per-request
%% headers over the client's defaults. erlcloud signals failures by
%% raising `{aws_error, _}`, which is converted to an error tuple here.
put_object(
    #{bucket := Bucket, upload_options := Options, headers := Headers, aws_config := AwsConfig},
    SpecialHeaders,
    Key,
    Value
) ->
    AllHeaders = join_headers(Headers, SpecialHeaders),
    try erlcloud_s3:put_object(Bucket, erlcloud_key(Key), Value, Options, AllHeaders, AwsConfig) of
        Props when is_list(Props) ->
            ok
    catch
        error:{aws_error, Reason} ->
            ?SLOG(debug, #{msg => "put_object_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.
-spec start_multipart(client(), key()) -> ok_or_error(upload_id(), term()).
start_multipart(Client, Key) ->
    start_multipart(Client, #{}, Key).

-spec start_multipart(client(), headers(), key()) -> ok_or_error(upload_id(), term()).
%% @doc Initiate a multipart upload; returns the upload id to use with
%% `upload_part/5` and `complete_multipart/4` / `abort_multipart/3`.
start_multipart(
    #{bucket := Bucket, upload_options := Options, headers := Headers, aws_config := AwsConfig},
    SpecialHeaders,
    Key
) ->
    AllHeaders = join_headers(Headers, SpecialHeaders),
    case erlcloud_s3:start_multipart(Bucket, erlcloud_key(Key), Options, AllHeaders, AwsConfig) of
        {ok, Props} ->
            {ok, response_property('uploadId', Props)};
        {error, Reason} ->
            ?SLOG(debug, #{msg => "start_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.
-spec upload_part(client(), key(), upload_id(), part_number(), iodata()) ->
    ok_or_error(etag(), term()).
%% @doc Upload a single part of a multipart upload; returns the part's
%% ETag, which must be collected for `complete_multipart/4`.
upload_part(
    #{bucket := Bucket, headers := Headers, aws_config := AwsConfig},
    Key,
    UploadId,
    PartNumber,
    Value
) ->
    case
        erlcloud_s3:upload_part(
            Bucket, erlcloud_key(Key), UploadId, PartNumber, Value, Headers, AwsConfig
        )
    of
        {ok, Props} ->
            {ok, response_property(etag, Props)};
        {error, Reason} ->
            ?SLOG(debug, #{msg => "upload_part_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.
-spec complete_multipart(client(), key(), upload_id(), [etag()]) -> ok_or_error(term()).
%% @doc Finalize a multipart upload from the accumulated part ETags.
complete_multipart(
    #{bucket := Bucket, headers := Headers, aws_config := AwsConfig},
    Key,
    UploadId,
    ETags
) ->
    case
        erlcloud_s3:complete_multipart(
            Bucket, erlcloud_key(Key), UploadId, ETags, Headers, AwsConfig
        )
    of
        ok ->
            ok;
        {error, Reason} ->
            ?SLOG(debug, #{msg => "complete_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.

-spec abort_multipart(client(), key(), upload_id()) -> ok_or_error(term()).
%% @doc Abort a multipart upload, discarding any parts uploaded so far.
abort_multipart(#{bucket := Bucket, headers := Headers, aws_config := AwsConfig}, Key, UploadId) ->
    case erlcloud_s3:abort_multipart(Bucket, erlcloud_key(Key), UploadId, [], Headers, AwsConfig) of
        ok ->
            ok;
        {error, Reason} ->
            ?SLOG(debug, #{msg => "abort_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.
-spec list(client(), s3_options()) -> ok_or_error(proplists:proplist(), term()).
%% @doc List objects in the bucket; Options are passed to erlcloud as-is.
list(#{bucket := Bucket, aws_config := AwsConfig}, Options) ->
    try erlcloud_s3:list_objects(Bucket, Options, AwsConfig) of
        Result -> {ok, Result}
    catch
        error:{aws_error, Reason} ->
            ?SLOG(debug, #{msg => "list_objects_fail", bucket => Bucket, reason => Reason}),
            {error, Reason}
    end.

-spec uri(client(), key()) -> iodata().
%% @doc Presigned (SigV4) GET URL for the object, valid for the
%% client's configured `url_expire_time`.
uri(#{bucket := Bucket, aws_config := AwsConfig, url_expire_time := ExpireTime}, Key) ->
    erlcloud_s3:make_presigned_v4_url(ExpireTime, Bucket, get, erlcloud_key(Key), [], AwsConfig).

-spec format(client()) -> term().
%% @doc Loggable representation of the client with the secret redacted.
format(#{aws_config := AwsConfig} = Client) ->
    Client#{aws_config => AwsConfig#aws_config{secret_access_key = "***"}}.
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------
%% erlcloud upload options derived from the configured ACL; empty when
%% no ACL is set (absent key or explicit `undefined`).
upload_options(Config) ->
    case maps:get(acl, Config, undefined) of
        undefined -> [];
        Acl -> [{acl, Acl}]
    end.
%% Client default headers, converted once into erlcloud's list form.
headers(#{headers := Headers}) ->
    headers_user_to_erlcloud_request(Headers);
headers(#{}) ->
    [].
%% Build the erlcloud `#aws_config{}`. HTTP transport is routed through
%% ehttpc via a custom request fun; retrying is delegated to ehttpc
%% (erlcloud's own retry loop is disabled with `retry_num = 1`).
aws_config(#{
    scheme := Scheme,
    host := Host,
    port := Port,
    access_key_id := AccessKeyId,
    secret_access_key := SecretAccessKey,
    http_pool := HttpPool,
    pool_type := PoolType,
    request_timeout := Timeout,
    max_retries := MaxRetries
}) ->
    #aws_config{
        s3_scheme = Scheme,
        s3_host = Host,
        s3_port = Port,
        %% Path-style addressing: bucket name in the URL path, which
        %% self-hosted S3 implementations (e.g. minio) expect.
        s3_bucket_access_method = path,
        s3_bucket_after_host = true,
        access_key_id = AccessKeyId,
        secret_access_key = SecretAccessKey,
        http_client = request_fun(
            HttpPool, PoolType, with_default(MaxRetries, ?DEFAULT_MAX_RETRIES)
        ),
        %% This value will be transparently passed to ehttpc
        timeout = with_default(Timeout, ?DEFAULT_REQUEST_TIMEOUT),
        %% We rely on retry mechanism of ehttpc
        retry_num = 1
    }.
%% @doc Build the `erlcloud_httpc:request_fun()' that routes erlcloud's
%% HTTP traffic through an ehttpc pool. ehttpc expects only the
%% path-and-query part of the URL (the pool already knows host/port), so
%% the full URL produced by erlcloud is stripped first.
-spec request_fun(http_pool(), pool_type(), non_neg_integer()) -> erlcloud_httpc:request_fun().
request_fun(HttpPool, PoolType, MaxRetries) ->
    fun(Url, Method, Headers, Body, Timeout, _Config) ->
        with_path_and_query_only(Url, fun(PathQuery) ->
            Request = make_request(
                Method, PathQuery, headers_erlcloud_request_to_ehttpc(Headers), Body
            ),
            %% Pool lookup may fail if the pool was stopped/renamed
            %% concurrently; surface that as an error tuple, not a crash.
            case pick_worker_safe(HttpPool, PoolType) of
                {ok, Worker} ->
                    ehttpc_request(Worker, Method, Request, Timeout, MaxRetries);
                {error, Reason} ->
                    ?SLOG(error, #{
                        msg => "s3_request_fun_fail",
                        reason => Reason,
                        http_pool => HttpPool,
                        pool_type => PoolType,
                        method => Method,
                        request => Request,
                        timeout => Timeout,
                        max_retries => MaxRetries
                    }),
                    {error, Reason}
            end
        end)
    end.
%% Perform one (possibly internally retried) ehttpc request and adapt the
%% result to the `{ok, {{Status, _}, Headers, Body}} | {error, _}' shape
%% erlcloud expects from its http client. Timing is measured with
%% `timer:tc' purely for logging. A `badarg' from ehttpc is translated to
%% `{error, no_ehttpc_pool}' (the pool does not exist / was stopped).
ehttpc_request(HttpPool, Method, Request, Timeout, MaxRetries) ->
    try timer:tc(fun() -> ehttpc:request(HttpPool, Method, Request, Timeout, MaxRetries) end) of
        {Time, {ok, StatusCode, RespHeaders}} ->
            ?SLOG(info, #{
                msg => "s3_ehttpc_request_ok",
                status_code => StatusCode,
                headers => RespHeaders,
                time => Time
            }),
            %% erlcloud expects an {StatusCode, ReasonPhrase} pair; the
            %% phrase is unavailable from ehttpc, hence `undefined'.
            {ok, {
                {StatusCode, undefined}, headers_ehttpc_to_erlcloud_response(RespHeaders), undefined
            }};
        {Time, {ok, StatusCode, RespHeaders, RespBody}} ->
            ?SLOG(info, #{
                msg => "s3_ehttpc_request_ok",
                status_code => StatusCode,
                headers => RespHeaders,
                body => RespBody,
                time => Time
            }),
            {ok, {
                {StatusCode, undefined}, headers_ehttpc_to_erlcloud_response(RespHeaders), RespBody
            }};
        {Time, {error, Reason}} ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => Reason,
                timeout => Timeout,
                pool => HttpPool,
                method => Method,
                time => Time
            }),
            {error, Reason}
    catch
        %% ehttpc raises badarg when the pool is unknown.
        error:badarg ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => badarg,
                timeout => Timeout,
                pool => HttpPool,
                method => Method
            }),
            {error, no_ehttpc_pool};
        error:Reason ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => Reason,
                timeout => Timeout,
                pool => HttpPool,
                method => Method
            }),
            {error, Reason}
    end.
%% Pick a worker from the ehttpc pool, mapping the `badarg' raised for an
%% unknown/stopped pool to `{error, no_ehttpc_pool}'.
pick_worker_safe(HttpPool, PoolType) ->
    try pick_worker(HttpPool, PoolType) of
        Worker -> {ok, Worker}
    catch
        error:badarg -> {error, no_ehttpc_pool}
    end.
%% Worker selection strategy: `random' spreads requests across the pool;
%% `hash' pins this process to one worker (hashing on self()).
pick_worker(HttpPool, random) ->
    ehttpc_pool:pick_worker(HttpPool);
pick_worker(HttpPool, hash) ->
    ehttpc_pool:pick_worker(HttpPool, self()).
-define(IS_BODY_EMPTY(Body), (Body =:= undefined orelse Body =:= <<>>)).
-define(NEEDS_NO_BODY(Method), (Method =:= get orelse Method =:= head orelse Method =:= delete)).
%% Assemble the ehttpc request tuple.
%% - bodyless methods with no body: 2-tuple, no body element at all;
%% - body-bearing methods with an empty body: explicit content-length 0
%%   (required so signing and the server agree on the body size);
%% - otherwise: pass the body through unchanged.
make_request(Method, PathQuery, Headers, Body) when
    ?IS_BODY_EMPTY(Body) andalso ?NEEDS_NO_BODY(Method)
->
    {PathQuery, Headers};
make_request(_Method, PathQuery, Headers, Body) when ?IS_BODY_EMPTY(Body) ->
    {PathQuery, [{<<"content-length">>, <<"0">>} | Headers], <<>>};
make_request(_Method, PathQuery, Headers, Body) ->
    {PathQuery, Headers, Body}.
%% Redact the body of an ehttpc request tuple for logging; bodyless
%% request tuples are already safe to print as-is.
format_request({Target, ReqHeaders, _Body}) ->
    {Target, ReqHeaders, <<"...">>};
format_request({Target, ReqHeaders}) ->
    {Target, ReqHeaders}.
%% Strip the scheme and authority from `Url' and call `Fun' with only the
%% path-and-query part, its leading slash restored (ehttpc wants relative
%% targets). A URL without "//" or without a path yields
%% `{error, {invalid_url, Url}}'.
with_path_and_query_only(Url, Fun) ->
    case string:split(Url, "//", leading) of
        [_Scheme, AfterScheme] ->
            case string:split(AfterScheme, "/", leading) of
                [_Authority, PathQuery] ->
                    Fun([$/ | PathQuery]);
                _Other ->
                    {error, {invalid_url, Url}}
            end;
        _Other ->
            {error, {invalid_url, Url}}
    end.
%% We need some header conversions to tie the emqx_s3, erlcloud and ehttpc APIs together.
%% The request header flow is:
%% UserHeaders -> [emqx_s3_client API] -> ErlcloudRequestHeaders0 ->
%% -> [erlcloud API] -> ErlcloudRequestHeaders1 -> [emqx_s3_client injected request_fun] ->
%% -> EhttpcRequestHeaders -> [ehttpc API]
%% The response header flow is:
%% [ehttpc API] -> EhttpcResponseHeaders -> [emqx_s3_client injected request_fun] ->
%% -> ErlcloudResponseHeaders0 -> [erlcloud API] -> [emqx_s3_client API]
%% UserHeaders (emqx_s3 API headers) are maps with string/binary keys.
%% ErlcloudRequestHeaders are lists of tuples with string keys and iodata values.
%% ErlcloudResponseHeaders are lists of tuples with lowercase string keys and iodata values.
%% EhttpcHeaders are lists of tuples with binary keys and iodata values.
%% Users provide headers as a map, but erlcloud expects a list of tuples with string keys and values.
headers_user_to_erlcloud_request(UserHeaders) ->
    lists:map(
        fun({Name, Value}) -> {to_list_string(Name), Value} end,
        maps:to_list(UserHeaders)
    ).
%% Ehttpc operates on headers as a list of tuples with binary keys.
%% Erlcloud expects a list of tuples with string values and lowercase string keys
%% from the underlying http library.
%% Convert ehttpc response headers (binary keys) to the lowercase
%% string-keyed form erlcloud expects from its http backend.
headers_ehttpc_to_erlcloud_response(EhttpcHeaders) ->
    lists:map(
        fun({Name, Value}) ->
            {string:to_lower(to_list_string(Name)), to_list_string(Value)}
        end,
        EhttpcHeaders
    ).
%% Ehttpc expects a list of tuples with binary keys.
%% Erlcloud provides a list of tuples with string keys.
headers_erlcloud_request_to_ehttpc(ErlcloudHeaders) ->
    lists:map(fun({Name, Value}) -> {to_binary(Name), Value} end, ErlcloudHeaders).
%% Append user-supplied special headers (map form) to an existing
%% erlcloud header list; user entries are converted and placed last.
join_headers(ErlcloudHeaders, UserSpecialHeaders) ->
    ErlcloudHeaders ++ headers_user_to_erlcloud_request(UserSpecialHeaders).
%% Coerce a charlist or binary to binary form.
to_binary(Value) when is_binary(Value) -> Value;
to_binary(Value) when is_list(Value) -> list_to_binary(Value).
%% Coerce a binary or charlist to charlist form.
to_list_string(Value) when is_list(Value) ->
    Value;
to_list_string(Value) when is_binary(Value) ->
    binary_to_list(Value).
%% Encode a (possibly unicode) object key as a UTF-8 byte list, which is
%% the representation erlcloud expects for S3 keys.
erlcloud_key(Characters) ->
    Utf8 = unicode:characters_to_binary(Characters),
    binary_to_list(Utf8).
%% Fetch a mandatory property from an S3 response proplist, raising a
%% descriptive error (after logging) when it is absent.
response_property(Name, Props) ->
    case proplists:get_value(Name, Props) of
        undefined ->
            %% This should not happen for valid S3 implementations
            ?SLOG(error, #{
                msg => "missing_s3_response_property",
                name => Name,
                props => Props
            }),
            error({missing_s3_response_property, Name});
        Value ->
            Value
    end.
%% Substitute `Default' for an unset (`undefined') value.
with_default(Value, Default) ->
    case Value of
        undefined -> Default;
        _Defined -> Value
    end.

View File

@ -0,0 +1,388 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_profile_conf).
-behaviour(gen_server).
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/types.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include("src/emqx_s3.hrl").
-export([
start_link/2,
child_spec/2
]).
-export([
checkout_config/1,
checkout_config/2,
checkin_config/1,
checkin_config/2,
update_config/2,
update_config/3
]).
-export([
init/1,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3
]).
%% For test purposes
-export([
client_config/2,
start_http_pool/2,
id/1
]).
-define(DEFAULT_CALL_TIMEOUT, 5000).
-define(DEFAULT_HTTP_POOL_TIMEOUT, 60000).
-define(DEAFULT_HTTP_POOL_CLEANUP_INTERVAL, 60000).
-define(SAFE_CALL_VIA_GPROC(ProfileId, Message, Timeout),
?SAFE_CALL_VIA_GPROC(id(ProfileId), Message, Timeout, profile_not_found)
).
%% @doc Child spec for running one profile-conf server per S3 profile
%% under the profile supervisor.
-spec child_spec(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:child_spec().
child_spec(ProfileId, ProfileConfig) ->
    #{
        id => ProfileId,
        start => {?MODULE, start_link, [ProfileId, ProfileConfig]},
        restart => permanent,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }.
%% @doc Start the profile-conf server, registered via gproc under
%% `id(ProfileId)' so API calls can address it by profile id.
-spec start_link(emqx_s3:profile_id(), emqx_s3:profile_config()) -> gen_server:start_ret().
start_link(ProfileId, ProfileConfig) ->
    gen_server:start_link(?VIA_GPROC(id(ProfileId)), ?MODULE, [ProfileId, ProfileConfig], []).
%% @doc Replace the profile configuration; may transparently start a new
%% HTTP pool if connection settings changed (see `update_http_pool/3').
-spec update_config(emqx_s3:profile_id(), emqx_s3:profile_config()) -> ok_or_error(term()).
update_config(ProfileId, ProfileConfig) ->
    update_config(ProfileId, ProfileConfig, ?DEFAULT_CALL_TIMEOUT).
-spec update_config(emqx_s3:profile_id(), emqx_s3:profile_config(), timeout()) ->
    ok_or_error(term()).
update_config(ProfileId, ProfileConfig, Timeout) ->
    ?SAFE_CALL_VIA_GPROC(ProfileId, {update_config, ProfileConfig}, Timeout).
%% @doc Check out the current client/uploader configs; the calling process
%% is registered (and monitored) as a user of the current HTTP pool until
%% it checks the config back in or dies.
-spec checkout_config(emqx_s3:profile_id()) ->
    {ok, emqx_s3_client:config(), emqx_s3_uploader:config()} | {error, profile_not_found}.
checkout_config(ProfileId) ->
    checkout_config(ProfileId, ?DEFAULT_CALL_TIMEOUT).
-spec checkout_config(emqx_s3:profile_id(), timeout()) ->
    {ok, emqx_s3_client:config(), emqx_s3_uploader:config()} | {error, profile_not_found}.
checkout_config(ProfileId, Timeout) ->
    ?SAFE_CALL_VIA_GPROC(ProfileId, {checkout_config, self()}, Timeout).
%% @doc Release a previously checked-out config, allowing outdated pools
%% to be stopped once their last user is gone.
-spec checkin_config(emqx_s3:profile_id()) -> ok | {error, profile_not_found}.
checkin_config(ProfileId) ->
    checkin_config(ProfileId, ?DEFAULT_CALL_TIMEOUT).
-spec checkin_config(emqx_s3:profile_id(), timeout()) -> ok | {error, profile_not_found}.
checkin_config(ProfileId, Timeout) ->
    ?SAFE_CALL_VIA_GPROC(ProfileId, {checkin_config, self()}, Timeout).
%%--------------------------------------------------------------------
%% gen_server callbacks
%%--------------------------------------------------------------------
%% Trap exits so terminate/2 runs and pools are cleaned up on shutdown.
%% Any pools left over from a previous incarnation of this profile are
%% removed before starting a fresh one.
init([ProfileId, ProfileConfig]) ->
    _ = process_flag(trap_exit, true),
    ok = cleanup_profile_pools(ProfileId),
    case start_http_pool(ProfileId, ProfileConfig) of
        {ok, PoolName} ->
            HttpPoolCleanupInterval = http_pool_cleanup_interval(ProfileConfig),
            {ok, #{
                profile_id => ProfileId,
                profile_config => ProfileConfig,
                client_config => client_config(ProfileConfig, PoolName),
                uploader_config => uploader_config(ProfileConfig),
                pool_name => PoolName,
                %% Private ETS table tracking which client pid uses which pool.
                pool_clients => emqx_s3_profile_http_pool_clients:create_table(),
                %% We don't expose these options to users currently, but use in tests
                http_pool_timeout => http_pool_timeout(ProfileConfig),
                http_pool_cleanup_interval => HttpPoolCleanupInterval,
                %% Periodic self-message driving removal of outdated pools.
                outdated_pool_cleanup_tref => erlang:send_after(
                    HttpPoolCleanupInterval, self(), cleanup_outdated
                )
            }};
        {error, Reason} ->
            {stop, Reason}
    end.
%% checkout: hand out current configs and track the caller as a user of
%% the current pool (monitored, so a crash counts as an implicit checkin).
handle_call(
    {checkout_config, Pid},
    _From,
    #{
        client_config := ClientConfig,
        uploader_config := UploaderConfig
    } = State
) ->
    ok = register_client(Pid, State),
    {reply, {ok, ClientConfig, UploaderConfig}, State};
%% checkin: explicit release by a client.
handle_call({checkin_config, Pid}, _From, State) ->
    ok = unregister_client(Pid, State),
    {reply, ok, State};
%% update: rebuild derived configs; a new HTTP pool is started only if
%% connection-affecting settings changed, and the old one is marked
%% outdated rather than stopped immediately (it may still have users).
handle_call(
    {update_config, NewProfileConfig},
    _From,
    #{profile_id := ProfileId} = State
) ->
    case update_http_pool(ProfileId, NewProfileConfig, State) of
        {ok, PoolName} ->
            NewState = State#{
                profile_config => NewProfileConfig,
                client_config => client_config(NewProfileConfig, PoolName),
                uploader_config => uploader_config(NewProfileConfig),
                http_pool_timeout => http_pool_timeout(NewProfileConfig),
                http_pool_cleanup_interval => http_pool_cleanup_interval(NewProfileConfig),
                pool_name => PoolName
            },
            {reply, ok, NewState};
        {error, Reason} ->
            {reply, {error, Reason}, State}
    end;
handle_call(_Request, _From, State) ->
    {reply, {error, not_implemented}, State}.
%% No casts are part of this server's protocol; ignore any strays.
handle_cast(_Request, State) ->
    {noreply, State}.
%% A checked-out client died: treat it as a checkin.
handle_info({'DOWN', _Ref, process, Pid, _Reason}, State) ->
    ok = unregister_client(Pid, State),
    {noreply, State};
%% Periodic tick: stop outdated pools whose deadline has passed, then
%% re-arm the timer.
handle_info(cleanup_outdated, #{http_pool_cleanup_interval := HttpPoolCleanupInterval} = State0) ->
    %% Maybe cleanup asynchronously
    ok = cleanup_outdated_pools(State0),
    State1 = State0#{
        outdated_pool_cleanup_tref => erlang:send_after(
            HttpPoolCleanupInterval, self(), cleanup_outdated
        )
    },
    {noreply, State1};
handle_info(_Info, State) ->
    {noreply, State}.
%% Stop every pool registered for this profile (current and outdated).
terminate(_Reason, #{profile_id := ProfileId}) ->
    cleanup_profile_pools(ProfileId).
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------
%% gproc registration key for this profile's conf server.
id(ProfileId) ->
    {?MODULE, ProfileId}.
%% Derive the `emqx_s3_client' config from the profile config and the
%% current HTTP pool name. Transport-level options (headers, timeouts,
%% pool type) come from the nested `transport_options' map.
client_config(ProfileConfig, PoolName) ->
    HTTPOpts = maps:get(transport_options, ProfileConfig, #{}),
    #{
        scheme => scheme(HTTPOpts),
        host => maps:get(host, ProfileConfig),
        port => maps:get(port, ProfileConfig),
        url_expire_time => maps:get(url_expire_time, ProfileConfig),
        headers => maps:get(headers, HTTPOpts, #{}),
        acl => maps:get(acl, ProfileConfig, undefined),
        bucket => maps:get(bucket, ProfileConfig),
        access_key_id => maps:get(access_key_id, ProfileConfig, undefined),
        secret_access_key => maps:get(secret_access_key, ProfileConfig, undefined),
        request_timeout => maps:get(request_timeout, HTTPOpts, undefined),
        max_retries => maps:get(max_retries, HTTPOpts, undefined),
        pool_type => maps:get(pool_type, HTTPOpts, random),
        http_pool => PoolName
    }.
%% Subset of the profile config consumed by `emqx_s3_uploader':
%% just the multipart part-size bounds.
uploader_config(#{max_part_size := MaxPartSize, min_part_size := MinPartSize}) ->
    #{
        max_part_size => MaxPartSize,
        min_part_size => MinPartSize
    }.
%% URL scheme for the S3 endpoint: TLS-enabled transports get "https://",
%% everything else defaults to plain "http://".
scheme(TransportOpts) ->
    case TransportOpts of
        #{ssl := #{enable := true}} -> "https://";
        _ -> "http://"
    end.
%% Start a fresh ehttpc pool for this profile and record it in the global
%% pool registry so it can be found for cleanup later.
start_http_pool(ProfileId, ProfileConfig) ->
    HttpConfig = http_config(ProfileConfig),
    PoolName = pool_name(ProfileId),
    case do_start_http_pool(PoolName, HttpConfig) of
        ok ->
            ok = emqx_s3_profile_http_pools:register(ProfileId, PoolName),
            ok = ?tp(debug, "s3_start_http_pool", #{pool_name => PoolName, profile_id => ProfileId}),
            {ok, PoolName};
        {error, _} = Error ->
            Error
    end.
%% Start a replacement pool only when the HTTP-relevant part of the config
%% actually changed; otherwise keep the current pool. The old pool is
%% marked outdated (with a deadline) instead of being stopped, since
%% checked-out clients may still be using it.
update_http_pool(ProfileId, ProfileConfig, #{pool_name := OldPoolName} = State) ->
    HttpConfig = http_config(ProfileConfig),
    OldHttpConfig = old_http_config(State),
    case OldHttpConfig =:= HttpConfig of
        true ->
            {ok, OldPoolName};
        false ->
            PoolName = pool_name(ProfileId),
            case do_start_http_pool(PoolName, HttpConfig) of
                ok ->
                    ok = set_old_pool_outdated(State),
                    ok = emqx_s3_profile_http_pools:register(ProfileId, PoolName),
                    {ok, PoolName};
                {error, _} = Error ->
                    Error
            end
    end.
%% Unique ehttpc pool name for this profile. A timestamp plus a unique
%% integer guarantee that a replacement pool never collides with the pool
%% it supersedes.
pool_name(ProfileId) ->
    Stamp = integer_to_binary(erlang:system_time(millisecond)),
    Unique = integer_to_binary(erlang:unique_integer([positive])),
    iolist_to_binary(
        [<<"s3-http-">>, profile_id_to_bin(ProfileId), <<"-">>, Stamp, <<"-">>, Unique]
    ).
%% Normalize a profile id (atom or binary) to binary form.
profile_id_to_bin(Id) when is_binary(Id) -> Id;
profile_id_to_bin(Id) when is_atom(Id) -> atom_to_binary(Id, utf8).
%% HTTP config derived from the profile config currently held in state.
old_http_config(#{profile_config := ProfileConfig}) -> http_config(ProfileConfig).
%% Mark the state's current pool as outdated with a deadline of
%% now + http_pool_timeout; the cleanup tick stops it once expired.
set_old_pool_outdated(#{
    profile_id := ProfileId, pool_name := PoolName, http_pool_timeout := HttpPoolTimeout
}) ->
    _ = emqx_s3_profile_http_pools:set_outdated(ProfileId, PoolName, HttpPoolTimeout),
    ok.
%% Stop and unregister every pool (current and outdated) recorded for the
%% profile. Used on init (stale pools from a crash) and on terminate.
cleanup_profile_pools(ProfileId) ->
    lists:foreach(
        fun(PoolName) ->
            ok = stop_http_pool(ProfileId, PoolName)
        end,
        emqx_s3_profile_http_pools:all(ProfileId)
    ).
%% Track `Pid' as a user of the current pool: monitor it (so death acts as
%% checkin), record the pid->pool mapping locally, and bump the pool's
%% global client counter.
register_client(Pid, #{profile_id := ProfileId, pool_clients := PoolClients, pool_name := PoolName}) ->
    MRef = monitor(process, Pid),
    ok = emqx_s3_profile_http_pool_clients:register(PoolClients, Pid, MRef, PoolName),
    _ = emqx_s3_profile_http_pools:register_client(ProfileId, PoolName),
    ok.
%% Release `Pid''s claim on whichever pool it checked out. Note the clause
%% ordering: `PoolName' is bound from the state map, so the second case
%% clause matches only when the client was using the *current* pool; any
%% other pool name falls through to the third clause, which may stop the
%% outdated pool once its last client is gone.
unregister_client(
    Pid,
    #{
        profile_id := ProfileId, pool_clients := PoolClients, pool_name := PoolName
    }
) ->
    case emqx_s3_profile_http_pool_clients:unregister(PoolClients, Pid) of
        undefined ->
            %% Unknown pid (e.g. double checkin) — nothing to do.
            ok;
        {MRef, PoolName} ->
            true = erlang:demonitor(MRef, [flush]),
            _ = emqx_s3_profile_http_pools:unregister_client(ProfileId, PoolName),
            ok;
        {MRef, OutdatedPoolName} ->
            true = erlang:demonitor(MRef, [flush]),
            ClientNum = emqx_s3_profile_http_pools:unregister_client(ProfileId, OutdatedPoolName),
            maybe_stop_outdated_pool(ProfileId, OutdatedPoolName, ClientNum)
    end.
%% Stop an outdated pool as soon as its client count reaches zero; any
%% other count (including `undefined' from a missing registry entry) is
%% left to the periodic cleanup.
maybe_stop_outdated_pool(ProfileId, OutdatedPoolName, 0) ->
    ok = stop_http_pool(ProfileId, OutdatedPoolName);
maybe_stop_outdated_pool(_ProfileId, _OutdatedPoolName, _ClientNum) ->
    ok.
%% Stop every pool whose outdated-deadline has passed, regardless of any
%% remaining clients (they had `http_pool_timeout' to migrate).
cleanup_outdated_pools(#{profile_id := ProfileId}) ->
    lists:foreach(
        fun(PoolName) ->
            ok = stop_http_pool(ProfileId, PoolName)
        end,
        emqx_s3_profile_http_pools:outdated(ProfileId)
    ).
%%--------------------------------------------------------------------
%% HTTP Pool implementation dependent functions
%%--------------------------------------------------------------------
%% Translate the profile config into an ehttpc pool option list. Only the
%% options listed here participate in the "did the pool config change?"
%% comparison made by `update_http_pool/3'.
http_config(
    #{
        host := Host,
        port := Port,
        transport_options := #{
            pool_type := PoolType,
            pool_size := PoolSize,
            enable_pipelining := EnablePipelining,
            connect_timeout := ConnectTimeout
        } = HTTPOpts
    }
) ->
    {Transport, TransportOpts} =
        case scheme(HTTPOpts) of
            "http://" ->
                {tcp, []};
            "https://" ->
                SSLOpts = emqx_tls_lib:to_client_opts(maps:get(ssl, HTTPOpts)),
                {tls, SSLOpts}
        end,
    NTransportOpts = maybe_ipv6_probe(TransportOpts, maps:get(ipv6_probe, HTTPOpts, true)),
    [
        {host, Host},
        {port, Port},
        {connect_timeout, ConnectTimeout},
        {keepalive, 30000},
        {pool_type, PoolType},
        {pool_size, PoolSize},
        {transport, Transport},
        {transport_opts, NTransportOpts},
        {enable_pipelining, EnablePipelining}
    ].
%% Optionally add the ipv6-probe transport option (on by default).
maybe_ipv6_probe(TransportOpts, true) ->
    emqx_utils:ipv6_probe(TransportOpts);
maybe_ipv6_probe(TransportOpts, false) ->
    TransportOpts.
%% How often (ms) the server checks for outdated pools to stop.
%% NOTE(review): the default macro name is misspelled (?DEAFULT_...);
%% renaming it requires touching the -define above as well.
http_pool_cleanup_interval(ProfileConfig) ->
    maps:get(
        http_pool_cleanup_interval, ProfileConfig, ?DEAFULT_HTTP_POOL_CLEANUP_INTERVAL
    ).
%% Grace period (ms) an outdated pool is kept alive for remaining clients.
http_pool_timeout(ProfileConfig) ->
    maps:get(
        http_pool_timeout, ProfileConfig, ?DEFAULT_HTTP_POOL_TIMEOUT
    ).
%% Stop an ehttpc pool and drop it from the registry. A stop failure is
%% logged but deliberately not propagated — the registry entry is removed
%% either way so we never retry a pool that cannot be stopped.
stop_http_pool(ProfileId, PoolName) ->
    case ehttpc_sup:stop_pool(PoolName) of
        ok ->
            ok;
        {error, Reason} ->
            ?SLOG(error, #{msg => "ehttpc_pool_stop_fail", pool_name => PoolName, reason => Reason}),
            ok
    end,
    ok = emqx_s3_profile_http_pools:unregister(ProfileId, PoolName),
    ok = ?tp(debug, "s3_stop_http_pool", #{pool_name => PoolName}).
%% Start the ehttpc pool itself.
%% NOTE(review): both start messages are logged at `warning' although pool
%% start-up is a normal event — looks like leftover debug logging; confirm
%% before downgrading to info/debug.
do_start_http_pool(PoolName, HttpConfig) ->
    ?SLOG(warning, #{msg => "s3_start_http_pool", pool_name => PoolName, config => HttpConfig}),
    case ehttpc_sup:start_pool(PoolName, HttpConfig) of
        {ok, _} ->
            ?SLOG(warning, #{msg => "s3_start_http_pool_success", pool_name => PoolName}),
            ok;
        {error, _} = Error ->
            ?SLOG(error, #{msg => "s3_start_http_pool_fail", pool_name => PoolName, error => Error}),
            Error
    end.

View File

@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
%% @doc Per-profile bookkeeping of which client pid checked out which HTTP
%% pool. Backed by an unnamed private ETS table owned by the profile-conf
%% server process.
-module(emqx_s3_profile_http_pool_clients).
-export([
    create_table/0,
    register/4,
    unregister/2
]).
-define(TAB, ?MODULE).
%% @doc Create the private tracking table and return its table id.
%% The table is unnamed, so the id must be kept and passed to the other
%% functions. (The previous spec claimed `ok', but `ets:new/2' returns the
%% table reference, which the caller stores in its state.)
-spec create_table() -> ets:table().
create_table() ->
    ets:new(?TAB, [
        private,
        set
    ]).
%% @doc Remember that `Pid' (monitored via `MRef') is using `PoolName'.
-spec register(ets:table(), pid(), reference(), emqx_s3_profile_http_pools:pool_name()) -> ok.
register(Tab, Pid, MRef, PoolName) ->
    true = ets:insert(Tab, {Pid, {MRef, PoolName}}),
    ok.
%% @doc Remove and return the `{MRef, PoolName}' entry for `Pid', or
%% `undefined' when the pid was never registered (or already removed).
-spec unregister(ets:table(), pid()) ->
    {reference(), emqx_s3_profile_http_pools:pool_name()} | undefined.
unregister(Tab, Pid) ->
    case ets:take(Tab, Pid) of
        [{Pid, {MRef, PoolName}}] ->
            {MRef, PoolName};
        [] ->
            undefined
    end.

View File

@ -0,0 +1,124 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_profile_http_pools).
-include_lib("stdlib/include/ms_transform.hrl").
-export([
create_table/0,
register/2,
unregister/2,
register_client/2,
unregister_client/2,
set_outdated/3,
outdated/1,
all/1
]).
-export_type([pool_name/0]).
-define(TAB, ?MODULE).
-type pool_name() :: ecpool:pool_name().
-type pool_key() :: {emqx_s3:profile_id(), pool_name()}.
-record(pool, {
key :: pool_key(),
client_count = 0 :: integer(),
deadline = undefined :: undefined | integer(),
extra = #{} :: map()
}).
%% @doc Create the global (named, public) pool registry table. Keyed by
%% `{ProfileId, PoolName}' via the record keypos; safe for concurrent
%% access from profile-conf servers.
-spec create_table() -> ok.
create_table() ->
    _ = ets:new(?TAB, [
        named_table,
        public,
        ordered_set,
        {keypos, #pool.key},
        {read_concurrency, true},
        {write_concurrency, true}
    ]),
    ok.
%% @doc Record a freshly started pool with zero clients and no deadline.
-spec register(emqx_s3:profile_id(), pool_name()) ->
    ok.
register(ProfileId, PoolName) ->
    Key = key(ProfileId, PoolName),
    true = ets:insert(?TAB, #pool{
        key = Key,
        client_count = 0,
        deadline = undefined,
        extra = #{}
    }),
    ok.
%% @doc Drop a pool from the registry (idempotent: deleting a missing key
%% is a no-op for ets).
-spec unregister(emqx_s3:profile_id(), pool_name()) ->
    ok.
unregister(ProfileId, PoolName) ->
    Key = key(ProfileId, PoolName),
    true = ets:delete(?TAB, Key),
    ok.
%% @doc Atomically increment the pool's client counter; returns the new
%% count. Raises `badarg' if the pool is not registered.
-spec register_client(emqx_s3:profile_id(), pool_name()) ->
    integer().
register_client(ProfileId, PoolName) ->
    Key = key(ProfileId, PoolName),
    ets:update_counter(?TAB, Key, {#pool.client_count, 1}).
%% @doc Atomically decrement the pool's client counter, returning the new
%% count, or `undefined' when the pool entry no longer exists
%% (`ets:update_counter/3' raises `badarg' on a missing key).
%% Spec fixed: it previously claimed `integer()' although the catch clause
%% returns `undefined' — callers (e.g. `maybe_stop_outdated_pool') already
%% handle the `undefined' case.
-spec unregister_client(emqx_s3:profile_id(), pool_name()) ->
    integer() | undefined.
unregister_client(ProfileId, PoolName) ->
    Key = key(ProfileId, PoolName),
    try
        ets:update_counter(?TAB, Key, {#pool.client_count, -1})
    catch
        error:badarg ->
            undefined
    end.
%% @doc Mark a pool as outdated: record a monotonic-time deadline after
%% which `outdated/1' will report it for cleanup. No-op on a missing key.
-spec set_outdated(emqx_s3:profile_id(), pool_name(), integer()) ->
    ok.
set_outdated(ProfileId, PoolName, Timeout) ->
    Key = key(ProfileId, PoolName),
    Now = erlang:monotonic_time(millisecond),
    _ = ets:update_element(?TAB, Key, {#pool.deadline, Now + Timeout}),
    ok.
%% @doc Pool names of this profile whose outdated-deadline has passed
%% (deadline is monotonic time, so comparison is clock-skew safe).
-spec outdated(emqx_s3:profile_id()) ->
    [pool_name()].
outdated(ProfileId) ->
    Now = erlang:monotonic_time(millisecond),
    MS = ets:fun2ms(
        fun(#pool{key = {CurProfileId, CurPoolName}, deadline = CurDeadline}) when
            CurProfileId =:= ProfileId andalso
                CurDeadline =/= undefined andalso CurDeadline < Now
        ->
            CurPoolName
        end
    ),
    ets:select(?TAB, MS).
%% @doc All pool names registered for the profile, outdated or not.
-spec all(emqx_s3:profile_id()) ->
    [pool_name()].
all(ProfileId) ->
    MS = ets:fun2ms(
        fun(#pool{key = {CurProfileId, CurPoolName}}) when CurProfileId =:= ProfileId ->
            CurPoolName
        end
    ),
    ets:select(?TAB, MS).
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% Composite ETS key for a profile/pool pair.
key(ProfileId, PoolName) ->
    {ProfileId, PoolName}.

View File

@ -0,0 +1,48 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
%% @doc Per-profile supervisor: owns the profile's conf server and its
%% uploader supervisor.
-module(emqx_s3_profile_sup).
-behaviour(supervisor).
-include_lib("emqx/include/types.hrl").
-export([
    start_link/2,
    child_spec/2
]).
-export([init/1]).
-spec start_link(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:start_ret().
start_link(ProfileId, ProfileConfig) ->
    supervisor:start_link(?MODULE, [ProfileId, ProfileConfig]).
%% @doc Child spec used by `emqx_s3_sup' to run one such supervisor per
%% profile, identified by the profile id.
-spec child_spec(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:child_spec().
child_spec(ProfileId, ProfileConfig) ->
    #{
        id => ProfileId,
        start => {?MODULE, start_link, [ProfileId, ProfileConfig]},
        restart => permanent,
        shutdown => 5000,
        type => supervisor,
        modules => [?MODULE]
    }.
%%--------------------------------------------------------------------
%% supervisor callbacks
%%--------------------------------------------------------------------
init([ProfileId, ProfileConfig]) ->
    SupFlags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 5
    },
    ChildSpecs = [
        %% Order matters: the conf server must be up (and its HTTP pool
        %% started) before uploaders can check out configs.
        emqx_s3_profile_conf:child_spec(ProfileId, ProfileConfig),
        emqx_s3_profile_uploader_sup:child_spec(ProfileId)
    ],
    {ok, {SupFlags, ChildSpecs}}.

View File

@ -0,0 +1,75 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
%% @doc simple_one_for_one supervisor spawning `emqx_s3_uploader' workers
%% on demand for a single S3 profile. Registered via gproc so uploaders
%% can be started by profile id alone.
-module(emqx_s3_profile_uploader_sup).
-behaviour(supervisor).
-include_lib("emqx/include/types.hrl").
-include_lib("emqx/include/logger.hrl").
-include("src/emqx_s3.hrl").
-export([
    start_link/1,
    child_spec/1,
    id/1,
    start_uploader/2
]).
-export([init/1]).
-export_type([id/0]).
-type id() :: {?MODULE, emqx_s3:profile_id()}.
-spec start_link(emqx_s3:profile_id()) -> supervisor:start_ret().
start_link(ProfileId) ->
    supervisor:start_link(?VIA_GPROC(id(ProfileId)), ?MODULE, [ProfileId]).
-spec child_spec(emqx_s3:profile_id()) -> supervisor:child_spec().
child_spec(ProfileId) ->
    #{
        id => id(ProfileId),
        start => {?MODULE, start_link, [ProfileId]},
        restart => permanent,
        shutdown => 5000,
        type => supervisor,
        modules => [?MODULE]
    }.
%% @doc gproc registration key for this profile's uploader supervisor.
-spec id(emqx_s3:profile_id()) -> id().
id(ProfileId) ->
    {?MODULE, ProfileId}.
%% @doc Spawn a new uploader under the profile's supervisor. A `noproc'
%% exit means no such profile is running; translate it to an error tuple.
-spec start_uploader(emqx_s3:profile_id(), emqx_s3_uploader:opts()) ->
    supervisor:start_ret() | {error, profile_not_found}.
start_uploader(ProfileId, Opts) ->
    try supervisor:start_child(?VIA_GPROC(id(ProfileId)), [Opts]) of
        Result -> Result
    catch
        exit:{noproc, _} -> {error, profile_not_found}
    end.
%%--------------------------------------------------------------------
%% supervisor callbacks
%%--------------------------------------------------------------------
init([ProfileId]) ->
    SupFlags = #{
        strategy => simple_one_for_one,
        intensity => 10,
        period => 5
    },
    ChildSpecs = [
        #{
            id => emqx_s3_uploader,
            start => {emqx_s3_uploader, start_link, [ProfileId]},
            %% Uploaders are one-shot workers; do not restart them.
            restart => temporary,
            shutdown => 5000,
            type => worker,
            modules => [emqx_s3_uploader]
        }
    ],
    {ok, {SupFlags, ChildSpecs}}.

View File

@ -0,0 +1,172 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_schema).
-include_lib("typerefl/include/types.hrl").
-include_lib("hocon/include/hoconsc.hrl").
-import(hoconsc, [mk/2, ref/2]).
-export([roots/0, fields/1, namespace/0, tags/0, desc/1]).
-export([translate/1]).
-export([translate/2]).
%% Hocon schema entry points: a single `s3' root.
roots() ->
    [s3].
namespace() -> "s3".
tags() ->
    [<<"S3">>].
%% Schema fields for the `s3' root and the nested `transport_options'
%% struct. Descriptions come from the i18n ?DESC entries.
fields(s3) ->
    [
        {access_key_id,
            mk(
                string(),
                #{
                    desc => ?DESC("access_key_id"),
                    required => false
                }
            )},
        {secret_access_key,
            mk(
                string(),
                #{
                    desc => ?DESC("secret_access_key"),
                    required => false,
                    %% Redacted in dumps/logs by the config framework.
                    sensitive => true
                }
            )},
        {bucket,
            mk(
                string(),
                #{
                    desc => ?DESC("bucket"),
                    required => true
                }
            )},
        {host,
            mk(
                string(),
                #{
                    desc => ?DESC("host"),
                    required => true
                }
            )},
        {port,
            mk(
                pos_integer(),
                #{
                    desc => ?DESC("port"),
                    required => true
                }
            )},
        {url_expire_time,
            mk(
                emqx_schema:duration_s(),
                #{
                    default => "1h",
                    desc => ?DESC("url_expire_time"),
                    required => false
                }
            )},
        %% Both part-size fields are constrained to the 5 MB .. 5 GB range
        %% imposed by the S3 multipart API (see part_size_validator/1).
        {min_part_size,
            mk(
                emqx_schema:bytesize(),
                #{
                    default => "5mb",
                    desc => ?DESC("min_part_size"),
                    required => true,
                    validator => fun part_size_validator/1
                }
            )},
        {max_part_size,
            mk(
                emqx_schema:bytesize(),
                #{
                    default => "5gb",
                    desc => ?DESC("max_part_size"),
                    required => true,
                    validator => fun part_size_validator/1
                }
            )},
        {acl,
            mk(
                hoconsc:enum([
                    private,
                    public_read,
                    public_read_write,
                    authenticated_read,
                    bucket_owner_read,
                    bucket_owner_full_control
                ]),
                #{
                    desc => ?DESC("acl"),
                    required => false
                }
            )},
        {transport_options,
            mk(
                ref(?MODULE, transport_options),
                #{
                    desc => ?DESC("transport_options"),
                    required => false
                }
            )}
    ];
fields(transport_options) ->
    %% Reuse the HTTP connector's schema, dropping fields we derive
    %% ourselves (base_url, retries, request) and pulling in the
    %% per-request fields we do expose.
    [
        {ipv6_probe,
            mk(
                boolean(),
                #{
                    default => true,
                    desc => ?DESC("ipv6_probe"),
                    required => false
                }
            )}
    ] ++
        props_without(
            [base_url, max_retries, retry_interval, request], emqx_connector_http:fields(config)
        ) ++
        props_with(
            [headers, max_retries, request_timeout], emqx_connector_http:fields("request")
        ).
%% Struct-level descriptions for documentation generation.
desc(s3) ->
    "S3 connection options";
desc(transport_options) ->
    "Options for the HTTP transport layer used by the S3 client".
%% @doc Check a raw `s3' config map against this schema and return the
%% validated config (atom keys by default; caller options may override).
translate(Conf) ->
    translate(Conf, #{}).
translate(Conf, OptionsIn) ->
    Options = maps:merge(#{atom_key => true}, OptionsIn),
    #{s3 := TranslatedConf} = hocon_tconf:check_plain(
        emqx_s3_schema, #{<<"s3">> => Conf}, Options, [s3]
    ),
    TranslatedConf.
%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------
%% Keep only the schema proplist entries whose keys appear in `Keys'.
props_with(Keys, Proplist) ->
    lists:filter(fun({Key, _Schema}) -> lists:member(Key, Keys) end, Proplist).
%% Drop the schema proplist entries whose keys appear in `Keys'.
props_without(Keys, Proplist) ->
    lists:filter(fun({Key, _Schema}) -> not lists:member(Key, Keys) end, Proplist).
%% Part sizes must lie within the S3 multipart limits:
%% 5 MB .. 5 GB, inclusive.
part_size_validator(PartSizeLimit) when
    PartSizeLimit >= 5 * 1024 * 1024,
    PartSizeLimit =< 5 * 1024 * 1024 * 1024
->
    ok;
part_size_validator(_PartSizeLimit) ->
    {error, "must be at least 5mb and less than 5gb"}.

View File

@ -0,0 +1,47 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
%% @doc Top-level S3 application supervisor. Owns the global pool-registry
%% table and runs one `emqx_s3_profile_sup' per configured profile,
%% started/stopped dynamically.
-module(emqx_s3_sup).
-behaviour(supervisor).
-include_lib("emqx/include/types.hrl").
-export([
    start_link/0,
    start_profile/2,
    stop_profile/1
]).
-export([init/1]).
-spec start_link() -> supervisor:start_ret().
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).
%% @doc Start the supervision subtree for one profile.
-spec start_profile(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:startchild_ret().
start_profile(ProfileId, ProfileConfig) ->
    supervisor:start_child(?MODULE, emqx_s3_profile_sup:child_spec(ProfileId, ProfileConfig)).
%% @doc Stop a profile's subtree; the child spec is deleted as well so the
%% profile can later be started again under the same id.
-spec stop_profile(emqx_s3:profile_id()) -> ok_or_error(term()).
stop_profile(ProfileId) ->
    case supervisor:terminate_child(?MODULE, ProfileId) of
        ok ->
            supervisor:delete_child(?MODULE, ProfileId);
        {error, Reason} ->
            {error, Reason}
    end.
%%--------------------------------------------------------------------
%% supervisor callbacks
%%--------------------------------------------------------------------
init([]) ->
    %% The registry table must exist before any profile starts.
    ok = emqx_s3_profile_http_pools:create_table(),
    SupFlags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 5
    },
    {ok, {SupFlags, []}}.

View File

@ -0,0 +1,329 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_s3_uploader).
-include_lib("emqx/include/types.hrl").
-behaviour(gen_statem).
-export([
start_link/2,
write/2,
write/3,
complete/1,
complete/2,
abort/1,
abort/2
]).
-export([
init/1,
callback_mode/0,
handle_event/4,
terminate/3,
code_change/4,
format_status/1,
format_status/2
]).
-export_type([opts/0]).

%% Options accepted by start_link/2. `key' is the S3 object key and is
%% required; `headers' are extra HTTP headers passed through to the client.
-type opts() :: #{
    key := string(),
    headers => emqx_s3_client:headers()
}.

%% Internal gen_statem data.
-type data() :: #{
    profile_id := emqx_s3:profile_id(),
    client := emqx_s3_client:client(),
    key := emqx_s3_client:key(),
    %% Accumulated, not-yet-uploaded payload; grown as a nested iolist
    %% (see append_buffer/2), flushed by upload_part/1 or put_object/1.
    buffer := iodata(),
    buffer_size := non_neg_integer(),
    min_part_size := pos_integer(),
    max_part_size := pos_integer(),
    %% `undefined' until the multipart upload is lazily started.
    upload_id := undefined | emqx_s3_client:upload_id(),
    %% NOTE(review): upload_part/1 actually stores {PartNumber, ETag}
    %% pairs here (most recent first); the type looks imprecise — confirm.
    etags := [emqx_s3_client:etag()],
    part_number := emqx_s3_client:part_number(),
    headers := emqx_s3_client:headers()
}.

%% 5MB
-define(DEFAULT_MIN_PART_SIZE, 5242880).
%% 5GB
-define(DEFAULT_MAX_PART_SIZE, 5368709120).
-define(DEFAULT_TIMEOUT, 30000).
%% @doc Start an uploader process for the given profile.
%% The object key must be a charlist; enforced by the guard.
-spec start_link(emqx_s3:profile_id(), opts()) -> gen_statem:start_ret().
start_link(ProfileId, Opts = #{key := Key}) when is_list(Key) ->
    gen_statem:start_link(?MODULE, [ProfileId, Opts], []).
%% @doc Append data to the upload, using the default call timeout.
-spec write(pid(), iodata()) -> ok_or_error(term()).
write(Pid, Data) ->
    write(Pid, Data, ?DEFAULT_TIMEOUT).

%% @doc Append data to the upload.
%% The payload is wrapped in a fun before being sent (see wrap/1).
-spec write(pid(), iodata(), timeout()) -> ok_or_error(term()).
write(Pid, Data, Timeout) ->
    gen_statem:call(Pid, {write, wrap(Data)}, Timeout).
%% @doc Complete the upload, using the default call timeout.
-spec complete(pid()) -> ok_or_error(term()).
complete(Pid) ->
    complete(Pid, ?DEFAULT_TIMEOUT).

%% @doc Complete the upload: the uploader flushes its buffer and either
%% completes the multipart upload or falls back to a single PUT
%% (see handle_event/4). The process stops afterwards.
-spec complete(pid(), timeout()) -> ok_or_error(term()).
complete(Pid, Timeout) ->
    gen_statem:call(Pid, complete, Timeout).
%% @doc Abort the upload, using the default call timeout.
-spec abort(pid()) -> ok_or_error(term()).
abort(Pid) ->
    abort(Pid, ?DEFAULT_TIMEOUT).

%% @doc Abort the upload; any multipart upload already started on the
%% S3 side is aborted too (see handle_event/4). The process stops.
-spec abort(pid(), timeout()) -> ok_or_error(term()).
abort(Pid, Timeout) ->
    gen_statem:call(Pid, abort, Timeout).
%%--------------------------------------------------------------------
%% gen_statem callbacks
%%--------------------------------------------------------------------
callback_mode() -> handle_event_function.
%% Initialize uploader data and enter the `upload_not_started' state.
init([ProfileId, #{key := Key} = Opts]) ->
    %% Trap exits so terminate/3 runs on shutdown and can abort an
    %% in-flight multipart upload (see terminate/3).
    process_flag(trap_exit, true),
    {ok, ClientConfig, UploaderConfig} = emqx_s3_profile_conf:checkout_config(ProfileId),
    Client = client(ClientConfig),
    %% The multipart upload is created lazily: only once the buffer
    %% reaches `min_part_size' (see maybe_start_upload/1).
    {ok, upload_not_started, #{
        profile_id => ProfileId,
        client => Client,
        headers => maps:get(headers, Opts, #{}),
        key => Key,
        buffer => [],
        buffer_size => 0,
        min_part_size => maps:get(min_part_size, UploaderConfig, ?DEFAULT_MIN_PART_SIZE),
        max_part_size => maps:get(max_part_size, UploaderConfig, ?DEFAULT_MAX_PART_SIZE),
        upload_id => undefined,
        etags => [],
        part_number => 1
    }}.
%% Write request: reject a payload that would push the current part over
%% `max_part_size'; otherwise dispatch on the current upload state.
handle_event({call, From}, {write, WriteDataWrapped}, State, Data0) ->
    WriteData = unwrap(WriteDataWrapped),
    case is_valid_part(WriteData, Data0) of
        true ->
            handle_write(State, From, WriteData, Data0);
        false ->
            {keep_state_and_data, {reply, From, {error, {too_large, iolist_size(WriteData)}}}}
    end;
%% Complete before any part was uploaded: the whole buffer fits below
%% `min_part_size', so it is stored with a single plain PUT instead of a
%% multipart upload.
handle_event({call, From}, complete, upload_not_started, Data0) ->
    case put_object(Data0) of
        ok ->
            {stop_and_reply, normal, {reply, From, ok}};
        {error, _} = Error ->
            %% Stop with the error as reason; `upload_id' is still
            %% undefined here so terminate/3 has nothing to abort.
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end;
%% Complete with a multipart upload in flight: flush the remaining
%% buffer and finalize the upload from the collected ETags.
handle_event({call, From}, complete, upload_started, Data0) ->
    case complete_upload(Data0) of
        {ok, Data1} ->
            {stop_and_reply, normal, {reply, From, ok}, Data1};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end;
%% Abort before anything was uploaded: nothing to clean up remotely.
handle_event({call, From}, abort, upload_not_started, _Data) ->
    {stop_and_reply, normal, {reply, From, ok}};
%% Abort with a multipart upload in flight: abort it on the S3 side.
handle_event({call, From}, abort, upload_started, Data0) ->
    case abort_upload(Data0) of
        ok ->
            {stop_and_reply, normal, {reply, From, ok}};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end.
%% Buffer a write before the multipart upload exists. Once the buffer
%% reaches `min_part_size' the upload is started and the first part is
%% pushed immediately; any failure stops the process with the error.
handle_write(upload_not_started, From, WriteData, Data0) ->
    Data1 = append_buffer(Data0, WriteData),
    case maybe_start_upload(Data1) of
        not_started ->
            %% Still below `min_part_size': keep buffering.
            {keep_state, Data1, {reply, From, ok}};
        {started, Data2} ->
            case upload_part(Data2) of
                {ok, Data3} ->
                    {next_state, upload_started, Data3, {reply, From, ok}};
                {error, _} = Error ->
                    {stop_and_reply, Error, {reply, From, Error}, Data2}
            end;
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data1}
    end;
%% Buffer a write with the multipart upload already started; flush a
%% part whenever the buffer reaches `min_part_size'.
handle_write(upload_started, From, WriteData, Data0) ->
    Data1 = append_buffer(Data0, WriteData),
    case maybe_upload_part(Data1) of
        {ok, Data2} ->
            {keep_state, Data2, {reply, From, ok}};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data1}
    end.
%% On abnormal termination with a multipart upload in flight, try to
%% abort it remotely so orphaned parts are not left on the S3 side.
%% Return value is ignored by gen_statem; best effort only.
terminate(Reason, _State, #{client := Client, upload_id := UploadId, key := Key}) when
    UploadId =/= undefined, Reason =/= normal
->
    emqx_s3_client:abort_multipart(Client, Key, UploadId);
terminate(_Reason, _State, _Data) ->
    ok.
%% Standard code-change callback: state name and data pass through unchanged.
code_change(_OldVsn, StateName, StateData, _Extra) ->
    {ok, StateName, StateData}.
%% Redact sys/crash-report output (OTP 25+ map form): replace the client
%% (presumably holds credentials — confirm) with its formatted summary
%% and elide the buffered payload.
format_status(#{data := #{client := Client} = Data} = Status) ->
    RedactedData = Data#{
        client => emqx_s3_client:format(Client),
        buffer => [<<"...">>]
    },
    Status#{data => RedactedData}.
%% Pre-OTP-25 format_status/2 variant: applies the same redaction as
%% format_status/1 to the [PDict, State, Data] status list.
format_status(_Opt, [PDict, State, #{client := Client} = Data]) ->
    RedactedData = Data#{
        client => emqx_s3_client:format(Client),
        buffer => [<<"...">>]
    },
    #{
        data => RedactedData,
        state => State,
        pdict => PDict
    }.
%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------
%% @doc Start the multipart upload once enough data is buffered to form
%% a first part; otherwise report that nothing was started.
-spec maybe_start_upload(data()) -> not_started | {started, data()} | {error, term()}.
maybe_start_upload(#{buffer_size := BufferSize, min_part_size := MinPartSize} = Data) when
    BufferSize >= MinPartSize
->
    start_upload(Data);
maybe_start_upload(#{}) ->
    not_started.
%% @doc Create the multipart upload on the S3 side and remember its id.
-spec start_upload(data()) -> {started, data()} | {error, term()}.
start_upload(#{client := Client, key := Key, headers := Headers} = Data) ->
    case emqx_s3_client:start_multipart(Client, Headers, Key) of
        {ok, UploadId} ->
            {started, Data#{upload_id => UploadId}};
        {error, _} = Error ->
            Error
    end.
%% @doc Flush the buffer as a part once it has grown to at least
%% `min_part_size'; below that threshold, keep buffering.
-spec maybe_upload_part(data()) -> ok_or_error(data(), term()).
maybe_upload_part(#{buffer_size := BufferSize, min_part_size := MinPartSize} = Data) when
    BufferSize >= MinPartSize
->
    upload_part(Data);
maybe_upload_part(#{} = Data) ->
    {ok, Data}.
%% @doc Upload the buffered data as the next part of the multipart
%% upload, then reset the buffer and record the returned ETag.
%% An empty buffer is a no-op (S3 would reject an empty part anyway —
%% presumably; confirm).
-spec upload_part(data()) -> ok_or_error(data(), term()).
upload_part(#{buffer_size := 0} = Data) ->
    {ok, Data};
upload_part(
    #{
        client := Client,
        key := Key,
        upload_id := UploadId,
        buffer := Buffer,
        part_number := PartNumber,
        etags := ETags
    } = Data
) ->
    case emqx_s3_client:upload_part(Client, Key, UploadId, PartNumber, Buffer) of
        {ok, ETag} ->
            {ok, Data#{
                buffer => [],
                buffer_size => 0,
                part_number => PartNumber + 1,
                etags => [{PartNumber, ETag} | ETags]
            }};
        {error, _} = Error ->
            Error
    end.
%% Flush whatever remains in the buffer as a final part (upload_part/1
%% is a no-op on an empty buffer), then complete the multipart upload,
%% passing the collected {PartNumber, ETag} pairs in ascending order.
-spec complete_upload(data()) -> ok_or_error(data(), term()).
complete_upload(
    #{
        client := Client,
        key := Key,
        upload_id := UploadId
    } = Data0
) ->
    case upload_part(Data0) of
        {ok, #{etags := ETags} = Data1} ->
            case
                emqx_s3_client:complete_multipart(
                    %% etags were accumulated most-recent-first.
                    Client, Key, UploadId, lists:reverse(ETags)
                )
            of
                ok ->
                    {ok, Data1};
                {error, _} = Error ->
                    Error
            end;
        {error, _} = Error ->
            Error
    end.
%% @doc Abort the in-flight multipart upload on the S3 side.
%% The case normalizes the client's reply to ok | {error, _}; any other
%% shape crashes with case_clause, same as before.
-spec abort_upload(data()) -> ok_or_error(term()).
abort_upload(#{client := Client, key := Key, upload_id := UploadId}) ->
    case emqx_s3_client:abort_multipart(Client, Key, UploadId) of
        ok -> ok;
        {error, _} = Error -> Error
    end.
%% @doc Store the whole buffer with a single plain PUT. Used when the
%% upload completes before a multipart upload was ever started.
-spec put_object(data()) -> ok_or_error(term()).
put_object(#{client := Client, key := Key, buffer := Buffer, headers := Headers}) ->
    case emqx_s3_client:put_object(Client, Headers, Key, Buffer) of
        ok -> ok;
        {error, _} = Error -> Error
    end.
%% @doc Append `WriteData' to the buffer in O(1) by nesting iolists,
%% and keep the running byte count up to date.
-spec append_buffer(data(), iodata()) -> data().
append_buffer(#{buffer := Buffer, buffer_size := BufferSize} = Data, WriteData) ->
    NewSize = BufferSize + iolist_size(WriteData),
    Data#{buffer => [Buffer, WriteData], buffer_size => NewSize}.
-compile({inline, [wrap/1, unwrap/1]}).

%% Wrap the payload in a thunk before sending it in a gen_statem call
%% (see write/3); unwrap/1 recovers it in handle_event/4.
%% NOTE(review): presumably this keeps large payloads out of gen_statem
%% debug/trace and crash-report output — confirm the intent.
wrap(Data) ->
    fun() -> Data end.

unwrap(WrappedData) ->
    WrappedData().
%% @doc Check that accepting `WriteData' would not push the current part
%% past the configured `max_part_size'.
is_valid_part(WriteData, #{max_part_size := MaxPartSize, buffer_size := BufferSize}) ->
    ProspectiveSize = BufferSize + iolist_size(WriteData),
    ProspectiveSize =< MaxPartSize.
%% Build an S3 client handle from the checked-out profile client config.
client(Config) ->
    emqx_s3_client:create(Config).

View File

@ -0,0 +1,29 @@
-----BEGIN CERTIFICATE-----
MIIE5DCCAswCCQCF3o0gIdaNDjANBgkqhkiG9w0BAQsFADA0MRIwEAYDVQQKDAlF
TVFYIFRlc3QxHjAcBgNVBAMMFUNlcnRpZmljYXRlIEF1dGhvcml0eTAeFw0yMTEy
MzAwODQxMTFaFw00OTA1MTcwODQxMTFaMDQxEjAQBgNVBAoMCUVNUVggVGVzdDEe
MBwGA1UEAwwVQ2VydGlmaWNhdGUgQXV0aG9yaXR5MIICIjANBgkqhkiG9w0BAQEF
AAOCAg8AMIICCgKCAgEAqmqSrxyH16j63QhqGLT1UO8I+m6BM3HfnJQM8laQdtJ0
WgHqCh0/OphH3S7v4SfF4fNJDEJWMWuuzJzU9cTqHPLzhvo3+ZHcMIENgtY2p2Cf
7AQjEqFViEDyv2ZWNEe76BJeShntdY5NZr4gIPar99YGG/Ln8YekspleV+DU38rE
EX9WzhgBr02NN9z4NzIxeB+jdvPnxcXs3WpUxzfnUjOQf/T1tManvSdRbFmKMbxl
A8NLYK3oAYm8EbljWUINUNN6loqYhbigKv8bvo5S4xvRqmX86XB7sc0SApngtNcg
O0EKn8z/KVPDskE+8lMfGMiU2e2Tzw6Rph57mQPOPtIp5hPiKRik7ST9n0p6piXW
zRLplJEzSjf40I1u+VHmpXlWI/Fs8b1UkDSMiMVJf0LyWb4ziBSZOY2LtZzWHbWj
LbNgxQcwSS29tKgUwfEFmFcm+iOM59cPfkl2IgqVLh5h4zmKJJbfQKSaYb5fcKRf
50b1qsN40VbR3Pk/0lJ0/WqgF6kZCExmT1qzD5HJES/5grjjKA4zIxmHOVU86xOF
ouWvtilVR4PGkzmkFvwK5yRhBUoGH/A9BurhqOc0QCGay1kqHQFA6se4JJS+9KOS
x8Rn1Nm6Pi7sd6Le3cKmHTlyl5a/ofKqTCX2Qh+v/7y62V1V1wnoh3ipRjdPTnMC
AwEAATANBgkqhkiG9w0BAQsFAAOCAgEARCqaocvlMFUQjtFtepO2vyG1krn11xJ0
e7md26i+g8SxCCYqQ9IqGmQBg0Im8fyNDKRN/LZoj5+A4U4XkG1yya91ZIrPpWyF
KUiRAItchNj3g1kHmI2ckl1N//6Kpx3DPaS7qXZaN3LTExf6Ph+StE1FnS0wVF+s
tsNIf6EaQ+ZewW3pjdlLeAws3jvWKUkROc408Ngvx74zbbKo/zAC4tz8oH9ZcpsT
WD8enVVEeUQKI6ItcpZ9HgTI9TFWgfZ1vYwvkoRwNIeabYI62JKmLEo2vGfGwWKr
c+GjnJ/tlVI2DpPljfWOnQ037/7yyJI/zo65+HPRmGRD6MuW/BdPDYOvOZUTcQKh
kANi5THSbJJgZcG3jb1NLebaUQ1H0zgVjn0g3KhUV+NJQYk8RQ7rHtB+MySqTKlM
kRkRjfTfR0Ykxpks7Mjvsb6NcZENf08ZFPd45+e/ptsxpiKu4e4W4bV7NZDvNKf9
0/aD3oGYNMiP7s+KJ1lRSAjnBuG21Yk8FpzG+yr8wvJhV8aFgNQ5wIH86SuUTmN0
5bVzFEIcUejIwvGoQEctNHBlOwHrb7zmB6OwyZeMapdXBQ+9UDhYg8ehDqdDOdfn
wsBcnjD2MwNhlE1hjL+tZWLNwSHiD6xx3LvNoXZu2HK8Cp3SOrkE69cFghYMIZZb
T+fp6tNL6LE=
-----END CERTIFICATE-----

Some files were not shown because too many files have changed in this diff Show More