diff --git a/.ci/docker-compose-file/.env b/.ci/docker-compose-file/.env index 3b00b454f..12bc988bf 100644 --- a/.ci/docker-compose-file/.env +++ b/.ci/docker-compose-file/.env @@ -7,6 +7,7 @@ INFLUXDB_TAG=2.5.0 TDENGINE_TAG=3.0.2.4 DYNAMO_TAG=1.21.0 CASSANDRA_TAG=3.11.6 +MINIO_TAG=RELEASE.2023-03-20T20-16-18Z OPENTS_TAG=9aa7f88 MS_IMAGE_ADDR=mcr.microsoft.com/mssql/server diff --git a/.ci/docker-compose-file/docker-compose-minio-tcp.yaml b/.ci/docker-compose-file/docker-compose-minio-tcp.yaml new file mode 100644 index 000000000..fa78e4426 --- /dev/null +++ b/.ci/docker-compose-file/docker-compose-minio-tcp.yaml @@ -0,0 +1,21 @@ +version: '3.7' + +services: + minio: + hostname: minio + image: quay.io/minio/minio:${MINIO_TAG} + command: server --address ":9000" --console-address ":9001" /minio-data + expose: + - "9000" + - "9001" + ports: + - "9000:9000" + - "9001:9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 5s + retries: 3 + networks: + emqx_bridge: + diff --git a/.ci/docker-compose-file/docker-compose-minio-tls.yaml b/.ci/docker-compose-file/docker-compose-minio-tls.yaml new file mode 100644 index 000000000..4999cccb5 --- /dev/null +++ b/.ci/docker-compose-file/docker-compose-minio-tls.yaml @@ -0,0 +1,23 @@ +version: '3.7' + +services: + minio_tls: + hostname: minio-tls + image: quay.io/minio/minio:${MINIO_TAG} + command: server --certs-dir /etc/certs --address ":9100" --console-address ":9101" /minio-data + volumes: + - ./certs/server.crt:/etc/certs/public.crt + - ./certs/server.key:/etc/certs/private.key + expose: + - "9100" + - "9101" + ports: + - "9100:9100" + - "9101:9101" + healthcheck: + test: ["CMD", "curl", "-k", "-f", "https://localhost:9100/minio/health/live"] + interval: 30s + timeout: 5s + retries: 3 + networks: + emqx_bridge: diff --git a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml index d91118406..88c2cb61a 
100644 --- a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml +++ b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml @@ -13,19 +13,37 @@ services: volumes: - "./toxiproxy.json:/config/toxiproxy.json" ports: + # Toxiproxy management API - 8474:8474 + # InfluxDB - 8086:8086 + # InfluxDB TLS - 8087:8087 + # SQL Server - 11433:1433 + # MySQL - 13306:3306 + # MySQL TLS - 13307:3307 + # PostgreSQL - 15432:5432 + # PostgreSQL TLS - 15433:5433 + # TDEngine - 16041:6041 + # DynamoDB - 18000:8000 + # RocketMQ - 19876:9876 + # Cassandra - 19042:9042 + # Cassandra TLS - 19142:9142 + # S3 + - 19000:19000 + # S3 TLS + - 19100:19100 + # IOTDB - 14242:4242 - 28080:18080 command: diff --git a/.ci/docker-compose-file/toxiproxy.json b/.ci/docker-compose-file/toxiproxy.json index dee3134f5..c266b2792 100644 --- a/.ci/docker-compose-file/toxiproxy.json +++ b/.ci/docker-compose-file/toxiproxy.json @@ -131,5 +131,17 @@ "listen": "0.0.0.0:18080", "upstream": "iotdb:18080", "enabled": true + }, + { + "name": "minio_tcp", + "listen": "0.0.0.0:19000", + "upstream": "minio:9000", + "enabled": true + }, + { + "name": "minio_tls", + "listen": "0.0.0.0:19100", + "upstream": "minio-tls:9100", + "enabled": true } ] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5db0f4465..a45d9af59 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,6 +8,7 @@ /apps/emqx_connector/ @emqx/emqx-review-board @JimMoen /apps/emqx_dashboard/ @emqx/emqx-review-board @JimMoen @lafirest /apps/emqx_exhook/ @emqx/emqx-review-board @JimMoen @lafirest +/apps/emqx_ft/ @emqx/emqx-review-board @savonarola @keynslug /apps/emqx_gateway/ @emqx/emqx-review-board @lafirest /apps/emqx_management/ @emqx/emqx-review-board @lafirest @sstrigler /apps/emqx_plugin_libs/ @emqx/emqx-review-board @lafirest diff --git a/.github/workflows/run_test_cases.yaml b/.github/workflows/run_test_cases.yaml index b82b545df..c28ebc0bc 100644 --- a/.github/workflows/run_test_cases.yaml +++ 
b/.github/workflows/run_test_cases.yaml @@ -193,6 +193,7 @@ jobs: INFLUXDB_TAG: "2.5.0" TDENGINE_TAG: "3.0.2.4" OPENTS_TAG: "9aa7f88" + MINIO_TAG: "RELEASE.2023-03-20T20-16-18Z" PROFILE: ${{ matrix.profile }} CT_COVER_EXPORT_PREFIX: ${{ matrix.profile }}-${{ matrix.otp }} run: ./scripts/ct/run.sh --ci --app ${{ matrix.app }} diff --git a/apps/emqx/include/asserts.hrl b/apps/emqx/include/asserts.hrl index 98d8e72fc..1be725d2d 100644 --- a/apps/emqx/include/asserts.hrl +++ b/apps/emqx/include/asserts.hrl @@ -29,3 +29,17 @@ ) ) ). + +-define(assertInclude(PATTERN, LIST), + ?assert( + lists:any( + fun(X__Elem_) -> + case X__Elem_ of + PATTERN -> true; + _ -> false + end + end, + LIST + ) + ) +). diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index dceb38c47..aabe80b7d 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -9,12 +9,16 @@ {emqx_bridge,4}. {emqx_broker,1}. {emqx_cm,1}. +{emqx_cm,2}. {emqx_conf,1}. {emqx_conf,2}. {emqx_dashboard,1}. {emqx_delayed,1}. {emqx_eviction_agent,1}. {emqx_exhook,1}. +{emqx_ft_storage_exporter_fs,1}. +{emqx_ft_storage_fs,1}. +{emqx_ft_storage_fs_reader,1}. {emqx_gateway_api_listeners,1}. {emqx_gateway_cm,1}. {emqx_gateway_http,1}. 
diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index 69e0a55f7..45a97711d 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -717,9 +717,13 @@ do_publish(_PacketId, Msg = #message{qos = ?QOS_0}, Channel) -> {ok, NChannel}; do_publish(PacketId, Msg = #message{qos = ?QOS_1}, Channel) -> PubRes = emqx_broker:publish(Msg), - RC = puback_reason_code(PubRes), - NChannel = ensure_quota(PubRes, Channel), - handle_out(puback, {PacketId, RC}, NChannel); + RC = puback_reason_code(PacketId, Msg, PubRes), + case RC of + undefined -> + {ok, Channel}; + _Value -> + do_finish_publish(PacketId, PubRes, RC, Channel) + end; do_publish( PacketId, Msg = #message{qos = ?QOS_2}, @@ -727,7 +731,7 @@ do_publish( ) -> case emqx_session:publish(ClientInfo, PacketId, Msg, Session) of {ok, PubRes, NSession} -> - RC = puback_reason_code(PubRes), + RC = pubrec_reason_code(PubRes), NChannel0 = set_session(NSession, Channel), NChannel1 = ensure_timer(await_timer, NChannel0), NChannel2 = ensure_quota(PubRes, NChannel1), @@ -740,6 +744,10 @@ do_publish( handle_out(disconnect, RC, Channel) end. +do_finish_publish(PacketId, PubRes, RC, Channel) -> + NChannel = ensure_quota(PubRes, Channel), + handle_out(puback, {PacketId, RC}, NChannel). + ensure_quota(_, Channel = #channel{quota = infinity}) -> Channel; ensure_quota(PubRes, Channel = #channel{quota = Limiter}) -> @@ -759,9 +767,14 @@ ensure_quota(PubRes, Channel = #channel{quota = Limiter}) -> ensure_timer(quota_timer, Intv, Channel#channel{quota = NLimiter}) end. --compile({inline, [puback_reason_code/1]}). -puback_reason_code([]) -> ?RC_NO_MATCHING_SUBSCRIBERS; -puback_reason_code([_ | _]) -> ?RC_SUCCESS. +-compile({inline, [pubrec_reason_code/1]}). +pubrec_reason_code([]) -> ?RC_NO_MATCHING_SUBSCRIBERS; +pubrec_reason_code([_ | _]) -> ?RC_SUCCESS. 
+ +puback_reason_code(PacketId, Msg, [] = PubRes) -> + emqx_hooks:run_fold('message.puback', [PacketId, Msg, PubRes], ?RC_NO_MATCHING_SUBSCRIBERS); +puback_reason_code(PacketId, Msg, [_ | _] = PubRes) -> + emqx_hooks:run_fold('message.puback', [PacketId, Msg, PubRes], ?RC_SUCCESS). -compile({inline, [after_message_acked/3]}). after_message_acked(ClientInfo, Msg, PubAckProps) -> @@ -1266,6 +1279,8 @@ handle_info(die_if_test = Info, Channel) -> {ok, Channel}; handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) -> handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel); +handle_info({puback, PacketId, PubRes, RC}, Channel) -> + do_finish_publish(PacketId, PubRes, RC, Channel); handle_info(Info, Channel) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), {ok, Channel}. diff --git a/apps/emqx/src/emqx_cm.erl b/apps/emqx/src/emqx_cm.erl index 66c1db36e..9a3b4e39b 100644 --- a/apps/emqx/src/emqx_cm.erl +++ b/apps/emqx/src/emqx_cm.erl @@ -97,6 +97,7 @@ mark_channel_connected/1, mark_channel_disconnected/1, get_connected_client_count/0, + takeover_finish/2, do_kick_session/3, do_get_chan_stats/2, @@ -188,11 +189,13 @@ unregister_channel(ClientId) when is_binary(ClientId) -> ok. %% @private -do_unregister_channel(Chan) -> +do_unregister_channel({_ClientId, ChanPid} = Chan) -> ok = emqx_cm_registry:unregister_channel(Chan), true = ets:delete(?CHAN_CONN_TAB, Chan), true = ets:delete(?CHAN_INFO_TAB, Chan), - ets:delete_object(?CHAN_TAB, Chan). + ets:delete_object(?CHAN_TAB, Chan), + ok = emqx_hooks:run('channel.unregistered', [ChanPid]), + true. -spec connection_closed(emqx_types:clientid()) -> true. connection_closed(ClientId) -> @@ -220,7 +223,7 @@ do_get_chan_info(ClientId, ChanPid) -> -spec get_chan_info(emqx_types:clientid(), chan_pid()) -> maybe(emqx_types:infos()). get_chan_info(ClientId, ChanPid) -> - wrap_rpc(emqx_cm_proto_v1:get_chan_info(ClientId, ChanPid)). + wrap_rpc(emqx_cm_proto_v2:get_chan_info(ClientId, ChanPid)). 
%% @doc Update infos of the channel. -spec set_chan_info(emqx_types:clientid(), emqx_types:attrs()) -> boolean(). @@ -250,7 +253,7 @@ do_get_chan_stats(ClientId, ChanPid) -> -spec get_chan_stats(emqx_types:clientid(), chan_pid()) -> maybe(emqx_types:stats()). get_chan_stats(ClientId, ChanPid) -> - wrap_rpc(emqx_cm_proto_v1:get_chan_stats(ClientId, ChanPid)). + wrap_rpc(emqx_cm_proto_v2:get_chan_stats(ClientId, ChanPid)). %% @doc Set channel's stats. -spec set_chan_stats(emqx_types:clientid(), emqx_types:stats()) -> boolean(). @@ -312,13 +315,7 @@ open_session(false, ClientInfo = #{clientid := ClientId}, ConnInfo) -> }}; {living, ConnMod, ChanPid, Session} -> ok = emqx_session:resume(ClientInfo, Session), - case - request_stepdown( - {takeover, 'end'}, - ConnMod, - ChanPid - ) - of + case wrap_rpc(emqx_cm_proto_v2:takeover_finish(ConnMod, ChanPid)) of {ok, Pendings} -> Session1 = emqx_persistent_session:persist( ClientInfo, ConnInfo, Session @@ -408,6 +405,13 @@ takeover_session(ClientId) -> takeover_session(ClientId, ChanPid) end. +takeover_finish(ConnMod, ChanPid) -> + request_stepdown( + {takeover, 'end'}, + ConnMod, + ChanPid + ). + takeover_session(ClientId, Pid) -> try do_takeover_session(ClientId, Pid) @@ -437,7 +441,7 @@ do_takeover_session(ClientId, ChanPid) when node(ChanPid) == node() -> end end; do_takeover_session(ClientId, ChanPid) -> - wrap_rpc(emqx_cm_proto_v1:takeover_session(ClientId, ChanPid)). + wrap_rpc(emqx_cm_proto_v2:takeover_session(ClientId, ChanPid)). %% @doc Discard all the sessions identified by the ClientId. -spec discard_session(emqx_types:clientid()) -> ok. @@ -539,7 +543,7 @@ do_kick_session(Action, ClientId, ChanPid) -> %% @private This function is shared for session 'kick' and 'discard' (as the first arg Action). 
kick_session(Action, ClientId, ChanPid) -> try - wrap_rpc(emqx_cm_proto_v1:kick_session(Action, ClientId, ChanPid)) + wrap_rpc(emqx_cm_proto_v2:kick_session(Action, ClientId, ChanPid)) catch Error:Reason -> %% This should mostly be RPC failures. @@ -759,7 +763,7 @@ do_get_chann_conn_mod(ClientId, ChanPid) -> end. get_chann_conn_mod(ClientId, ChanPid) -> - wrap_rpc(emqx_cm_proto_v1:get_chann_conn_mod(ClientId, ChanPid)). + wrap_rpc(emqx_cm_proto_v2:get_chann_conn_mod(ClientId, ChanPid)). mark_channel_connected(ChanPid) -> ?tp(emqx_cm_connected_client_count_inc, #{chan_pid => ChanPid}), diff --git a/apps/emqx/src/emqx_maybe.erl b/apps/emqx/src/emqx_maybe.erl new file mode 100644 index 000000000..5b5d5e94b --- /dev/null +++ b/apps/emqx/src/emqx_maybe.erl @@ -0,0 +1,90 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_maybe). + +-include_lib("emqx/include/types.hrl"). + +-export([to_list/1]). +-export([from_list/1]). +-export([define/2]). +-export([apply/2]). + +-type t(T) :: maybe(T). +-export_type([t/1]). + +-spec to_list(maybe(A)) -> [A]. +to_list(undefined) -> + []; +to_list(Term) -> + [Term]. + +-spec from_list([A]) -> maybe(A). +from_list([]) -> + undefined; +from_list([Term]) -> + Term. 
+ +-spec define(maybe(A), B) -> A | B. +define(undefined, Term) -> + Term; +define(Term, _) -> + Term. + +%% @doc Apply a function to a maybe argument. +-spec apply(fun((A) -> maybe(A)), maybe(A)) -> + maybe(A). +apply(_Fun, undefined) -> + undefined; +apply(Fun, Term) when is_function(Fun) -> + erlang:apply(Fun, [Term]). + +%% + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +to_list_test_() -> + [ + ?_assertEqual([], to_list(undefined)), + ?_assertEqual([42], to_list(42)) + ]. + +from_list_test_() -> + [ + ?_assertEqual(undefined, from_list([])), + ?_assertEqual(3.1415, from_list([3.1415])), + ?_assertError(_, from_list([1, 2, 3])) + ]. + +define_test_() -> + [ + ?_assertEqual(42, define(42, undefined)), + ?_assertEqual(<<"default">>, define(undefined, <<"default">>)), + ?_assertEqual(undefined, define(undefined, undefined)) + ]. + +apply_test_() -> + [ + ?_assertEqual(<<"42">>, ?MODULE:apply(fun erlang:integer_to_binary/1, 42)), + ?_assertEqual(undefined, ?MODULE:apply(fun erlang:integer_to_binary/1, undefined)), + ?_assertEqual(undefined, ?MODULE:apply(fun crash/1, undefined)) + ]. + +crash(_) -> + erlang:error(crashed). + +-endif. diff --git a/apps/emqx/src/emqx_types.erl b/apps/emqx/src/emqx_types.erl index 96d75daba..75bba8d59 100644 --- a/apps/emqx/src/emqx_types.erl +++ b/apps/emqx/src/emqx_types.erl @@ -101,6 +101,8 @@ -export_type([oom_policy/0]). +-export_type([takeover_data/0]). + -type proto_ver() :: ?MQTT_PROTO_V3 | ?MQTT_PROTO_V4 @@ -242,3 +244,5 @@ max_heap_size => non_neg_integer(), enable => boolean() }. + +-type takeover_data() :: map(). diff --git a/apps/emqx/src/emqx_wdgraph.erl b/apps/emqx/src/emqx_wdgraph.erl new file mode 100644 index 000000000..bd7f58e7c --- /dev/null +++ b/apps/emqx/src/emqx_wdgraph.erl @@ -0,0 +1,208 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Weighted directed graph. +%% +%% Purely functional, built on top of a single `gb_tree`. +%% Weights are currently assumed to be non-negative numbers, hovewer +%% presumably anything that is ≄ 0 should work (but won't typecheck 🄲). + +-module(emqx_wdgraph). + +-export([new/0]). +-export([insert_edge/5]). +-export([find_edge/3]). +-export([get_edges/2]). + +-export([fold/3]). + +-export([find_shortest_path/3]). + +-export_type([t/0]). +-export_type([t/2]). +-export_type([weight/0]). + +-type gnode() :: term(). +-type weight() :: _NonNegative :: number(). +-type label() :: term(). + +-opaque t() :: t(gnode(), label()). +-opaque t(Node, Label) :: gb_trees:tree({Node}, [{Node, weight(), Label}]). + +%% + +-spec new() -> t(_, _). +new() -> + gb_trees:empty(). + +%% Add an edge. +%% Nodes are not expected to exist beforehand, and created lazily. +%% There could be only one edge between each pair of nodes, this function +%% replaces any existing edge in the graph. +-spec insert_edge(Node, Node, weight(), Label, t(Node, Label)) -> t(Node, Label). +insert_edge(From, To, Weight, EdgeLabel, G) -> + Edges = tree_lookup({From}, G, []), + EdgesNext = lists:keystore(To, 1, Edges, {To, Weight, EdgeLabel}), + tree_update({From}, EdgesNext, G). + +%% Find exising edge between two nodes, if any. 
+-spec find_edge(Node, Node, t(Node, Label)) -> {weight(), Label} | false. +find_edge(From, To, G) -> + Edges = tree_lookup({From}, G, []), + case lists:keyfind(To, 1, Edges) of + {To, Weight, Label} -> + {Weight, Label}; + false -> + false + end. + +%% Get all edges from the given node. +-spec get_edges(Node, t(Node, Label)) -> [{Node, weight(), Label}]. +get_edges(Node, G) -> + tree_lookup({Node}, G, []). + +-spec fold(FoldFun, Acc, t(Node, Label)) -> Acc when + FoldFun :: fun((Node, _Edge :: {Node, weight(), Label}, Acc) -> Acc). +fold(FoldFun, Acc, G) -> + fold_iterator(FoldFun, Acc, gb_trees:iterator(G)). + +fold_iterator(FoldFun, AccIn, It) -> + case gb_trees:next(It) of + {{Node}, Edges = [_ | _], ItNext} -> + AccNext = lists:foldl( + fun(Edge = {_To, _Weight, _Label}, Acc) -> + FoldFun(Node, Edge, Acc) + end, + AccIn, + Edges + ), + fold_iterator(FoldFun, AccNext, ItNext); + none -> + AccIn + end. + +% Find the shortest path between two nodes, if any. If the path exists, return list +% of edge labels along that path. +% This is a Dijkstra shortest path algorithm. It is one-way right now, for +% simplicity sake. +-spec find_shortest_path(Node, Node, t(Node, Label)) -> [Label] | {false, _StoppedAt :: Node}. +find_shortest_path(From, To, G1) -> + % NOTE + % If `From` and `To` are the same node, then path is `[]` even if this + % node does not exist in the graph. + G2 = set_cost(From, 0, [], G1), + case find_shortest_path(From, 0, To, G2) of + {true, G3} -> + construct_path(From, To, [], G3); + {false, Last} -> + {false, Last} + end. + +find_shortest_path(Node, Cost, Target, G1) -> + Edges = get_edges(Node, G1), + G2 = update_neighbours(Node, Cost, Edges, G1), + case take_queued(G2) of + {Target, _NextCost, G3} -> + {true, G3}; + {Next, NextCost, G3} -> + find_shortest_path(Next, NextCost, Target, G3); + none -> + {false, Node} + end. 
+ +construct_path(From, From, Acc, _) -> + Acc; +construct_path(From, To, Acc, G) -> + {Prev, Label} = get_label(To, G), + construct_path(From, Prev, [Label | Acc], G). + +update_neighbours(Node, NodeCost, Edges, G1) -> + lists:foldl( + fun(Edge, GAcc) -> update_neighbour(Node, NodeCost, Edge, GAcc) end, + G1, + Edges + ). + +update_neighbour(Node, NodeCost, {Neighbour, Weight, Label}, G) -> + case is_visited(G, Neighbour) of + false -> + CurrentCost = get_cost(Neighbour, G), + case NodeCost + Weight of + NeighCost when NeighCost < CurrentCost -> + set_cost(Neighbour, NeighCost, {Node, Label}, G); + _ -> + G + end; + true -> + G + end. + +get_cost(Node, G) -> + case tree_lookup({Node, cost}, G, inf) of + {Cost, _Label} -> + Cost; + inf -> + inf + end. + +get_label(Node, G) -> + {_Cost, Label} = gb_trees:get({Node, cost}, G), + Label. + +set_cost(Node, Cost, Label, G1) -> + G3 = + case tree_lookup({Node, cost}, G1, inf) of + {CostWas, _Label} -> + {true, G2} = gb_trees:take({queued, CostWas, Node}, G1), + gb_trees:insert({queued, Cost, Node}, true, G2); + inf -> + gb_trees:insert({queued, Cost, Node}, true, G1) + end, + G4 = tree_update({Node, cost}, {Cost, Label}, G3), + G4. + +take_queued(G1) -> + It = gb_trees:iterator_from({queued, 0, 0}, G1), + case gb_trees:next(It) of + {{queued, Cost, Node} = Index, true, _It} -> + {Node, Cost, gb_trees:delete(Index, G1)}; + _ -> + none + end. + +is_visited(G, Node) -> + case tree_lookup({Node, cost}, G, inf) of + inf -> + false; + {Cost, _Label} -> + not tree_lookup({queued, Cost, Node}, G, false) + end. + +tree_lookup(Index, Tree, Default) -> + case gb_trees:lookup(Index, Tree) of + {value, V} -> + V; + none -> + Default + end. + +tree_update(Index, Value, Tree) -> + case gb_trees:is_defined(Index, Tree) of + true -> + gb_trees:update(Index, Value, Tree); + false -> + gb_trees:insert(Index, Value, Tree) + end. 
diff --git a/apps/emqx/src/proto/emqx_cm_proto_v2.erl b/apps/emqx/src/proto/emqx_cm_proto_v2.erl new file mode 100644 index 000000000..4208df97f --- /dev/null +++ b/apps/emqx/src/proto/emqx_cm_proto_v2.erl @@ -0,0 +1,88 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_cm_proto_v2). + +-behaviour(emqx_bpapi). + +-export([ + introduced_in/0, + + lookup_client/2, + kickout_client/2, + + get_chan_stats/2, + get_chan_info/2, + get_chann_conn_mod/2, + + takeover_session/2, + takeover_finish/2, + kick_session/3 +]). + +-include("bpapi.hrl"). +-include("src/emqx_cm.hrl"). + +introduced_in() -> + "5.0.0". + +-spec kickout_client(node(), emqx_types:clientid()) -> ok | {badrpc, _}. +kickout_client(Node, ClientId) -> + rpc:call(Node, emqx_cm, kick_session, [ClientId]). + +-spec lookup_client(node(), {clientid, emqx_types:clientid()} | {username, emqx_types:username()}) -> + [emqx_cm:channel_info()] | {badrpc, _}. +lookup_client(Node, Key) -> + rpc:call(Node, emqx_cm, lookup_client, [Key]). + +-spec get_chan_stats(emqx_types:clientid(), emqx_cm:chan_pid()) -> emqx_types:stats() | {badrpc, _}. +get_chan_stats(ClientId, ChanPid) -> + rpc:call(node(ChanPid), emqx_cm, do_get_chan_stats, [ClientId, ChanPid], ?T_GET_INFO * 2). 
+ +-spec get_chan_info(emqx_types:clientid(), emqx_cm:chan_pid()) -> emqx_types:infos() | {badrpc, _}. +get_chan_info(ClientId, ChanPid) -> + rpc:call(node(ChanPid), emqx_cm, do_get_chan_info, [ClientId, ChanPid], ?T_GET_INFO * 2). + +-spec get_chann_conn_mod(emqx_types:clientid(), emqx_cm:chan_pid()) -> + module() | undefined | {badrpc, _}. +get_chann_conn_mod(ClientId, ChanPid) -> + rpc:call(node(ChanPid), emqx_cm, do_get_chann_conn_mod, [ClientId, ChanPid], ?T_GET_INFO * 2). + +-spec takeover_session(emqx_types:clientid(), emqx_cm:chan_pid()) -> + none + | {expired | persistent, emqx_session:session()} + | {living, _ConnMod :: atom(), emqx_cm:chan_pid(), emqx_session:session()} + | {badrpc, _}. +takeover_session(ClientId, ChanPid) -> + rpc:call(node(ChanPid), emqx_cm, takeover_session, [ClientId, ChanPid], ?T_TAKEOVER * 2). + +-spec takeover_finish(module(), emqx_cm:chan_pid()) -> + {ok, emqx_type:takeover_data()} + | {ok, list(emqx_type:deliver()), emqx_type:takeover_data()} + | {error, term()} + | {badrpc, _}. +takeover_finish(ConnMod, ChanPid) -> + erpc:call( + node(ChanPid), + emqx_cm, + takeover_finish, + [ConnMod, ChanPid], + ?T_TAKEOVER * 2 + ). + +-spec kick_session(kick | discard, emqx_types:clientid(), emqx_cm:chan_pid()) -> ok | {badrpc, _}. +kick_session(Action, ClientId, ChanPid) -> + rpc:call(node(ChanPid), emqx_cm, do_kick_session, [Action, ClientId, ChanPid], ?T_KICK * 2). diff --git a/apps/emqx/test/emqx_channel_SUITE.erl b/apps/emqx/test/emqx_channel_SUITE.erl index 2b7280b32..0b88ff045 100644 --- a/apps/emqx/test/emqx_channel_SUITE.erl +++ b/apps/emqx/test/emqx_channel_SUITE.erl @@ -1133,7 +1133,7 @@ t_ws_cookie_init(_) -> ?assertMatch(#{ws_cookie := WsCookie}, emqx_channel:info(clientinfo, Channel)). 
%%-------------------------------------------------------------------- -%% Test cases for other mechnisms +%% Test cases for other mechanisms %%-------------------------------------------------------------------- t_flapping_detect(_) -> diff --git a/apps/emqx/test/emqx_channel_delayed_puback_SUITE.erl b/apps/emqx/test/emqx_channel_delayed_puback_SUITE.erl new file mode 100644 index 000000000..4f2938b24 --- /dev/null +++ b/apps/emqx/test/emqx_channel_delayed_puback_SUITE.erl @@ -0,0 +1,70 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2018-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_channel_delayed_puback_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("emqx/include/emqx_mqtt.hrl"). +-include_lib("emqx/include/emqx_hooks.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + Config. + +end_per_suite(_Config) -> + emqx_common_test_helpers:stop_apps([]). + +init_per_testcase(Case, Config) -> + ?MODULE:Case({init, Config}). 
+ +end_per_testcase(Case, Config) -> + ?MODULE:Case({'end', Config}). + +%%-------------------------------------------------------------------- +%% Test cases +%%-------------------------------------------------------------------- + +t_delayed_puback({init, Config}) -> + emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST), + Config; +t_delayed_puback({'end', _Config}) -> + emqx_hooks:del('message.puback', {?MODULE, on_message_puback}); +t_delayed_puback(_Config) -> + {ok, ConnPid} = emqtt:start_link([{clientid, <<"clientid">>}, {proto_ver, v5}]), + {ok, _} = emqtt:connect(ConnPid), + {ok, #{reason_code := ?RC_UNSPECIFIED_ERROR}} = emqtt:publish( + ConnPid, <<"topic">>, <<"hello">>, 1 + ), + emqtt:disconnect(ConnPid). + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +on_message_puback(PacketId, _Msg, PubRes, _RC) -> + erlang:send(self(), {puback, PacketId, PubRes, ?RC_UNSPECIFIED_ERROR}), + {stop, undefined}. diff --git a/apps/emqx/test/emqx_common_test_helpers.erl b/apps/emqx/test/emqx_common_test_helpers.erl index 3654b8fae..40e9ca5fc 100644 --- a/apps/emqx/test/emqx_common_test_helpers.erl +++ b/apps/emqx/test/emqx_common_test_helpers.erl @@ -30,6 +30,7 @@ start_apps/1, start_apps/2, start_apps/3, + start_app/2, stop_apps/1, stop_apps/2, reload/2, @@ -244,6 +245,9 @@ do_render_app_config(App, Schema, ConfigFile, Opts) -> copy_certs(App, RenderedConfigFile), ok. +start_app(App, SpecAppConfig) -> + start_app(App, SpecAppConfig, #{}). + start_app(App, SpecAppConfig, Opts) -> render_and_load_app_config(App, Opts), SpecAppConfig(App), @@ -302,12 +306,7 @@ read_schema_configs(no_schema, _ConfigFile) -> ok; read_schema_configs(Schema, ConfigFile) -> NewConfig = generate_config(Schema, ConfigFile), - lists:foreach( - fun({App, Configs}) -> - [application:set_env(App, Par, Value) || {Par, Value} <- Configs] - end, - NewConfig - ). 
+ application:set_env(NewConfig). generate_config(SchemaModule, ConfigFile) when is_atom(SchemaModule) -> {ok, Conf0} = hocon:load(ConfigFile, #{format => richmap}), diff --git a/apps/emqx/test/emqx_proper_types.erl b/apps/emqx/test/emqx_proper_types.erl index 2f0f9d494..e1d95227b 100644 --- a/apps/emqx/test/emqx_proper_types.erl +++ b/apps/emqx/test/emqx_proper_types.erl @@ -43,12 +43,21 @@ ip/0, port/0, limited_atom/0, - limited_latin_atom/0 + limited_latin_atom/0, + printable_utf8/0, + printable_codepoint/0 +]). + +%% Generic Types +-export([ + scaled/2 ]). %% Iterators -export([nof/1]). +-type proptype() :: proper_types:raw_type(). + %%-------------------------------------------------------------------- %% Types High level %%-------------------------------------------------------------------- @@ -606,6 +615,20 @@ limited_atom() -> limited_any_term() -> oneof([binary(), number(), string()]). +printable_utf8() -> + ?SUCHTHAT( + String, + ?LET(L, list(printable_codepoint()), unicode:characters_to_binary(L)), + is_binary(String) + ). + +printable_codepoint() -> + frequency([ + {7, range(16#20, 16#7E)}, + {2, range(16#00A0, 16#D7FF)}, + {1, range(16#E000, 16#FFFD)} + ]). + %%-------------------------------------------------------------------- %% Iterators %%-------------------------------------------------------------------- @@ -632,6 +655,14 @@ limited_list(N, T) -> end ). +%%-------------------------------------------------------------------- +%% Generic Types +%%-------------------------------------------------------------------- + +-spec scaled(number(), proptype()) -> proptype(). +scaled(F, T) when F > 0 -> + ?SIZED(S, resize(round(S * F), T)). 
+ %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- diff --git a/apps/emqx/test/emqx_wdgraph_tests.erl b/apps/emqx/test/emqx_wdgraph_tests.erl new file mode 100644 index 000000000..c159a0f49 --- /dev/null +++ b/apps/emqx/test/emqx_wdgraph_tests.erl @@ -0,0 +1,104 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_wdgraph_tests). + +-include_lib("eunit/include/eunit.hrl"). + +empty_test_() -> + G = emqx_wdgraph:new(), + [ + ?_assertEqual([], emqx_wdgraph:get_edges(foo, G)), + ?_assertEqual(false, emqx_wdgraph:find_edge(foo, bar, G)) + ]. 
+ +edges_nodes_test_() -> + G1 = emqx_wdgraph:new(), + G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1), + G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2), + G4 = emqx_wdgraph:insert_edge(bar, foo, 0, "free", G3), + G5 = emqx_wdgraph:insert_edge(foo, bar, 100, "luxury", G4), + [ + ?_assertEqual({42, "fancy"}, emqx_wdgraph:find_edge(foo, bar, G2)), + ?_assertEqual({100, "luxury"}, emqx_wdgraph:find_edge(foo, bar, G5)), + ?_assertEqual([{bar, 100, "luxury"}], emqx_wdgraph:get_edges(foo, G5)), + + ?_assertEqual({1, "cheapest"}, emqx_wdgraph:find_edge(bar, baz, G5)), + ?_assertEqual([{baz, 1, "cheapest"}, {foo, 0, "free"}], emqx_wdgraph:get_edges(bar, G5)) + ]. + +fold_test_() -> + G1 = emqx_wdgraph:new(), + G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1), + G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2), + G4 = emqx_wdgraph:insert_edge(bar, foo, 0, "free", G3), + G5 = emqx_wdgraph:insert_edge(foo, bar, 100, "luxury", G4), + [ + ?_assertEqual( + % 100 + 0 + 1 + 101, + emqx_wdgraph:fold(fun(_From, {_, Weight, _}, Acc) -> Weight + Acc end, 0, G5) + ), + ?_assertEqual( + [bar, baz, foo], + lists:usort( + emqx_wdgraph:fold(fun(From, {To, _, _}, Acc) -> [From, To | Acc] end, [], G5) + ) + ) + ]. + +nonexistent_nodes_path_test_() -> + G1 = emqx_wdgraph:new(), + G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1), + G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2), + [ + ?_assertEqual( + {false, nosuchnode}, + emqx_wdgraph:find_shortest_path(nosuchnode, baz, G3) + ), + ?_assertEqual( + [], + emqx_wdgraph:find_shortest_path(nosuchnode, nosuchnode, G3) + ) + ]. 
+ +nonexistent_path_test_() -> + G1 = emqx_wdgraph:new(), + G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1), + G3 = emqx_wdgraph:insert_edge(baz, boo, 1, "cheapest", G2), + G4 = emqx_wdgraph:insert_edge(boo, last, 3.5, "change", G3), + [ + ?_assertEqual( + {false, last}, + emqx_wdgraph:find_shortest_path(baz, foo, G4) + ), + ?_assertEqual( + {false, bar}, + emqx_wdgraph:find_shortest_path(foo, last, G4) + ) + ]. + +shortest_path_test() -> + G1 = emqx_wdgraph:new(), + G2 = emqx_wdgraph:insert_edge(foo, bar, 42, "fancy", G1), + G3 = emqx_wdgraph:insert_edge(bar, baz, 1, "cheapest", G2), + G4 = emqx_wdgraph:insert_edge(baz, last, 0, "free", G3), + G5 = emqx_wdgraph:insert_edge(bar, last, 100, "luxury", G4), + G6 = emqx_wdgraph:insert_edge(bar, foo, 0, "comeback", G5), + ?assertEqual( + ["fancy", "cheapest", "free"], + emqx_wdgraph:find_shortest_path(foo, last, G6) + ). diff --git a/apps/emqx_bridge_dynamo/rebar.config b/apps/emqx_bridge_dynamo/rebar.config index fbccb5c9a..d3ba1093d 100644 --- a/apps/emqx_bridge_dynamo/rebar.config +++ b/apps/emqx_bridge_dynamo/rebar.config @@ -1,6 +1,6 @@ %% -*- mode: erlang; -*- {erl_opts, [debug_info]}. -{deps, [ {erlcloud, {git, "https://github.com/emqx/erlcloud.git", {tag, "3.5.16-emqx-1"}}} +{deps, [ {erlcloud, {git, "https://github.com/emqx/erlcloud", {tag, "3.6.8-emqx-1"}}} , {emqx_connector, {path, "../../apps/emqx_connector"}} , {emqx_resource, {path, "../../apps/emqx_resource"}} , {emqx_bridge, {path, "../../apps/emqx_bridge"}} diff --git a/apps/emqx_dashboard/src/emqx_dashboard.erl b/apps/emqx_dashboard/src/emqx_dashboard.erl index 08b7f0142..13fd18267 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard.erl @@ -32,8 +32,6 @@ -include_lib("emqx/include/http_api.hrl"). -include_lib("emqx/include/emqx_release.hrl"). --define(BASE_PATH, "/api/v5"). - -define(EMQX_MIDDLE, emqx_dashboard_middleware). 
%%-------------------------------------------------------------------- @@ -52,7 +50,7 @@ start_listeners(Listeners) -> GlobalSpec = #{ openapi => "3.0.0", info => #{title => "EMQX API", version => ?EMQX_API_VERSION}, - servers => [#{url => ?BASE_PATH}], + servers => [#{url => emqx_dashboard_swagger:base_path()}], components => #{ schemas => #{}, 'securitySchemes' => #{ @@ -69,11 +67,11 @@ start_listeners(Listeners) -> {"/", cowboy_static, {priv_file, emqx_dashboard, "www/index.html"}}, {"/static/[...]", cowboy_static, {priv_dir, emqx_dashboard, "www/static"}}, {emqx_mgmt_api_status:path(), emqx_mgmt_api_status, []}, - {?BASE_PATH ++ "/[...]", emqx_dashboard_bad_api, []}, + {emqx_dashboard_swagger:relative_uri("/[...]"), emqx_dashboard_bad_api, []}, {'_', cowboy_static, {priv_file, emqx_dashboard, "www/index.html"}} ], BaseMinirest = #{ - base_path => ?BASE_PATH, + base_path => emqx_dashboard_swagger:base_path(), modules => minirest_api:find_api_modules(apps()), authorization => Authorization, security => [#{'basicAuth' => []}, #{'bearerAuth' => []}], diff --git a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl index 0344c84c4..a5b826cca 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_swagger.erl @@ -19,12 +19,17 @@ -include_lib("typerefl/include/types.hrl"). -include_lib("hocon/include/hoconsc.hrl"). +-define(BASE_PATH, "/api/v5"). + %% API -export([spec/1, spec/2]). -export([namespace/0, namespace/1, fields/1]). -export([schema_with_example/2, schema_with_examples/2]). -export([error_codes/1, error_codes/2]). -export([file_schema/1]). +-export([base_path/0]). +-export([relative_uri/1]). +-export([compose_filters/2]). -export([ filter_check_request/2, @@ -84,14 +89,30 @@ -type request() :: #{bindings => map(), query_string => map(), body => map()}. -type request_meta() :: #{module => module(), path => string(), method => atom()}. 
--type filter_result() :: {ok, request()} | {400, 'BAD_REQUEST', binary()}. --type filter() :: fun((request(), request_meta()) -> filter_result()). +%% More exact types are defined in minirest.hrl, but we don't want to include it +%% because it defines a lot of types and they may clash with the types declared locally. +-type status_code() :: pos_integer(). +-type error_code() :: atom() | binary(). +-type error_message() :: binary(). +-type response_body() :: term(). +-type headers() :: map(). + +-type response() :: + status_code() + | {status_code()} + | {status_code(), response_body()} + | {status_code(), headers(), response_body()} + | {status_code(), error_code(), error_message()}. + +-type filter_result() :: {ok, request()} | response(). +-type filter() :: emqx_maybe:t(fun((request(), request_meta()) -> filter_result())). -type spec_opts() :: #{ check_schema => boolean() | filter(), translate_body => boolean(), schema_converter => fun((hocon_schema:schema(), Module :: atom()) -> map()), - i18n_lang => atom() | string() | binary() + i18n_lang => atom() | string() | binary(), + filter => filter() }. -type route_path() :: string() | binary(). @@ -117,9 +138,9 @@ spec(Module, Options) -> lists:foldl( fun(Path, {AllAcc, AllRefsAcc}) -> {OperationId, Specs, Refs} = parse_spec_ref(Module, Path, Options), - CheckSchema = support_check_schema(Options), + Opts = #{filter => filter(Options)}, { - [{filename:join("/", Path), Specs, OperationId, CheckSchema} | AllAcc], + [{filename:join("/", Path), Specs, OperationId, Opts} | AllAcc], Refs ++ AllRefsAcc } end, @@ -184,6 +205,14 @@ error_codes(Codes = [_ | _], MsgDesc) -> })} ]. +-spec base_path() -> uri_string:uri_string(). +base_path() -> + ?BASE_PATH. + +-spec relative_uri(uri_string:uri_string()) -> uri_string:uri_string(). +relative_uri(Uri) -> + base_path() ++ Uri. + file_schema(FileName) -> #{ content => #{ @@ -242,6 +271,21 @@ gen_api_schema_json_iodata(SchemaMod, SchemaInfo, Converter) -> [pretty, force_utf8] ). 
+-spec compose_filters(filter(), filter()) -> filter(). +compose_filters(undefined, Filter2) -> + Filter2; +compose_filters(Filter1, undefined) -> + Filter1; +compose_filters(Filter1, Filter2) -> + fun(Request, RequestMeta) -> + case Filter1(Request, RequestMeta) of + {ok, Request1} -> + Filter2(Request1, RequestMeta); + Response -> + Response + end + end. + %%------------------------------------------------------------------------------ %% Private functions %%------------------------------------------------------------------------------ @@ -273,14 +317,22 @@ check_only(Schema, Map, Opts) -> _ = hocon_tconf:check_plain(Schema, Map, Opts), Map. -support_check_schema(#{check_schema := true, translate_body := true}) -> - #{filter => fun ?MODULE:filter_check_request_and_translate_body/2}; -support_check_schema(#{check_schema := true}) -> - #{filter => fun ?MODULE:filter_check_request/2}; -support_check_schema(#{check_schema := Filter}) when is_function(Filter, 2) -> - #{filter => Filter}; -support_check_schema(_) -> - #{filter => undefined}. +filter(Options) -> + CheckSchemaFilter = check_schema_filter(Options), + CustomFilter = custom_filter(Options), + compose_filters(CheckSchemaFilter, CustomFilter). + +custom_filter(Options) -> + maps:get(filter, Options, undefined). + +check_schema_filter(#{check_schema := true, translate_body := true}) -> + fun ?MODULE:filter_check_request_and_translate_body/2; +check_schema_filter(#{check_schema := true}) -> + fun ?MODULE:filter_check_request/2; +check_schema_filter(#{check_schema := Filter}) when is_function(Filter, 2) -> + Filter; +check_schema_filter(_) -> + undefined. 
parse_spec_ref(Module, Path, Options) -> Schema = diff --git a/apps/emqx_dashboard/test/emqx_dashboard_api_test_helpers.erl b/apps/emqx_dashboard/test/emqx_dashboard_api_test_helpers.erl index 25b4065de..908d7e0ef 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_api_test_helpers.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_api_test_helpers.erl @@ -26,11 +26,12 @@ request/4, multipart_formdata_request/3, multipart_formdata_request/4, + host/0, uri/0, uri/1 ]). --define(HOST, "http://127.0.0.1:18083/"). +-define(HOST, "http://127.0.0.1:18083"). -define(API_VERSION, "v5"). -define(BASE_PATH, "api"). @@ -98,10 +99,13 @@ request(Username, Method, Url, Body) -> {error, Reason} end. +host() -> + ?HOST. + uri() -> uri([]). uri(Parts) when is_list(Parts) -> NParts = [E || E <- Parts], - ?HOST ++ to_list(filename:join([?BASE_PATH, ?API_VERSION | NParts])). + host() ++ "/" ++ to_list(filename:join([?BASE_PATH, ?API_VERSION | NParts])). auth_header(Username) -> Password = <<"public">>, diff --git a/apps/emqx_ft/BSL.txt b/apps/emqx_ft/BSL.txt new file mode 100644 index 000000000..0acc0e696 --- /dev/null +++ b/apps/emqx_ft/BSL.txt @@ -0,0 +1,94 @@ +Business Source License 1.1 + +Licensor: Hangzhou EMQ Technologies Co., Ltd. +Licensed Work: EMQX Enterprise Edition + The Licensed Work is (c) 2023 + Hangzhou EMQ Technologies Co., Ltd. +Additional Use Grant: Students and educators are granted right to copy, + modify, and create derivative work for research + or education. +Change Date: 2027-02-01 +Change License: Apache License, Version 2.0 + +For information about alternative licensing arrangements for the Software, +please contact Licensor: https://www.emqx.com/en/contact + +Notice + +The Business Source License (this document, or the ā€œLicenseā€) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. 
+ +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +ā€œBusiness Source Licenseā€ is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. 
+ +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN ā€œAS ISā€ BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark ā€œBusiness Source Licenseā€, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the ā€œBusiness +Source Licenseā€ name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where ā€œcompatibleā€ means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text ā€œNoneā€. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. 
diff --git a/apps/emqx_ft/README.md b/apps/emqx_ft/README.md new file mode 100644 index 000000000..a479754d2 --- /dev/null +++ b/apps/emqx_ft/README.md @@ -0,0 +1,86 @@ +# EMQX File Transfer + +EMQX File Transfer application enables the _File Transfer over MQTT_ feature described in [EIP-0021](https://github.com/emqx/eip), and provides support to publish transferred files either to the node-local file system or to the S3 API compatible remote object storage. + +## Usage + +As almost any other EMQX application, `emqx_ft` is configured via the EMQX configuration system. The following snippet is the minimal configuration that will enable File Transfer over MQTT. + +``` +file_transfer { + enabled = true +} +``` + +The configuration above will make File Transfer available to all MQTT clients, and will use the default storage backend, which in turn uses node-local file system both for temporary storage and for the final destination of the transferred files. + +## Configuration + +Every configuration parameter is described in the `emqx_ft_schema` module. + +The most important configuration parameter is `storage`, which defines the storage backend to use. Currently, only `local` storage backend is available, which stores all the temporary data accumulating during file transfers in the node-local file system. Those go into `${EMQX_DATA_DIR}/file_transfer` directory by default, but can be configured via `local.storage.segments.root` parameter. The final destination of the transferred files on the other hand is defined by `local.storage.exporter` parameter, and currently can be either `local` or `s3`. + +### Local Exporter + +The `local` exporter is the default one, and it stores the transferred files in the node-local file system. The final destination directory is defined by `local.storage.exporter.local.root` parameter, and defaults to `${EMQX_DATA_DIR}/file_transfer/exports` directory. 
+ +``` +file_transfer { + enabled = true + storage { + local { + exporter { + local { root = "/var/lib/emqx/transfers" } + } + } + } +} +``` + +Important to note that even though the transferred files go into the node-local file system, the File Transfer API provides a cluster-wide view of the transferred files, and any file can be downloaded from any node in the cluster. + +### S3 Exporter + +The `s3` exporter stores the transferred files in the S3 API compatible remote object storage. The destination bucket is defined by `local.storage.exporter.s3.bucket` parameter. + +This snippet configures File Transfer to store the transferred files in the `my-bucket` bucket in the `us-east-1` region of the AWS S3 service. + +``` +file_transfer { + enabled = true + storage { + local { + exporter { + s3 { + host = "s3.us-east-1.amazonaws.com" + port = "443" + access_key_id = "AKIA27EZDDM9XLINWXFE" + secret_access_key = "..." + bucket = "my-bucket" + } + } + } + } +} + +``` + +## API + +### MQTT + +When enabled, File Transfer application reserves MQTT topics starting with `$file/` prefix for the purpose of serving the File Transfer protocol, as described in [EIP-0021](https://github.com/emqx/eip). + +### REST + +Application publishes a basic set of APIs, to: +* List all the transferred files available for download. +* Configure the application, including the storage backend. +* (When using `local` storage exporter) Download the transferred files. + +Switching to the `s3` storage exporter is possible at any time, but the files transferred before the switch will not be +available for download anymore. Though, the files will still be available in the node-local file system. + +## Contributing + +Please see our [contributing.md](../../CONTRIBUTING.md). 
diff --git a/apps/emqx_ft/docker-ct b/apps/emqx_ft/docker-ct new file mode 100644 index 000000000..36f9d86d3 --- /dev/null +++ b/apps/emqx_ft/docker-ct @@ -0,0 +1 @@ +minio diff --git a/apps/emqx_ft/etc/emqx_ft.conf b/apps/emqx_ft/etc/emqx_ft.conf new file mode 100644 index 000000000..e69de29bb diff --git a/apps/emqx_ft/include/emqx_ft_storage_fs.hrl b/apps/emqx_ft/include/emqx_ft_storage_fs.hrl new file mode 100644 index 000000000..81ab9cfad --- /dev/null +++ b/apps/emqx_ft/include/emqx_ft_storage_fs.hrl @@ -0,0 +1,29 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-ifndef(EMQX_FT_STORAGE_FS_HRL). +-define(EMQX_FT_STORAGE_FS_HRL, true). + +-record(gcstats, { + started_at :: integer(), + finished_at :: integer() | undefined, + files = 0 :: non_neg_integer(), + directories = 0 :: non_neg_integer(), + space = 0 :: non_neg_integer(), + errors = #{} :: #{_GCSubject => {error, _}} +}). + +-endif. diff --git a/apps/emqx_ft/rebar.config b/apps/emqx_ft/rebar.config new file mode 100644 index 000000000..2c0962035 --- /dev/null +++ b/apps/emqx_ft/rebar.config @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- + +{erl_opts, [debug_info]}. +{deps, [{emqx, {path, "../emqx"}}]}. + +{shell, [ + % {config, "config/sys.config"}, + {apps, [emqx_ft]} +]}. 
+ +{project_plugins, [erlfmt]}. diff --git a/apps/emqx_ft/src/emqx_ft.app.src b/apps/emqx_ft/src/emqx_ft.app.src new file mode 100644 index 000000000..058fe984a --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft.app.src @@ -0,0 +1,14 @@ +{application, emqx_ft, [ + {description, "EMQX file transfer over MQTT"}, + {vsn, "0.1.0"}, + {registered, []}, + {mod, {emqx_ft_app, []}}, + {applications, [ + kernel, + stdlib, + gproc, + emqx_s3 + ]}, + {env, []}, + {modules, []} +]}. diff --git a/apps/emqx_ft/src/emqx_ft.erl b/apps/emqx_ft/src/emqx_ft.erl new file mode 100644 index 000000000..898203b51 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft.erl @@ -0,0 +1,425 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft). + +-include_lib("emqx/include/emqx.hrl"). +-include_lib("emqx/include/emqx_mqtt.hrl"). +-include_lib("emqx/include/emqx_hooks.hrl"). +-include_lib("snabbkaffe/include/trace.hrl"). + +-export([ + hook/0, + unhook/0 +]). + +-export([ + on_message_publish/1, + on_message_puback/4 +]). + +-export([ + decode_filemeta/1, + encode_filemeta/1 +]). + +-export([on_complete/4]). + +-export_type([ + clientid/0, + transfer/0, + bytes/0, + offset/0, + filemeta/0, + segment/0, + checksum/0 +]). 
%% Number of bytes
-type bytes() :: non_neg_integer().

%% MQTT Client ID
-type clientid() :: binary().

-type fileid() :: binary().
-type transfer() :: {clientid(), fileid()}.
-type offset() :: bytes().
%% Checksum as an {Algorithm, Digest} pair, e.g. {sha256, <<...>>}.
-type checksum() :: {_Algo :: atom(), _Digest :: binary()}.

-type filemeta() :: #{
    %% Display name
    name := string(),
    %% Size in bytes, as advertised by the client.
    %% Client is free to specify here whatever it wants, which means we can end
    %% up with a file of different size after assembly. It's not clear from
    %% specification what that means (e.g. what are clients' expectations), we
    %% currently do not consider that an error (or, specifically, a signal that
    %% the resulting file is corrupted during transmission).
    size => _Bytes :: non_neg_integer(),
    checksum => checksum(),
    expire_at := emqx_datetime:epoch_second(),
    %% TTL of individual segments
    %% Somewhat confusing that we won't know it on the nodes where the filemeta
    %% is missing.
    segments_ttl => _Seconds :: pos_integer(),
    user_data => emqx_ft_schema:json_value()
}.

%% A chunk of file content at a given byte offset.
-type segment() :: {offset(), _Content :: binary()}.

%%--------------------------------------------------------------------
%% API for app
%%--------------------------------------------------------------------

%% Attach the file-transfer hooks to the broker. Registered with the
%% lowest priority so every other hook sees the message first.
hook() ->
    ok = emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_LOWEST),
    ok = emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST).

%% Detach the hooks registered by `hook/0`.
unhook() ->
    ok = emqx_hooks:del('message.publish', {?MODULE, on_message_publish}),
    ok = emqx_hooks:del('message.puback', {?MODULE, on_message_puback}).
%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------

%% Decode client-supplied filemeta: accepts either the raw JSON payload
%% of the `init` command or an already-decoded map, and checks it
%% against the filemeta schema. Returns `{ok, Meta}` with atom keys, or
%% a tagged error telling whether JSON decoding or validation failed.
decode_filemeta(Payload) when is_binary(Payload) ->
    case emqx_utils_json:safe_decode(Payload, [return_maps]) of
        {ok, Decoded} ->
            decode_filemeta(Decoded);
        {error, Reason} ->
            {error, {invalid_filemeta_json, Reason}}
    end;
decode_filemeta(Map) when is_map(Map) ->
    try
        CheckOpts = #{atom_key => true, required => false},
        Meta = hocon_tconf:check_plain(emqx_ft_schema:schema(filemeta), Map, CheckOpts),
        {ok, Meta}
    catch
        throw:{_Schema, Errors} ->
            {error, {invalid_filemeta, Errors}}
    end.

%% Serialize a filemeta map back into its schema-defined wire form.
encode_filemeta(Meta = #{}) ->
    hocon_tconf:make_serializable(
        emqx_ft_schema:schema(filemeta),
        emqx_utils_maps:binary_key_map(Meta),
        #{}
    ).

%%--------------------------------------------------------------------
%% Hooks
%%--------------------------------------------------------------------

%% Suppress regular routing of `$file/...` messages: they carry file
%% transfer commands and are processed on PUBACK instead.
on_message_publish(Msg = #message{topic = <<"$file/", _/binary>>}) ->
    Headers = Msg#message.headers,
    {stop, Msg#message{headers = Headers#{allow_publish => false}}};
on_message_publish(Msg) ->
    {ok, Msg}.

%% Intercept PUBACK for `$file/...` topics; the reason code is derived
%% from the outcome of the file command.
on_message_puback(PacketId, Msg = #message{topic = <<"$file/", Command/binary>>}, _PubRes, _RC) ->
    {stop, on_file_command(PacketId, Msg, Command)};
on_message_puback(_PacketId, _Msg, _PubRes, _RC) ->
    ignore.

%%--------------------------------------------------------------------
%% Handlers for transfer messages
%%--------------------------------------------------------------------

%% TODO Move to emqx_ft_mqtt?

%% Tokenize the command topic; the leading token is always the file id,
%% the remaining tokens select the concrete command.
on_file_command(PacketId, Msg, FileCommand) ->
    case emqx_topic:tokens(FileCommand) of
        [FileIdIn | CommandTokens] ->
            validate([{fileid, FileIdIn}], fun([FileId]) ->
                on_file_command(PacketId, FileId, Msg, CommandTokens)
            end);
        [] ->
            ?RC_UNSPECIFIED_ERROR
    end.
%% Dispatch a tokenized `$file/FileId/...` command. Every argument is
%% validated before the corresponding handler runs; any validation
%% failure maps to ?RC_UNSPECIFIED_ERROR.
on_file_command(PacketId, FileId, Msg, FileCommand) ->
    Transfer = transfer(Msg, FileId),
    case FileCommand of
        [<<"init">>] ->
            %% `$file/FileId/init`: payload carries the filemeta JSON.
            validate(
                [{filemeta, Msg#message.payload}],
                fun([Meta]) ->
                    on_init(PacketId, Msg, Transfer, Meta)
                end
            );
        [<<"fin">>, FinalSizeBin | MaybeChecksum] when length(MaybeChecksum) =< 1 ->
            %% `$file/FileId/fin/Size[/Checksum]`: checksum is optional.
            ChecksumBin = emqx_maybe:from_list(MaybeChecksum),
            validate(
                [{size, FinalSizeBin}, {{maybe, checksum}, ChecksumBin}],
                fun([FinalSize, Checksum]) ->
                    on_fin(PacketId, Msg, Transfer, FinalSize, Checksum)
                end
            );
        [<<"abort">>] ->
            on_abort(Msg, Transfer);
        [OffsetBin] ->
            %% `$file/FileId/Offset`: a segment without integrity check.
            validate([{offset, OffsetBin}], fun([Offset]) ->
                on_segment(PacketId, Msg, Transfer, Offset, undefined)
            end);
        [OffsetBin, ChecksumBin] ->
            %% `$file/FileId/Offset/Checksum`: verify the segment payload
            %% against the supplied checksum before storing it.
            validate(
                [{offset, OffsetBin}, {checksum, ChecksumBin}],
                fun([Offset, Checksum]) ->
                    validate(
                        [{integrity, Msg#message.payload, Checksum}],
                        fun(_) ->
                            on_segment(PacketId, Msg, Transfer, Offset, Checksum)
                        end
                    )
                end
            );
        _ ->
            ?RC_UNSPECIFIED_ERROR
    end.

%% Handle `init`: store the filemeta. The PUBACK is emitted via the
%% responder process — immediately for a synchronous result, or later
%% when the responder is acked or its timeout fires.
on_init(PacketId, Msg, Transfer, Meta) ->
    ?tp(info, "file_transfer_init", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        filemeta => Meta
    }),
    PacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("store_filemeta", PacketKey, Transfer, Result)
    end,
    with_responder(PacketKey, Callback, emqx_ft_conf:init_timeout(), fun() ->
        case store_filemeta(Transfer, Meta) of
            % Stored, ack through the responder right away
            ok ->
                emqx_ft_responder:ack(PacketKey, ok);
            % Storage operation started, packet will be acked by the responder
            % {async, Pid} ->
            %     ok = emqx_ft_responder:kickoff(PacketKey, Pid),
            %     ok;
            %% Storage operation failed, ack through the responder
            {error, _} = Error ->
                emqx_ft_responder:ack(PacketKey, Error)
        end
    end).

%% Handle `abort`: not implemented yet, always reported as success.
on_abort(_Msg, _FileId) ->
    %% TODO
    ?RC_SUCCESS.
%% Handle a segment command: store `{Offset, Payload}` for the transfer.
%% PUBACK is produced through the responder, as in `on_init/4`.
on_segment(PacketId, Msg, Transfer, Offset, Checksum) ->
    ?tp(info, "file_transfer_segment", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        offset => Offset,
        checksum => Checksum
    }),
    Segment = {Offset, Msg#message.payload},
    PacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("store_segment", PacketKey, Transfer, Result)
    end,
    with_responder(PacketKey, Callback, emqx_ft_conf:store_segment_timeout(), fun() ->
        case store_segment(Transfer, Segment) of
            ok ->
                emqx_ft_responder:ack(PacketKey, ok);
            % {async, Pid} ->
            %     ok = emqx_ft_responder:kickoff(PacketKey, Pid),
            %     ok;
            {error, _} = Error ->
                emqx_ft_responder:ack(PacketKey, Error)
        end
    end).

%% Handle `fin`: kick off assembly of the stored segments. Assembly may
%% be asynchronous, in which case the responder acks the PUBACK once the
%% assembler process finishes (or the timeout fires).
on_fin(PacketId, Msg, Transfer, FinalSize, Checksum) ->
    ?tp(info, "file_transfer_fin", #{
        mqtt_msg => Msg,
        packet_id => PacketId,
        transfer => Transfer,
        final_size => FinalSize,
        checksum => Checksum
    }),
    %% TODO: handle checksum? Do we need it?
    FinPacketKey = {self(), PacketId},
    Callback = fun(Result) ->
        ?MODULE:on_complete("assemble", FinPacketKey, Transfer, Result)
    end,
    with_responder(FinPacketKey, Callback, emqx_ft_conf:assemble_timeout(), fun() ->
        case assemble(Transfer, FinalSize) of
            %% Assembling completed, ack through the responder right away
            ok ->
                emqx_ft_responder:ack(FinPacketKey, ok);
            %% Assembling started, packet will be acked by the responder
            {async, Pid} ->
                ok = emqx_ft_responder:kickoff(FinPacketKey, Pid),
                ok;
            %% Assembling failed, ack through the responder
            {error, _} = Error ->
                emqx_ft_responder:ack(FinPacketKey, Error)
        end
    end).

%% Run `CriticalSection` only if a fresh responder could be started for
%% `Key`; a duplicate (likely a client retransmit) is silently ignored.
%% Always returns `undefined`: the PUBACK reason code is delivered
%% asynchronously by the responder callback.
with_responder(Key, Callback, Timeout, CriticalSection) ->
    case emqx_ft_responder:start(Key, Callback, Timeout) of
        %% We have new packet
        {ok, _} ->
            CriticalSection();
        %% Packet already received.
        %% Since we are still handling the previous one,
        %% we probably have retransmit here
        {error, {already_started, _}} ->
            ok
    end,
    undefined.

%% Store filemeta, converting any crash in the storage layer into an
%% `{error, {internal_error, _}}` term.
%% NOTE(review): the second argument is the filemeta, despite the
%% variable name `Segment` — consider renaming.
store_filemeta(Transfer, Segment) ->
    try
        emqx_ft_storage:store_filemeta(Transfer, Segment)
    catch
        C:E:S ->
            ?tp(error, "start_store_filemeta_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.

%% Store one segment, converting any crash in the storage layer into an
%% `{error, {internal_error, _}}` term.
store_segment(Transfer, Segment) ->
    try
        emqx_ft_storage:store_segment(Transfer, Segment)
    catch
        C:E:S ->
            ?tp(error, "start_store_segment_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.

%% Start assembling the transfer, converting any crash in the storage
%% layer into an `{error, {internal_error, _}}` term.
assemble(Transfer, FinalSize) ->
    try
        emqx_ft_storage:assemble(Transfer, FinalSize)
    catch
        C:E:S ->
            ?tp(error, "start_assemble_failed", #{
                class => C, reason => E, stacktrace => S
            }),
            {error, {internal_error, E}}
    end.

%% Build the transfer id from the message sender and the file id.
transfer(Msg, FileId) ->
    ClientId = Msg#message.from,
    {clientid_to_binary(ClientId), FileId}.

%% Responder callback: translate the operation outcome into a PUBACK
%% sent to the owning channel process. `Op` is a plain string used for
%% building trace event names.
on_complete(Op, {ChanPid, PacketId}, Transfer, Result) ->
    ?tp(debug, "on_complete", #{
        operation => Op,
        packet_id => PacketId,
        transfer => Transfer
    }),
    case Result of
        {Mode, ok} when Mode == ack orelse Mode == down ->
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_SUCCESS});
        {Mode, {error, _} = Reason} when Mode == ack orelse Mode == down ->
            ?tp(error, Op ++ "_failed", #{
                transfer => Transfer,
                reason => Reason
            }),
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR});
        timeout ->
            ?tp(error, Op ++ "_timed_out", #{
                transfer => Transfer
            }),
            erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR})
    end.

%% Run `Fun` with the parsed values if all validations pass; otherwise
%% log the protocol violation and return an error reason code.
validate(Validations, Fun) ->
    case do_validate(Validations, []) of
        {ok, Parsed} ->
            Fun(Parsed);
        {error, Reason} ->
            ?tp(info, "client_violated_protocol", #{reason => Reason}),
            ?RC_UNSPECIFIED_ERROR
    end.
%% Run the per-item parsers/validators in order, accumulating parsed
%% values. Returns `{ok, ParsedValues}` (in input order) or the first
%% `{error, Reason}` encountered.
do_validate([], Parsed) ->
    {ok, lists:reverse(Parsed)};
do_validate([{fileid, FileId} | Rest], Parsed) ->
    case byte_size(FileId) of
        S when S > 0 ->
            do_validate(Rest, [FileId | Parsed]);
        0 ->
            {error, {invalid_fileid, FileId}}
    end;
do_validate([{filemeta, Payload} | Rest], Parsed) ->
    case decode_filemeta(Payload) of
        {ok, Meta} ->
            do_validate(Rest, [Meta | Parsed]);
        {error, Reason} ->
            {error, Reason}
    end;
do_validate([{offset, Offset} | Rest], Parsed) ->
    %% An offset is a byte position: it must parse fully (no trailing
    %% garbage) and be non-negative. `string:to_integer/1` happily
    %% parses "-5", so guard against negative values explicitly.
    case string:to_integer(Offset) of
        {Int, <<>>} when Int >= 0 ->
            do_validate(Rest, [Int | Parsed]);
        _ ->
            {error, {invalid_offset, Offset}}
    end;
do_validate([{size, Size} | Rest], Parsed) ->
    %% The final size is a byte count: parse fully and reject negatives
    %% for the same reason as offsets above.
    case string:to_integer(Size) of
        {Int, <<>>} when Int >= 0 ->
            do_validate(Rest, [Int | Parsed]);
        _ ->
            {error, {invalid_size, Size}}
    end;
do_validate([{checksum, Checksum} | Rest], Parsed) ->
    case parse_checksum(Checksum) of
        {ok, Bin} ->
            do_validate(Rest, [Bin | Parsed]);
        {error, _Reason} ->
            {error, {invalid_checksum, Checksum}}
    end;
do_validate([{integrity, Payload, Checksum} | Rest], Parsed) ->
    %% Only SHA-256 is supported here; `Checksum` is the raw digest.
    case crypto:hash(sha256, Payload) of
        Checksum ->
            do_validate(Rest, [Payload | Parsed]);
        Mismatch ->
            {error, {checksum_mismatch, binary:encode_hex(Mismatch)}}
    end;
do_validate([{{maybe, _}, undefined} | Rest], Parsed) ->
    %% Optional item that was not provided: validates to `undefined`.
    do_validate(Rest, [undefined | Parsed]);
do_validate([{{maybe, T}, Value} | Rest], Parsed) ->
    do_validate([{T, Value} | Rest], Parsed).

%% Parse a hex-encoded SHA-256 digest (64 hex chars -> 32 raw bytes).
%% `binary:decode_hex/1` raises badarg on non-hex input.
parse_checksum(Checksum) when is_binary(Checksum) andalso byte_size(Checksum) =:= 64 ->
    try
        {ok, binary:decode_hex(Checksum)}
    catch
        error:badarg ->
            {error, invalid_checksum}
    end;
parse_checksum(_Checksum) ->
    {error, invalid_checksum}.

%% `#message.from` may hold an atom (e.g. for internally generated
%% messages); normalize the client id to a binary.
clientid_to_binary(A) when is_atom(A) ->
    atom_to_binary(A);
clientid_to_binary(B) when is_binary(B) ->
    B.
diff --git a/apps/emqx_ft/src/emqx_ft_api.erl b/apps/emqx_ft/src/emqx_ft_api.erl new file mode 100644 index 000000000..3fd279c76 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_api.erl @@ -0,0 +1,239 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ft_api). + +-behaviour(minirest_api). + +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). + +%% Swagger specs from hocon schema +-export([ + api_spec/0, + paths/0, + schema/1, + namespace/0 +]). + +-export([ + roots/0, + fields/1 +]). + +%% Minirest filter for checking if file transfer is enabled +-export([check_ft_enabled/2]). + +%% API callbacks +-export([ + '/file_transfer/files'/2, + '/file_transfer/files/:clientid/:fileid'/2 +]). + +-import(hoconsc, [mk/2, ref/1, ref/2]). + +namespace() -> "file_transfer". + +api_spec() -> + emqx_dashboard_swagger:spec(?MODULE, #{ + check_schema => true, filter => fun ?MODULE:check_ft_enabled/2 + }). + +paths() -> + [ + "/file_transfer/files", + "/file_transfer/files/:clientid/:fileid" + ]. 
+ +schema("/file_transfer/files") -> + #{ + 'operationId' => '/file_transfer/files', + get => #{ + tags => [<<"file_transfer">>], + summary => <<"List all uploaded files">>, + description => ?DESC("file_list"), + parameters => [ + ref(following), + ref(emqx_dashboard_swagger, limit) + ], + responses => #{ + 200 => <<"Operation success">>, + 400 => emqx_dashboard_swagger:error_codes( + ['BAD_REQUEST'], <<"Invalid cursor">> + ), + 503 => emqx_dashboard_swagger:error_codes( + ['SERVICE_UNAVAILABLE'], error_desc('SERVICE_UNAVAILABLE') + ) + } + } + }; +schema("/file_transfer/files/:clientid/:fileid") -> + #{ + 'operationId' => '/file_transfer/files/:clientid/:fileid', + get => #{ + tags => [<<"file_transfer">>], + summary => <<"List files uploaded in a specific transfer">>, + description => ?DESC("file_list_transfer"), + parameters => [ + ref(client_id), + ref(file_id) + ], + responses => #{ + 200 => <<"Operation success">>, + 404 => emqx_dashboard_swagger:error_codes( + ['FILES_NOT_FOUND'], error_desc('FILES_NOT_FOUND') + ), + 503 => emqx_dashboard_swagger:error_codes( + ['SERVICE_UNAVAILABLE'], error_desc('SERVICE_UNAVAILABLE') + ) + } + } + }. + +check_ft_enabled(Params, _Meta) -> + case emqx_ft_conf:enabled() of + true -> + {ok, Params}; + false -> + {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)} + end. + +'/file_transfer/files'(get, #{ + query_string := QueryString +}) -> + try + Limit = limit(QueryString), + Query = + case maps:get(<<"following">>, QueryString, undefined) of + undefined -> + #{limit => Limit}; + Cursor -> + #{limit => Limit, following => Cursor} + end, + case emqx_ft_storage:files(Query) of + {ok, Page} -> + {200, format_page(Page)}; + {error, _} -> + {503, error_msg('SERVICE_UNAVAILABLE')} + end + catch + error:{badarg, cursor} -> + {400, error_msg('BAD_REQUEST', <<"Invalid cursor">>)} + end. 
+ +'/file_transfer/files/:clientid/:fileid'(get, #{ + bindings := #{clientid := ClientId, fileid := FileId} +}) -> + Transfer = {ClientId, FileId}, + case emqx_ft_storage:files(#{transfer => Transfer}) of + {ok, Page} -> + {200, format_page(Page)}; + {error, [{_Node, enoent} | _]} -> + {404, error_msg('FILES_NOT_FOUND')}; + {error, _} -> + {503, error_msg('SERVICE_UNAVAILABLE')} + end. + +format_page(#{items := Files, cursor := Cursor}) -> + #{ + <<"files">> => lists:map(fun format_file_info/1, Files), + <<"cursor">> => Cursor + }; +format_page(#{items := Files}) -> + #{ + <<"files">> => lists:map(fun format_file_info/1, Files) + }. + +error_msg(Code) -> + #{code => Code, message => error_desc(Code)}. + +error_msg(Code, Msg) -> + #{code => Code, message => emqx_utils:readable_error_msg(Msg)}. + +error_desc('FILES_NOT_FOUND') -> + <<"Files requested for this transfer could not be found">>; +error_desc('SERVICE_UNAVAILABLE') -> + <<"Service unavailable">>. + +roots() -> + []. + +-spec fields(hocon_schema:name()) -> [hoconsc:field()]. +fields(client_id) -> + [ + {clientid, + mk(binary(), #{ + in => path, + desc => <<"MQTT Client ID">>, + required => true + })} + ]; +fields(file_id) -> + [ + {fileid, + mk(binary(), #{ + in => path, + desc => <<"File ID">>, + required => true + })} + ]; +fields(following) -> + [ + {following, + mk(binary(), #{ + in => query, + desc => <<"Cursor to start listing files from">>, + required => false + })} + ]. 
+ +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +format_file_info( + Info = #{ + name := Name, + size := Size, + uri := URI, + timestamp := Timestamp, + transfer := {ClientId, FileId} + } +) -> + Res = #{ + name => format_name(Name), + size => Size, + timestamp => format_timestamp(Timestamp), + clientid => ClientId, + fileid => FileId, + uri => iolist_to_binary(URI) + }, + case Info of + #{meta := Meta} -> + Res#{metadata => emqx_ft:encode_filemeta(Meta)}; + #{} -> + Res + end. + +format_timestamp(Timestamp) -> + iolist_to_binary(calendar:system_time_to_rfc3339(Timestamp, [{unit, second}])). + +format_name(NameBin) when is_binary(NameBin) -> + NameBin; +format_name(Name) when is_list(Name) -> + iolist_to_binary(Name). + +limit(QueryString) -> + maps:get(<<"limit">>, QueryString, emqx_mgmt:default_row_limit()). diff --git a/apps/emqx_ft/src/emqx_ft_app.erl b/apps/emqx_ft/src/emqx_ft_app.erl new file mode 100644 index 000000000..299683e43 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_app.erl @@ -0,0 +1,30 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_app). + +-behaviour(application). + +-export([start/2, stop/1]). 
+ +start(_StartType, _StartArgs) -> + {ok, Sup} = emqx_ft_sup:start_link(), + ok = emqx_ft_conf:load(), + {ok, Sup}. + +stop(_State) -> + ok = emqx_ft_conf:unload(), + ok. diff --git a/apps/emqx_ft/src/emqx_ft_assembler.erl b/apps/emqx_ft/src/emqx_ft_assembler.erl new file mode 100644 index 000000000..873efc6ff --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_assembler.erl @@ -0,0 +1,192 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_assembler). + +-export([start_link/3]). + +-behaviour(gen_statem). +-export([callback_mode/0]). +-export([init/1]). +-export([handle_event/4]). +-export([terminate/3]). + +-export([where/1]). + +-type stdata() :: #{ + storage := emqx_ft_storage_fs:storage(), + transfer := emqx_ft:transfer(), + assembly := emqx_ft_assembly:t(), + export => emqx_ft_storage_exporter:export() +}. + +-define(NAME(Transfer), {n, l, {?MODULE, Transfer}}). +-define(REF(Transfer), {via, gproc, ?NAME(Transfer)}). + +%% + +start_link(Storage, Transfer, Size) -> + gen_statem:start_link(?REF(Transfer), ?MODULE, {Storage, Transfer, Size}, []). + +where(Transfer) -> + gproc:where(?NAME(Transfer)). 
+
+%%
+
+-type state() ::
+    idle
+    | list_local_fragments
+    | {list_remote_fragments, [node()]}
+    | start_assembling
+    | {assemble, [{node(), emqx_ft_storage_fs:filefrag()}]}
+    | complete.
+
+-define(internal(C), {next_event, internal, C}).
+
+callback_mode() ->
+    handle_event_function.
+
+-spec init(_Args) -> {ok, state(), stdata()}.
+init({Storage, Transfer, Size}) ->
+    _ = erlang:process_flag(trap_exit, true),
+    St = #{
+        storage => Storage,
+        transfer => Transfer,
+        assembly => emqx_ft_assembly:new(Size)
+    },
+    {ok, idle, St}.
+
+-spec handle_event(info | internal, _, state(), stdata()) ->
+    {next_state, state(), stdata(), {next_event, internal, _}}
+    | {stop, {shutdown, ok | {error, _}}, stdata()}.
+handle_event(info, kickoff, idle, St) ->
+    % NOTE
+    % Someone's told us to start the work, which usually means that it has set up a monitor.
+    % We could wait for this message and handle it at the end of the assembling rather than at
+    % the beginning, however it would make error handling much messier.
+    {next_state, list_local_fragments, St, ?internal([])};
+handle_event(info, kickoff, _, _St) ->
+    keep_state_and_data;
+handle_event(
+    internal,
+    _,
+    list_local_fragments,
+    St = #{storage := Storage, transfer := Transfer, assembly := Asm}
+) ->
+    % TODO: what do we do with non-transient errors here (e.g. `eacces`)?
+    {ok, Fragments} = emqx_ft_storage_fs:list(Storage, Transfer, fragment),
+    NAsm = emqx_ft_assembly:update(emqx_ft_assembly:append(Asm, node(), Fragments)),
+    NSt = St#{assembly := NAsm},
+    case emqx_ft_assembly:status(NAsm) of
+        complete ->
+            {next_state, start_assembling, NSt, ?internal([])};
+        {incomplete, _} ->
+            Nodes = mria_mnesia:running_nodes() -- [node()],
+            {next_state, {list_remote_fragments, Nodes}, NSt, ?internal([])};
+        % TODO: recovery?
+        {error, _} = Error ->
+            {stop, {shutdown, Error}}
+    end;
+handle_event(
+    internal,
+    _,
+    {list_remote_fragments, Nodes},
+    St = #{transfer := Transfer, assembly := Asm}
+) ->
+    % TODO
+    % Async would be better because we would not need to wait for some lagging nodes if
+    % the coverage is already complete.
+    % TODO: portable "storage" ref
+    Results = emqx_ft_storage_fs_proto_v1:multilist(Nodes, Transfer, fragment),
+    NodeResults = lists:zip(Nodes, Results),
+    NAsm = emqx_ft_assembly:update(
+        lists:foldl(
+            fun
+                ({Node, {ok, {ok, Fragments}}}, Acc) ->
+                    emqx_ft_assembly:append(Acc, Node, Fragments);
+                ({_Node, _Result}, Acc) ->
+                    % TODO: log?
+                    Acc
+            end,
+            Asm,
+            NodeResults
+        )
+    ),
+    NSt = St#{assembly := NAsm},
+    case emqx_ft_assembly:status(NAsm) of
+        complete ->
+            {next_state, start_assembling, NSt, ?internal([])};
+        % TODO: retries / recovery?
+        {incomplete, _} = Status ->
+            {stop, {shutdown, {error, Status}}};
+        {error, _} = Error ->
+            {stop, {shutdown, Error}}
+    end;
+handle_event(
+    internal,
+    _,
+    start_assembling,
+    St = #{storage := Storage, transfer := Transfer, assembly := Asm}
+) ->
+    Filemeta = emqx_ft_assembly:filemeta(Asm),
+    Coverage = emqx_ft_assembly:coverage(Asm),
+    case emqx_ft_storage_exporter:start_export(Storage, Transfer, Filemeta) of
+        {ok, Export} ->
+            {next_state, {assemble, Coverage}, St#{export => Export}, ?internal([])};
+        {error, _} = Error ->
+            {stop, {shutdown, Error}}
+    end;
+handle_event(internal, _, {assemble, [{Node, Segment} | Rest]}, St = #{export := Export}) ->
+    % TODO
+    % Currently, a race is possible between getting segment info from the remote node and
+    % this node garbage collecting the segment itself.
+ % TODO: pipelining + % TODO: better error handling + {ok, Content} = pread(Node, Segment, St), + case emqx_ft_storage_exporter:write(Export, Content) of + {ok, NExport} -> + {next_state, {assemble, Rest}, St#{export := NExport}, ?internal([])}; + {error, _} = Error -> + {stop, {shutdown, Error}, maps:remove(export, St)} + end; +handle_event(internal, _, {assemble, []}, St = #{}) -> + {next_state, complete, St, ?internal([])}; +handle_event(internal, _, complete, St = #{export := Export}) -> + Result = emqx_ft_storage_exporter:complete(Export), + _ = maybe_garbage_collect(Result, St), + {stop, {shutdown, Result}, maps:remove(export, St)}. + +-spec terminate(_Reason, state(), stdata()) -> _. +terminate(_Reason, _StateName, #{export := Export}) -> + emqx_ft_storage_exporter:discard(Export); +terminate(_Reason, _StateName, #{}) -> + ok. + +pread(Node, Segment, #{storage := Storage, transfer := Transfer}) when Node =:= node() -> + emqx_ft_storage_fs:pread(Storage, Transfer, Segment, 0, segsize(Segment)); +pread(Node, Segment, #{transfer := Transfer}) -> + emqx_ft_storage_fs_proto_v1:pread(Node, Transfer, Segment, 0, segsize(Segment)). + +%% + +maybe_garbage_collect(ok, #{storage := Storage, transfer := Transfer, assembly := Asm}) -> + Nodes = emqx_ft_assembly:nodes(Asm), + emqx_ft_storage_fs_gc:collect(Storage, Transfer, Nodes); +maybe_garbage_collect({error, _}, _St) -> + ok. + +segsize(#{fragment := {segment, Info}}) -> + maps:get(size, Info). diff --git a/apps/emqx_ft/src/emqx_ft_assembler_sup.erl b/apps/emqx_ft/src/emqx_ft_assembler_sup.erl new file mode 100644 index 000000000..4ba65c290 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_assembler_sup.erl @@ -0,0 +1,47 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_assembler_sup). + +-export([start_link/0]). +-export([ensure_child/3]). + +-behaviour(supervisor). +-export([init/1]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +ensure_child(Storage, Transfer, Size) -> + Childspec = #{ + id => Transfer, + start => {emqx_ft_assembler, start_link, [Storage, Transfer, Size]}, + restart => temporary + }, + case supervisor:start_child(?MODULE, Childspec) of + {ok, Pid} -> + {ok, Pid}; + {error, {already_started, Pid}} -> + {ok, Pid} + end. + +init(_) -> + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1000 + }, + {ok, {SupFlags, []}}. diff --git a/apps/emqx_ft/src/emqx_ft_assembly.erl b/apps/emqx_ft/src/emqx_ft_assembly.erl new file mode 100644 index 000000000..d765a2bd2 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_assembly.erl @@ -0,0 +1,416 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_assembly). + +-export([new/1]). +-export([append/3]). +-export([update/1]). + +-export([status/1]). +-export([filemeta/1]). +-export([nodes/1]). +-export([coverage/1]). +-export([properties/1]). + +-export_type([t/0]). + +-type filemeta() :: emqx_ft:filemeta(). +-type filefrag() :: emqx_ft_storage_fs:filefrag(). +-type filefrag(T) :: emqx_ft_storage_fs:filefrag(T). +-type segmentinfo() :: emqx_ft_storage_fs:segmentinfo(). + +-record(asm, { + status :: status(), + coverage :: coverage() | undefined, + properties :: properties() | undefined, + meta :: #{filemeta() => {node(), filefrag({filemeta, filemeta()})}}, + segs :: emqx_wdgraph:t(emqx_ft:offset(), {node(), filefrag({segment, segmentinfo()})}), + size :: emqx_ft:bytes() +}). + +-type status() :: + {incomplete, {missing, _}} + | complete + | {error, {inconsistent, _}}. + +-type coverage() :: [{node(), filefrag({segment, segmentinfo()})}]. + +-type properties() :: #{ + %% Node where "most" of the segments are located. + dominant => node() +}. + +-opaque t() :: #asm{}. + +-spec new(emqx_ft:bytes()) -> t(). +new(Size) -> + #asm{ + status = {incomplete, {missing, filemeta}}, + meta = #{}, + segs = emqx_wdgraph:new(), + size = Size + }. + +-spec append(t(), node(), filefrag() | [filefrag()]) -> t(). +append(Asm, Node, Fragments) when is_list(Fragments) -> + lists:foldl(fun(F, AsmIn) -> append(AsmIn, Node, F) end, Asm, Fragments); +append(Asm, Node, Fragment = #{fragment := {filemeta, _}}) -> + append_filemeta(Asm, Node, Fragment); +append(Asm, Node, Segment = #{fragment := {segment, _}}) -> + append_segmentinfo(Asm, Node, Segment). + +-spec update(t()) -> t(). 
+update(Asm) -> + case status(meta, Asm) of + {complete, _Meta} -> + case status(coverage, Asm) of + {complete, Coverage, Props} -> + Asm#asm{ + status = complete, + coverage = Coverage, + properties = Props + }; + Status -> + Asm#asm{status = Status} + end; + Status -> + Asm#asm{status = Status} + end. + +-spec status(t()) -> status(). +status(#asm{status = Status}) -> + Status. + +-spec filemeta(t()) -> filemeta(). +filemeta(Asm) -> + case status(meta, Asm) of + {complete, Meta} -> Meta; + _Other -> undefined + end. + +-spec coverage(t()) -> coverage() | undefined. +coverage(#asm{coverage = Coverage}) -> + Coverage. + +-spec nodes(t()) -> [node()]. +nodes(#asm{meta = Meta, segs = Segs}) -> + S1 = maps:fold( + fun(_Meta, {Node, _Fragment}, Acc) -> + ordsets:add_element(Node, Acc) + end, + ordsets:new(), + Meta + ), + S2 = emqx_wdgraph:fold( + fun(_Offset, {_End, _, {Node, _Fragment}}, Acc) -> + ordsets:add_element(Node, Acc) + end, + ordsets:new(), + Segs + ), + ordsets:to_list(ordsets:union(S1, S2)). + +properties(#asm{properties = Properties}) -> + Properties. + +status(meta, #asm{meta = Meta}) -> + status(meta, maps:to_list(Meta)); +status(meta, [{Meta, {_Node, _Frag}}]) -> + {complete, Meta}; +status(meta, []) -> + {incomplete, {missing, filemeta}}; +status(meta, [_M1, _M2 | _] = Metas) -> + {error, {inconsistent, [Frag#{node => Node} || {_, {Node, Frag}} <- Metas]}}; +status(coverage, #asm{segs = Segments, size = Size}) -> + case coverage(Segments, Size) of + Coverage when is_list(Coverage) -> + {complete, Coverage, #{ + dominant => dominant(Coverage) + }}; + Missing = {missing, _} -> + {incomplete, Missing} + end. + +append_filemeta(Asm, Node, Fragment = #{fragment := {filemeta, Meta}}) -> + Asm#asm{ + meta = maps:put(Meta, {Node, Fragment}, Asm#asm.meta) + }. + +append_segmentinfo(Asm, _Node, #{fragment := {segment, #{size := 0}}}) -> + % NOTE + % Empty segments are valid but meaningless for coverage. 
+    Asm;
+append_segmentinfo(Asm, Node, Fragment = #{fragment := {segment, Info}}) ->
+    Offset = maps:get(offset, Info),
+    Size = maps:get(size, Info),
+    End = Offset + Size,
+    Segs = add_edge(Asm#asm.segs, Offset, End, locality(Node) * Size, {Node, Fragment}),
+    Asm#asm{
+        % TODO
+        % In theory it's possible to have two segments with same offset + size on
+        % different nodes but with differing content. We'd need a checksum to
+        % be able to disambiguate them though.
+        segs = Segs
+    }.
+
+add_edge(Segs, Offset, End, Weight, Label) ->
+    % NOTE
+    % We are expressing the coverage problem as a shortest path problem on a weighted directed
+    % graph, where nodes are segment offsets, two nodes are connected with an edge if
+    % there is a segment which "covers" these offsets (i.e. it starts at the first node's
+    % offset and ends at the second node's offset) and weights are segment sizes adjusted
+    % for locality (i.e. weights are always 0 for any local segment).
+    case emqx_wdgraph:find_edge(Offset, End, Segs) of
+        {WeightWas, _Label} when WeightWas =< Weight ->
+            % NOTE
+            % Discarding any edges with higher weight here. This is fine as long as we
+            % optimize for locality.
+            Segs;
+        _ ->
+            emqx_wdgraph:insert_edge(Offset, End, Weight, Label, Segs)
+    end.
+
+coverage(Segs, Size) ->
+    case emqx_wdgraph:find_shortest_path(0, Size, Segs) of
+        Path when is_list(Path) ->
+            Path;
+        {false, LastOffset} ->
+            % NOTE
+            % This is far from being accurate, but needs no hairy specifics in the
+            % `emqx_wdgraph` interface.
+            {missing, {segment, LastOffset, Size}}
+    end.
+
+dominant(Coverage) ->
+    % TODO: needs improvement, better defined _dominance_, maybe some score
+    Freqs = frequencies(fun({Node, Segment}) -> {Node, segsize(Segment)} end, Coverage),
+    maxfreq(Freqs, node()).
+
+frequencies(Fun, List) ->
+    lists:foldl(
+        fun(E, Acc) ->
+            {K, N} = Fun(E),
+            maps:update_with(K, fun(M) -> M + N end, N, Acc)
+        end,
+        #{},
+        List
+    ).
+ +maxfreq(Freqs, Init) -> + {_, Max} = maps:fold( + fun + (F, N, {M, _MF}) when N > M -> {N, F}; + (_F, _N, {M, MF}) -> {M, MF} + end, + {0, Init}, + Freqs + ), + Max. + +locality(Node) when Node =:= node() -> + % NOTE + % This should prioritize locally available segments over those on remote nodes. + 0; +locality(_RemoteNode) -> + 1. + +segsize(#{fragment := {segment, Info}}) -> + maps:get(size, Info). + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +incomplete_new_test() -> + ?assertEqual( + {incomplete, {missing, filemeta}}, + status(update(new(42))) + ). + +incomplete_test() -> + ?assertEqual( + {incomplete, {missing, filemeta}}, + status( + update( + append(new(142), node(), [ + segment(p1, 0, 42), + segment(p1, 42, 100) + ]) + ) + ) + ). + +consistent_test() -> + Asm1 = append(new(42), n1, [filemeta(m1, "blarg")]), + Asm2 = append(Asm1, n2, [segment(s2, 0, 42)]), + Asm3 = append(Asm2, n3, [filemeta(m3, "blarg")]), + ?assertMatch({complete, _}, status(meta, Asm3)). + +inconsistent_test() -> + Asm1 = append(new(42), node(), [segment(s1, 0, 42)]), + Asm2 = append(Asm1, n1, [filemeta(m1, "blarg")]), + Asm3 = append(Asm2, n2, [segment(s2, 0, 42), filemeta(m1, "blorg")]), + Asm4 = append(Asm3, n3, [filemeta(m3, "blarg")]), + ?assertMatch( + {error, + {inconsistent, [ + % blarg < blorg + #{node := n3, path := m3, fragment := {filemeta, #{name := "blarg"}}}, + #{node := n2, path := m1, fragment := {filemeta, #{name := "blorg"}}} + ]}}, + status(meta, Asm4) + ). + +simple_coverage_test() -> + Node = node(), + Segs = [ + {node42, segment(n1, 20, 30)}, + {Node, segment(n2, 0, 10)}, + {Node, segment(n3, 50, 50)}, + {Node, segment(n4, 10, 10)} + ], + Asm = append_many(new(100), Segs), + ?assertMatch( + {complete, + [ + {Node, #{path := n2}}, + {Node, #{path := n4}}, + {node42, #{path := n1}}, + {Node, #{path := n3}} + ], + #{dominant := Node}}, + status(coverage, Asm) + ). 
+ +redundant_coverage_test() -> + Node = node(), + Segs = [ + {Node, segment(n1, 0, 20)}, + {node1, segment(n2, 0, 10)}, + {Node, segment(n3, 20, 40)}, + {node2, segment(n4, 10, 10)}, + {node2, segment(n5, 50, 20)}, + {node3, segment(n6, 20, 20)}, + {Node, segment(n7, 50, 10)}, + {node1, segment(n8, 40, 10)} + ], + Asm = append_many(new(70), Segs), + ?assertMatch( + {complete, + [ + {Node, #{path := n1}}, + {node3, #{path := n6}}, + {node1, #{path := n8}}, + {node2, #{path := n5}} + ], + #{dominant := _}}, + status(coverage, Asm) + ). + +redundant_coverage_prefer_local_test() -> + Node = node(), + Segs = [ + {node1, segment(n1, 0, 20)}, + {Node, segment(n2, 0, 10)}, + {Node, segment(n3, 10, 10)}, + {node2, segment(n4, 20, 20)}, + {Node, segment(n5, 30, 10)}, + {Node, segment(n6, 20, 10)} + ], + Asm = append_many(new(40), Segs), + ?assertMatch( + {complete, + [ + {Node, #{path := n2}}, + {Node, #{path := n3}}, + {Node, #{path := n6}}, + {Node, #{path := n5}} + ], + #{dominant := Node}}, + status(coverage, Asm) + ). + +missing_coverage_test() -> + Node = node(), + Segs = [ + {Node, segment(n1, 0, 10)}, + {node1, segment(n3, 10, 20)}, + {Node, segment(n2, 0, 20)}, + {node2, segment(n4, 50, 50)}, + {Node, segment(n5, 40, 60)} + ], + Asm = append_many(new(100), Segs), + ?assertEqual( + % {incomplete, {missing, {segment, 30, 40}}} would be more accurate + {incomplete, {missing, {segment, 30, 100}}}, + status(coverage, Asm) + ). + +missing_end_coverage_test() -> + Node = node(), + Segs = [ + {Node, segment(n1, 0, 15)}, + {node1, segment(n3, 10, 10)} + ], + Asm = append_many(new(20), Segs), + ?assertEqual( + {incomplete, {missing, {segment, 15, 20}}}, + status(coverage, Asm) + ). 
+ +missing_coverage_with_redudancy_test() -> + Segs = [ + {node(), segment(n1, 0, 10)}, + {node(), segment(n2, 0, 20)}, + {node42, segment(n3, 10, 20)}, + {node43, segment(n4, 10, 50)}, + {node(), segment(n5, 40, 60)} + ], + Asm = append_many(new(100), Segs), + ?assertEqual( + % {incomplete, {missing, {segment, 50, 60}}}, ??? + {incomplete, {missing, {segment, 60, 100}}}, + status(coverage, Asm) + ). + +append_many(Asm, List) -> + lists:foldl( + fun({Node, Frag}, Acc) -> append(Acc, Node, Frag) end, + Asm, + List + ). + +filemeta(Path, Name) -> + #{ + path => Path, + fragment => + {filemeta, #{ + name => Name + }} + }. + +segment(Path, Offset, Size) -> + #{ + path => Path, + fragment => + {segment, #{ + offset => Offset, + size => Size + }} + }. + +-endif. diff --git a/apps/emqx_ft/src/emqx_ft_conf.erl b/apps/emqx_ft/src/emqx_ft_conf.erl new file mode 100644 index 000000000..2e994925c --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_conf.erl @@ -0,0 +1,143 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc File Transfer configuration management module + +-module(emqx_ft_conf). + +-behaviour(emqx_config_handler). + +-include_lib("emqx/include/logger.hrl"). + +%% Accessors +-export([enabled/0]). +-export([storage/0]). 
+-export([gc_interval/1]). +-export([segments_ttl/1]). +-export([init_timeout/0]). +-export([store_segment_timeout/0]). +-export([assemble_timeout/0]). + +%% Load/Unload +-export([ + load/0, + unload/0 +]). + +%% callbacks for emqx_config_handler +-export([ + pre_config_update/3, + post_config_update/5 +]). + +-type milliseconds() :: non_neg_integer(). +-type seconds() :: non_neg_integer(). + +%%-------------------------------------------------------------------- +%% Accessors +%%-------------------------------------------------------------------- + +-spec enabled() -> boolean(). +enabled() -> + emqx_config:get([file_transfer, enable], false). + +-spec storage() -> emqx_config:config(). +storage() -> + emqx_config:get([file_transfer, storage]). + +-spec gc_interval(emqx_ft_storage_fs:storage()) -> + emqx_maybe:t(milliseconds()). +gc_interval(Storage) -> + emqx_utils_maps:deep_get([segments, gc, interval], Storage, undefined). + +-spec segments_ttl(emqx_ft_storage_fs:storage()) -> + emqx_maybe:t({_Min :: seconds(), _Max :: seconds()}). +segments_ttl(Storage) -> + Min = emqx_utils_maps:deep_get([segments, gc, minimum_segments_ttl], Storage, undefined), + Max = emqx_utils_maps:deep_get([segments, gc, maximum_segments_ttl], Storage, undefined), + case is_integer(Min) andalso is_integer(Max) of + true -> + {Min, Max}; + false -> + undefined + end. + +init_timeout() -> + emqx_config:get([file_transfer, init_timeout]). + +assemble_timeout() -> + emqx_config:get([file_transfer, assemble_timeout]). + +store_segment_timeout() -> + emqx_config:get([file_transfer, store_segment_timeout]). + +%%-------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------- + +-spec load() -> ok. +load() -> + ok = maybe_start(), + emqx_conf:add_handler([file_transfer], ?MODULE). + +-spec unload() -> ok. +unload() -> + ok = stop(), + emqx_conf:remove_handler([file_transfer]). 
+ +%%-------------------------------------------------------------------- +%% emqx_config_handler callbacks +%%-------------------------------------------------------------------- + +-spec pre_config_update(list(atom()), emqx_config:update_request(), emqx_config:raw_config()) -> + {ok, emqx_config:update_request()} | {error, term()}. +pre_config_update(_, Req, _Config) -> + {ok, Req}. + +-spec post_config_update( + list(atom()), + emqx_config:update_request(), + emqx_config:config(), + emqx_config:config(), + emqx_config:app_envs() +) -> + ok | {ok, Result :: any()} | {error, Reason :: term()}. +post_config_update([file_transfer | _], _Req, NewConfig, OldConfig, _AppEnvs) -> + on_config_update(OldConfig, NewConfig). + +on_config_update(#{enable := false}, #{enable := false}) -> + ok; +on_config_update(#{enable := true, storage := OldStorage}, #{enable := false}) -> + ok = emqx_ft_storage:on_config_update(OldStorage, undefined), + ok = emqx_ft:unhook(); +on_config_update(#{enable := false}, #{enable := true, storage := NewStorage}) -> + ok = emqx_ft_storage:on_config_update(undefined, NewStorage), + ok = emqx_ft:hook(); +on_config_update(#{enable := true, storage := OldStorage}, #{enable := true, storage := NewStorage}) -> + ok = emqx_ft_storage:on_config_update(OldStorage, NewStorage). + +maybe_start() -> + case emqx_config:get([file_transfer]) of + #{enable := true, storage := Storage} -> + ok = emqx_ft_storage:on_config_update(undefined, Storage), + ok = emqx_ft:hook(); + _ -> + ok + end. + +stop() -> + ok = emqx_ft:unhook(), + ok = emqx_ft_storage:on_config_update(storage(), undefined). diff --git a/apps/emqx_ft/src/emqx_ft_fs_iterator.erl b/apps/emqx_ft/src/emqx_ft_fs_iterator.erl new file mode 100644 index 000000000..7a58c5b38 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_fs_iterator.erl @@ -0,0 +1,235 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_fs_iterator). + +-export([new/2]). +-export([next/1]). +-export([next_leaf/1]). + +-export([seek/3]). + +-export([fold/3]). +-export([fold_n/4]). + +-export_type([t/0]). +-export_type([glob/0]). +-export_type([pathstack/0]). + +-type root() :: file:name(). +-type glob() :: ['*' | globfun()]. +-type globfun() :: + fun((_Filename :: file:name()) -> boolean()) + | fun((_Filename :: file:name(), pathstack()) -> boolean()). + +% A path stack is a list of path components, in reverse order. +-type pathstack() :: [file:name(), ...]. + +-opaque t() :: #{ + root := root(), + queue := [_PathStack :: [file:name()]], + head := glob(), + stack := [{[pathstack()], glob()}] +}. + +-type entry() :: entry_leaf() | entry_node(). +-type entry_leaf() :: + {leaf, file:name(), file:file_info() | {error, file:posix()}, pathstack()}. +-type entry_node() :: + {node, file:name(), {error, file:posix()}, pathstack()}. + +-spec new(root(), glob()) -> + t(). +new(Root, Glob) -> + #{ + root => Root, + queue => [[]], + head => Glob, + stack => [] + }. + +-spec next(t()) -> + {entry(), t()} | none. 
+next(It = #{queue := [PathStack | Rest], head := []}) -> + {emit(PathStack, It), It#{queue => Rest}}; +next(It = #{queue := [PathStack | Rest], head := [Pat | _], root := Root}) -> + Filepath = mk_filepath(PathStack), + case emqx_ft_fs_util:list_dir(filename:join(Root, Filepath)) of + {ok, Filenames} -> + Sorted = lists:sort(Filenames), + Matches = [[Fn | PathStack] || Fn <- Sorted, matches_glob(Pat, Fn, [Fn | PathStack])], + ItNext = windup(It), + next(ItNext#{queue => Matches}); + {error, _} = Error -> + {{node, Filepath, Error, PathStack}, It#{queue => Rest}} + end; +next(It = #{queue := []}) -> + unwind(It). + +windup(It = #{queue := [_ | Rest], head := [Pat | Glob], stack := Stack}) -> + % NOTE + % Preserve unfinished paths and glob in the stack, so that we can resume traversal + % when the lower levels of the tree are exhausted. + It#{ + head => Glob, + stack => [{Rest, [Pat | Glob]} | Stack] + }. + +unwind(It = #{stack := [{Queue, Glob} | StackRest]}) -> + % NOTE + % Resume traversal of unfinished paths from the upper levels of the tree. + next(It#{ + queue => Queue, + head => Glob, + stack => StackRest + }); +unwind(#{stack := []}) -> + none. + +emit(PathStack, #{root := Root}) -> + Filepath = mk_filepath(PathStack), + case emqx_ft_fs_util:read_info(filename:join(Root, Filepath)) of + {ok, Fileinfo} -> + {leaf, Filepath, Fileinfo, PathStack}; + {error, _} = Error -> + {leaf, Filepath, Error, PathStack} + end. + +mk_filepath([]) -> + ""; +mk_filepath(PathStack) -> + filename:join(lists:reverse(PathStack)). + +matches_glob('*', _, _) -> + true; +matches_glob(FilterFun, Filename, _PathStack) when is_function(FilterFun, 1) -> + FilterFun(Filename); +matches_glob(FilterFun, Filename, PathStack) when is_function(FilterFun, 2) -> + FilterFun(Filename, PathStack). + +%% + +-spec next_leaf(t()) -> + {entry_leaf(), t()} | none. 
+%% Like next/1, but yields only leaf entries, silently dropping node errors.
+next_leaf(It) ->
+    case next(It) of
+        {{leaf, _, _, _} = Leaf, ItNext} ->
+            {Leaf, ItNext};
+        {{node, _Filename, _Error, _PathStack}, ItNext} ->
+            % NOTE
+            % Intentionally skipping intermediate traversal errors here, for simplicity.
+            next_leaf(ItNext);
+        none ->
+            none
+    end.
+
+%%
+
+%% Construct an iterator positioned strictly after `PathSeek` in traversal
+%% order: matching leaves lexicographically below the seek path are skipped.
+-spec seek([file:name()], root(), glob()) ->
+    t().
+seek(PathSeek, Root, Glob) ->
+    SeekGlob = mk_seek_glob(PathSeek, Glob),
+    SeekStack = lists:reverse(PathSeek),
+    case next_leaf(new(Root, SeekGlob)) of
+        {{leaf, _Filepath, _Info, SeekStack}, It} ->
+            %% Exact seek target found: drop it, continue right after it.
+            fixup_glob(Glob, It);
+        {{leaf, _Filepath, _Info, Successor}, It = #{queue := Queue}} ->
+            %% Seek target absent: the first successor must itself be yielded,
+            %% so push it back onto the queue.
+            fixup_glob(Glob, It#{queue => [Successor | Queue]});
+        none ->
+            none(Root)
+    end.
+
+mk_seek_glob(PathSeek, Glob) ->
+    % NOTE
+    % The seek glob is a glob that skips all the nodes / leaves that are lexicographically
+    % smaller than the seek path. For example, if the seek path is ["a", "b", "c"], and
+    % the glob is ['*', '*', '*', '*'], then the seek glob is:
+    % [ fun(Path) -> Path >= ["a"] end,
+    %   fun(Path) -> Path >= ["a", "b"] end,
+    %   fun(Path) -> Path >= ["a", "b", "c"] end,
+    %   '*'
+    % ]
+    L = min(length(PathSeek), length(Glob)),
+    merge_glob([mk_seek_pat(lists:sublist(PathSeek, N)) || N <- lists:seq(1, L)], Glob).
+
+mk_seek_pat(PathSeek) ->
+    % NOTE
+    % The `PathStack` and `PathSeek` are of the same length here.
+    fun(_Filename, PathStack) -> lists:reverse(PathStack) >= PathSeek end.
+
+%% Zip the seek patterns with the original glob, AND-ing them pairwise; glob
+%% components beyond the seek depth are kept as-is.
+merge_glob([Pat | SeekRest], [PatOrig | Rest]) ->
+    [merge_pat(Pat, PatOrig) | merge_glob(SeekRest, Rest)];
+merge_glob([], [PatOrig | Rest]) ->
+    [PatOrig | merge_glob([], Rest)];
+merge_glob([], []) ->
+    [].
+
+merge_pat(Pat, PatOrig) ->
+    fun(Filename, PathStack) ->
+        Pat(Filename, PathStack) andalso matches_glob(PatOrig, Filename, PathStack)
+    end.
+
+fixup_glob(Glob, It = #{head := [], stack := Stack}) ->
+    % NOTE
+    % Restoring original glob through the stack. Strictly speaking, this is not usually
+    % necessary, it's a kind of optimization.
+    fixup_glob(Glob, lists:reverse(Stack), It#{stack => []}).
+
+%% Walk the (chronologically ordered) stack frames, replacing the merged seek
+%% glob in each frame with the corresponding suffix of the original glob.
+fixup_glob(Glob = [_ | Rest], [{Queue, _} | StackRest], It = #{stack := Stack}) ->
+    fixup_glob(Rest, StackRest, It#{stack => [{Queue, Glob} | Stack]});
+fixup_glob(Rest, [], It) ->
+    It#{head => Rest}.
+
+%%
+
+%% Run FoldFun over every entry (leaves and node errors alike) until exhaustion.
+-spec fold(fun((entry(), Acc) -> Acc), Acc, t()) ->
+    Acc.
+fold(FoldFun, Acc, It) ->
+    case next(It) of
+        {Entry, ItNext} ->
+            fold(FoldFun, FoldFun(Entry, Acc), ItNext);
+        none ->
+            Acc
+    end.
+
+%% NOTE
+%% Passing negative `N` is allowed, in which case the iterator will be exhausted
+%% completely, like in `fold/3`.
+-spec fold_n(fun((entry(), Acc) -> Acc), Acc, t(), _N :: integer()) ->
+    {Acc, {more, t()} | none}.
+fold_n(_FoldFun, Acc, It, 0) ->
+    {Acc, {more, It}};
+fold_n(FoldFun, Acc, It, N) ->
+    case next(It) of
+        {Entry, ItNext} ->
+            fold_n(FoldFun, FoldFun(Entry, Acc), ItNext, N - 1);
+        none ->
+            {Acc, none}
+    end.
+
+%%
+
+-spec none(root()) ->
+    t().
+none(Root) ->
+    % NOTE
+    % The _none_ iterator is a valid iterator, but it will never yield any entries.
+    #{
+        root => Root,
+        queue => [],
+        head => [],
+        stack => []
+    }.
diff --git a/apps/emqx_ft/src/emqx_ft_fs_util.erl b/apps/emqx_ft/src/emqx_ft_fs_util.erl
new file mode 100644
index 000000000..9028722aa
--- /dev/null
+++ b/apps/emqx_ft/src/emqx_ft_fs_util.erl
@@ -0,0 +1,180 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_fs_util). + +-include_lib("snabbkaffe/include/trace.hrl"). +-include_lib("kernel/include/file.hrl"). + +-export([is_filename_safe/1]). +-export([escape_filename/1]). +-export([unescape_filename/1]). + +-export([read_decode_file/2]). +-export([read_info/1]). +-export([list_dir/1]). + +-export([fold/4]). + +-type foldfun(Acc) :: + fun( + ( + _Filepath :: file:name(), + _Info :: file:file_info() | {error, file:posix()}, + _Stack :: emqx_ft_fs_iterator:pathstack(), + Acc + ) -> Acc + ). + +-define(IS_UNSAFE(C), + ((C) =:= $% orelse + (C) =:= $: orelse + (C) =:= $\\ orelse + (C) =:= $/) +). + +-define(IS_PRINTABLE(C), + % NOTE: See `io_lib:printable_unicode_list/1` + (((C) >= 32 andalso (C) =< 126) orelse + ((C) >= 16#A0 andalso (C) < 16#D800) orelse + ((C) > 16#DFFF andalso (C) < 16#FFFE) orelse + ((C) > 16#FFFF andalso (C) =< 16#10FFFF)) +). + +%% + +-spec is_filename_safe(file:filename_all()) -> ok | {error, atom()}. +is_filename_safe(FN) when is_binary(FN) -> + is_filename_safe(unicode:characters_to_list(FN)); +is_filename_safe("") -> + {error, empty}; +is_filename_safe(FN) when FN == "." orelse FN == ".." -> + {error, special}; +is_filename_safe(FN) -> + verify_filename_safe(FN). + +verify_filename_safe([$% | Rest]) -> + verify_filename_safe(Rest); +verify_filename_safe([C | _]) when ?IS_UNSAFE(C) -> + {error, unsafe}; +verify_filename_safe([C | _]) when not ?IS_PRINTABLE(C) -> + {error, nonprintable}; +verify_filename_safe([_ | Rest]) -> + verify_filename_safe(Rest); +verify_filename_safe([]) -> + ok. + +-spec escape_filename(binary()) -> file:name(). +escape_filename(Name) when Name == <<".">> orelse Name == <<"..">> -> + lists:reverse(percent_encode(Name, "")); +escape_filename(Name) -> + escape(Name, ""). 
+
+%% Percent-encode one UTF-8 codepoint at a time: unsafe (`%:\/`) and
+%% non-printable codepoints are encoded byte-wise, everything else passes
+%% through verbatim. Result is a charlist suitable as a filename.
+%% NOTE(review): the binary patterns in this chunk arrived garbled as `<>`;
+%% they are reconstructed here from the guards (which reference `C`) and the
+%% recursive call sites (which consume `Rest`) — confirm against upstream.
+escape(<<C/utf8, Rest/binary>>, Acc) when ?IS_UNSAFE(C) ->
+    escape(Rest, percent_encode(<<C/utf8>>, Acc));
+escape(<<C/utf8, Rest/binary>>, Acc) when not ?IS_PRINTABLE(C) ->
+    escape(Rest, percent_encode(<<C/utf8>>, Acc));
+escape(<<C/utf8, Rest/binary>>, Acc) ->
+    escape(Rest, [C | Acc]);
+escape(<<>>, Acc) ->
+    lists:reverse(Acc).
+
+%% Inverse of escape_filename/1: decode `%XY` byte escapes back into a
+%% UTF-8 binary.
+-spec unescape_filename(file:name()) -> binary().
+unescape_filename(Name) ->
+    unescape(Name, <<>>).
+
+unescape([$%, A, B | Rest], Acc) ->
+    unescape(Rest, percent_decode(A, B, Acc));
+unescape([C | Rest], Acc) ->
+    unescape(Rest, <<Acc/binary, C/utf8>>);
+unescape([], Acc) ->
+    Acc.
+
+%% Emit `%XY` (uppercase hex) for every byte, nibble by nibble, onto the
+%% reversed accumulator charlist.
+percent_encode(<<A:4, B:4, Rest/binary>>, Acc) ->
+    percent_encode(Rest, [dec2hex(B), dec2hex(A), $% | Acc]);
+percent_encode(<<>>, Acc) ->
+    Acc.
+
+percent_decode(A, B, Acc) ->
+    <<Acc/binary, (hex2dec(A) * 16 + hex2dec(B))>>.
+
+dec2hex(X) when (X >= 0) andalso (X =< 9) -> X + $0;
+dec2hex(X) when (X >= 10) andalso (X =< 15) -> X + $A - 10.
+
+%% Accepts both upper- and lowercase hex digits; anything else is a caller bug.
+hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0;
+hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10;
+hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10;
+hex2dec(_) -> error(badarg).
+
+%%
+
+%% Read a whole file and run it through DecodeFun, converting any decode
+%% crash into `{error, corrupted}`.
+-spec read_decode_file(file:name(), fun((binary()) -> Value)) ->
+    {ok, Value} | {error, _IoError}.
+read_decode_file(Filepath, DecodeFun) ->
+    case file:read_file(Filepath) of
+        {ok, Content} ->
+            safe_decode(Content, DecodeFun);
+        {error, _} = Error ->
+            Error
+    end.
+
+safe_decode(Content, DecodeFun) ->
+    try
+        {ok, DecodeFun(Content)}
+    catch
+        C:E:Stacktrace ->
+            %% Trace instead of raising: a corrupted file must not take the
+            %% caller down.
+            ?tp(warning, "safe_decode_failed", #{
+                class => C,
+                exception => E,
+                stacktrace => Stacktrace
+            }),
+            {error, corrupted}
+    end.
+
+-spec read_info(file:name_all()) ->
+    {ok, file:file_info()} | {error, file:posix() | badarg}.
+read_info(AbsPath) ->
+    % NOTE
+    % Be aware that this function is occasionally mocked in `emqx_ft_fs_util_SUITE`.
+    file:read_link_info(AbsPath, [{time, posix}, raw]).
+
+-spec list_dir(file:name_all()) ->
+    {ok, [file:name()]} | {error, file:posix() | badarg}.
+%% List a directory's entries; non-directories yield `{error, enotdir}`.
+%% Goes through `?MODULE:read_info/1` (not a direct call) so test suites can
+%% mock the stat step, as noted on read_info/1.
+list_dir(AbsPath) ->
+    case ?MODULE:read_info(AbsPath) of
+        {ok, #file_info{type = directory}} ->
+            file:list_dir(AbsPath);
+        {ok, #file_info{}} ->
+            {error, enotdir};
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% Fold FoldFun over every filesystem entry under Root matching Glob.
+-spec fold(foldfun(Acc), Acc, _Root :: file:name(), emqx_ft_fs_iterator:glob()) ->
+    Acc.
+fold(FoldFun, Acc, Root, Glob) ->
+    fold(FoldFun, Acc, emqx_ft_fs_iterator:new(Root, Glob)).
+
+fold(FoldFun, Acc, It) ->
+    case emqx_ft_fs_iterator:next(It) of
+        {{node, _Path, {error, enotdir}, _PathStack}, ItNext} ->
+            %% A regular file where a directory was expected is not an error
+            %% worth reporting, just skip it.
+            fold(FoldFun, Acc, ItNext);
+        {{_Type, Path, Info, PathStack}, ItNext} ->
+            AccNext = FoldFun(Path, Info, PathStack, Acc),
+            fold(FoldFun, AccNext, ItNext);
+        none ->
+            Acc
+    end.
diff --git a/apps/emqx_ft/src/emqx_ft_responder.erl b/apps/emqx_ft/src/emqx_ft_responder.erl
new file mode 100644
index 000000000..c2c62e1c2
--- /dev/null
+++ b/apps/emqx_ft/src/emqx_ft_responder.erl
@@ -0,0 +1,116 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+-module(emqx_ft_responder).
+
+-behaviour(gen_server).
+
+-include_lib("emqx/include/logger.hrl").
+-include_lib("emqx/include/types.hrl").
+
+-include_lib("snabbkaffe/include/snabbkaffe.hrl").
+
+%% API
+-export([start/3]).
+-export([kickoff/2]).
+-export([ack/2]).
+
+%% Supervisor API
+-export([start_link/3]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
+
+%% Responders are addressed by Key through gproc's process registry.
+-define(REF(Key), {via, gproc, {n, l, {?MODULE, Key}}}).
+
+-type key() :: term().
+-type respfun() :: fun(({ack, _Result} | {down, _Result} | timeout) -> _SideEffect).
+
+%%--------------------------------------------------------------------
+%% API
+%% -------------------------------------------------------------------
+
+%% Start a responder under the responder supervisor. It will fire RespFun
+%% exactly once: on ack, on monitored-process death, or on timeout.
+-spec start(key(), respfun(), timeout()) -> startlink_ret().
+start(Key, RespFun, Timeout) ->
+    emqx_ft_responder_sup:start_child(Key, RespFun, Timeout).
+
+%% Have the responder monitor Pid and send it a `kickoff` message.
+-spec kickoff(key(), pid()) -> ok.
+kickoff(Key, Pid) ->
+    gen_server:call(?REF(Key), {kickoff, Pid}).
+
+%% Deliver the result to the responder; runs RespFun and stops the process.
+-spec ack(key(), _Result) -> _Return.
+ack(Key, Result) ->
+    % TODO: it's possible to avoid term copy
+    gen_server:call(?REF(Key), {ack, Result}, infinity).
+
+%% NOTE(fix): spec previously declared `(key(), timeout(), respfun())`,
+%% contradicting the function head `(Key, RespFun, Timeout)` and the
+%% matching `start/3` spec above.
+-spec start_link(key(), respfun(), timeout()) -> startlink_ret().
+start_link(Key, RespFun, Timeout) ->
+    gen_server:start_link(?REF(Key), ?MODULE, {Key, RespFun, Timeout}, []).
+
+%%--------------------------------------------------------------------
+%% gen_server callbacks
+%% -------------------------------------------------------------------
+
+init({Key, RespFun, Timeout}) ->
+    _ = erlang:process_flag(trap_exit, true),
+    %% Arm the one-shot timeout; it arrives as a plain `timeout` info message.
+    _TRef = erlang:send_after(Timeout, self(), timeout),
+    {ok, {Key, RespFun}}.
+
+handle_call({kickoff, Pid}, _From, St) ->
+    % TODO: more state?
+    _MRef = erlang:monitor(process, Pid),
+    _ = Pid ! kickoff,
+    {reply, ok, St};
+handle_call({ack, Result}, _From, {Key, RespFun}) ->
+    Ret = apply(RespFun, [{ack, Result}]),
+    ?tp(debug, ft_responder_ack, #{key => Key, result => Result, return => Ret}),
+    %% State is cleared to `undefined` so terminate/2 knows RespFun already ran.
+    {stop, {shutdown, Ret}, Ret, undefined};
+handle_call(Msg, _From, State) ->
+    ?SLOG(warning, #{msg => "unknown_call", call_msg => Msg}),
+    {reply, {error, unknown_call}, State}.
+
+handle_cast(Msg, State) ->
+    ?SLOG(warning, #{msg => "unknown_cast", cast_msg => Msg}),
+    {noreply, State}.
+ +handle_info(timeout, {Key, RespFun}) -> + Ret = apply(RespFun, [timeout]), + ?tp(debug, ft_responder_timeout, #{key => Key, return => Ret}), + {stop, {shutdown, Ret}, undefined}; +handle_info({'DOWN', _MRef, process, _Pid, Reason}, {Key, RespFun}) -> + Ret = apply(RespFun, [{down, map_down_reason(Reason)}]), + ?tp(debug, ft_responder_procdown, #{key => Key, reason => Reason, return => Ret}), + {stop, {shutdown, Ret}, undefined}; +handle_info(Msg, State) -> + ?SLOG(warning, #{msg => "unknown_message", info_msg => Msg}), + {noreply, State}. + +terminate(_Reason, undefined) -> + ok; +terminate(Reason, {Key, RespFun}) -> + Ret = apply(RespFun, [timeout]), + ?tp(debug, ft_responder_shutdown, #{key => Key, reason => Reason, return => Ret}), + ok. + +map_down_reason(normal) -> + ok; +map_down_reason(shutdown) -> + ok; +map_down_reason({shutdown, Result}) -> + Result; +map_down_reason(noproc) -> + {error, noproc}; +map_down_reason(Error) -> + {error, {internal_error, Error}}. diff --git a/apps/emqx_ft/src/emqx_ft_responder_sup.erl b/apps/emqx_ft/src/emqx_ft_responder_sup.erl new file mode 100644 index 000000000..fb3932425 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_responder_sup.erl @@ -0,0 +1,48 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%--------------------------------------------------------------------
+
+%% Supervisor for `emqx_ft_responder` processes: one transient worker per
+%% in-flight transfer operation awaiting a response.
+-module(emqx_ft_responder_sup).
+
+-export([start_link/0]).
+-export([start_child/3]).
+
+-behaviour(supervisor).
+-export([init/1]).
+
+-define(SUPERVISOR, ?MODULE).
+
+%%
+
+-spec start_link() -> {ok, pid()}.
+start_link() ->
+    supervisor:start_link({local, ?SUPERVISOR}, ?MODULE, []).
+
+%% Start one responder; args are appended to the child spec's MFA
+%% (simple_one_for_one), i.e. emqx_ft_responder:start_link(Key, RespFun, Timeout).
+start_child(Key, RespFun, Timeout) ->
+    supervisor:start_child(?SUPERVISOR, [Key, RespFun, Timeout]).
+
+-spec init(_) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}.
+init(_) ->
+    Flags = #{
+        strategy => simple_one_for_one,
+        intensity => 100,
+        period => 100
+    },
+    ChildSpec = #{
+        id => responder,
+        start => {emqx_ft_responder, start_link, []},
+        %% `temporary`: a responder that stops (acked, timed out, or crashed)
+        %% is never restarted.
+        restart => temporary
+    },
+    {ok, {Flags, [ChildSpec]}}.
diff --git a/apps/emqx_ft/src/emqx_ft_schema.erl b/apps/emqx_ft/src/emqx_ft_schema.erl
new file mode 100644
index 000000000..09e9ab0a5
--- /dev/null
+++ b/apps/emqx_ft/src/emqx_ft_schema.erl
@@ -0,0 +1,317 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+-module(emqx_ft_schema).
+
+-behaviour(hocon_schema).
+
+-include_lib("hocon/include/hoconsc.hrl").
+-include_lib("typerefl/include/types.hrl").
+
+-export([namespace/0, roots/0, fields/1, tags/0, desc/1]).
+ +-export([schema/1]). + +-export([translate/1]). + +-type json_value() :: + null + | boolean() + | binary() + | number() + | [json_value()] + | #{binary() => json_value()}. + +-reflect_type([json_value/0]). + +%% NOTE +%% This is rather conservative limit, mostly dictated by the filename limitations +%% on most filesystems. Even though, say, S3 does not have such limitations, it's +%% still useful to have a limit on the filename length, to avoid having to deal with +%% limits in the storage backends. +-define(MAX_FILENAME_BYTELEN, 255). + +-import(hoconsc, [ref/2, mk/2]). + +namespace() -> file_transfer. + +tags() -> + [<<"File Transfer">>]. + +roots() -> [file_transfer]. + +fields(file_transfer) -> + [ + {enable, + mk( + boolean(), + #{ + desc => ?DESC("enable"), + required => false, + default => false + } + )}, + {init_timeout, + mk( + emqx_schema:duration_ms(), + #{ + desc => ?DESC("init_timeout"), + required => false, + default => "10s" + } + )}, + {store_segment_timeout, + mk( + emqx_schema:duration_ms(), + #{ + desc => ?DESC("store_segment_timeout"), + required => false, + default => "5m" + } + )}, + {assemble_timeout, + mk( + emqx_schema:duration_ms(), + #{ + desc => ?DESC("assemble_timeout"), + required => false, + default => "5m" + } + )}, + {storage, + mk( + ref(storage_backend), + #{ + desc => ?DESC("storage_backend"), + required => false, + validator => validator(backend), + default => #{ + <<"local">> => #{} + } + } + )} + ]; +fields(storage_backend) -> + [ + {local, + mk( + ref(local_storage), + #{ + desc => ?DESC("local_storage"), + required => {false, recursively} + } + )} + ]; +fields(local_storage) -> + [ + {segments, + mk( + ref(local_storage_segments), + #{ + desc => ?DESC("local_storage_segments"), + required => false, + default => #{ + <<"gc">> => #{} + } + } + )}, + {exporter, + mk( + ref(local_storage_exporter_backend), + #{ + desc => ?DESC("local_storage_exporter_backend"), + required => false, + validator => validator(backend), + default 
=> #{ + <<"local">> => #{} + } + } + )} + ]; +fields(local_storage_segments) -> + [ + {root, + mk( + binary(), + #{ + desc => ?DESC("local_storage_segments_root"), + required => false + } + )}, + {gc, + mk( + ref(local_storage_segments_gc), #{ + desc => ?DESC("local_storage_segments_gc"), + required => false + } + )} + ]; +fields(local_storage_exporter_backend) -> + [ + {local, + mk( + ref(local_storage_exporter), + #{ + desc => ?DESC("local_storage_exporter"), + required => {false, recursively} + } + )}, + {s3, + mk( + ref(s3_exporter), + #{ + desc => ?DESC("s3_exporter"), + required => {false, recursively} + } + )} + ]; +fields(local_storage_exporter) -> + [ + {root, + mk( + binary(), + #{ + desc => ?DESC("local_storage_exporter_root"), + required => false + } + )} + ]; +fields(s3_exporter) -> + emqx_s3_schema:fields(s3); +fields(local_storage_segments_gc) -> + [ + {interval, + mk( + emqx_schema:duration_ms(), + #{ + desc => ?DESC("storage_gc_interval"), + required => false, + default => "1h" + } + )}, + {maximum_segments_ttl, + mk( + emqx_schema:duration_s(), + #{ + desc => ?DESC("storage_gc_max_segments_ttl"), + required => false, + default => "24h" + } + )}, + {minimum_segments_ttl, + mk( + emqx_schema:duration_s(), + #{ + desc => ?DESC("storage_gc_min_segments_ttl"), + required => false, + default => "5m", + % NOTE + % This setting does not seem to be useful to an end-user. + hidden => true + } + )} + ]. 
+ +desc(file_transfer) -> + "File transfer settings"; +desc(local_storage) -> + "File transfer local storage settings"; +desc(local_storage_segments) -> + "File transfer local segments storage settings"; +desc(local_storage_exporter) -> + "Local Exporter settings for the File transfer local storage backend"; +desc(s3_exporter) -> + "S3 Exporter settings for the File transfer local storage backend"; +desc(local_storage_segments_gc) -> + "Garbage collection settings for the File transfer local segments storage"; +desc(local_storage_exporter_backend) -> + "Exporter for the local file system storage backend"; +desc(storage_backend) -> + "Storage backend settings for file transfer"; +desc(_) -> + undefined. + +schema(filemeta) -> + #{ + roots => [ + {name, + hoconsc:mk(string(), #{ + required => true, + validator => validator(filename), + converter => converter(unicode_string) + })}, + {size, hoconsc:mk(non_neg_integer())}, + {expire_at, hoconsc:mk(non_neg_integer())}, + {checksum, hoconsc:mk({atom(), binary()}, #{converter => converter(checksum)})}, + {segments_ttl, hoconsc:mk(pos_integer())}, + {user_data, hoconsc:mk(json_value())} + ] + }. + +validator(filename) -> + [ + fun(Value) -> + Bin = unicode:characters_to_binary(Value), + byte_size(Bin) =< ?MAX_FILENAME_BYTELEN orelse {error, max_length_exceeded} + end, + fun emqx_ft_fs_util:is_filename_safe/1 + ]; +validator(backend) -> + fun(Config) -> + case maps:keys(Config) of + [_Type] -> + ok; + _Conflicts = [_ | _] -> + {error, multiple_conflicting_backends} + end + end. 
+ +converter(checksum) -> + fun + (undefined, #{}) -> + undefined; + ({sha256, Bin}, #{make_serializable := true}) -> + _ = is_binary(Bin) orelse throw({expected_type, string}), + _ = byte_size(Bin) =:= 32 orelse throw({expected_length, 32}), + binary:encode_hex(Bin); + (Hex, #{}) -> + _ = is_binary(Hex) orelse throw({expected_type, string}), + _ = byte_size(Hex) =:= 64 orelse throw({expected_length, 64}), + {sha256, binary:decode_hex(Hex)} + end; +converter(unicode_string) -> + fun + (undefined, #{}) -> + undefined; + (Str, #{make_serializable := true}) -> + _ = is_list(Str) orelse throw({expected_type, string}), + unicode:characters_to_binary(Str); + (Str, #{}) -> + _ = is_binary(Str) orelse throw({expected_type, string}), + unicode:characters_to_list(Str) + end. + +ref(Ref) -> + ref(?MODULE, Ref). + +translate(Conf) -> + [Root] = roots(), + maps:get( + Root, + hocon_tconf:check_plain( + ?MODULE, #{atom_to_binary(Root) => Conf}, #{atom_key => true}, [Root] + ) + ). diff --git a/apps/emqx_ft/src/emqx_ft_storage.erl b/apps/emqx_ft/src/emqx_ft_storage.erl new file mode 100644 index 000000000..4e1060d88 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage.erl @@ -0,0 +1,195 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%-------------------------------------------------------------------- + +-module(emqx_ft_storage). + +-export( + [ + store_filemeta/2, + store_segment/2, + assemble/2, + + files/0, + files/1, + + with_storage_type/2, + with_storage_type/3, + + backend/0, + on_config_update/2 + ] +). + +-type type() :: local. +-type backend() :: {type(), storage()}. +-type storage() :: config(). +-type config() :: emqx_config:config(). + +-export_type([backend/0]). + +-export_type([assemble_callback/0]). + +-export_type([query/1]). +-export_type([page/2]). +-export_type([file_info/0]). +-export_type([export_data/0]). +-export_type([reader/0]). + +-type assemble_callback() :: fun((ok | {error, term()}) -> any()). + +-type query(Cursor) :: + #{transfer => emqx_ft:transfer()} + | #{ + limit => non_neg_integer(), + following => Cursor + }. + +-type page(Item, Cursor) :: #{ + items := [Item], + cursor => Cursor +}. + +-type file_info() :: #{ + transfer := emqx_ft:transfer(), + name := file:name(), + size := _Bytes :: non_neg_integer(), + timestamp := emqx_datetime:epoch_second(), + uri => uri_string:uri_string(), + meta => emqx_ft:filemeta() +}. + +-type export_data() :: binary() | qlc:query_handle(). +-type reader() :: pid(). + +%%-------------------------------------------------------------------- +%% Behaviour +%%-------------------------------------------------------------------- + +%% NOTE +%% An async task will wait for a `kickoff` message to start processing, to give some time +%% to set up monitors, etc. Async task will not explicitly report the processing result, +%% you are expected to receive and handle exit reason of the process, which is +%% -type result() :: `{shutdown, ok | {error, _}}`. + +-callback store_filemeta(storage(), emqx_ft:transfer(), emqx_ft:filemeta()) -> + ok | {async, pid()} | {error, term()}. +-callback store_segment(storage(), emqx_ft:transfer(), emqx_ft:segment()) -> + ok | {async, pid()} | {error, term()}. 
+-callback assemble(storage(), emqx_ft:transfer(), _Size :: emqx_ft:bytes()) -> + ok | {async, pid()} | {error, term()}. + +-callback files(storage(), query(Cursor)) -> + {ok, page(file_info(), Cursor)} | {error, term()}. + +-callback start(emqx_config:config()) -> any(). +-callback stop(emqx_config:config()) -> any(). + +-callback on_config_update(_OldConfig :: emqx_config:config(), _NewConfig :: emqx_config:config()) -> + any(). + +%%-------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------- + +-spec store_filemeta(emqx_ft:transfer(), emqx_ft:filemeta()) -> + ok | {async, pid()} | {error, term()}. +store_filemeta(Transfer, FileMeta) -> + dispatch(store_filemeta, [Transfer, FileMeta]). + +-spec store_segment(emqx_ft:transfer(), emqx_ft:segment()) -> + ok | {async, pid()} | {error, term()}. +store_segment(Transfer, Segment) -> + dispatch(store_segment, [Transfer, Segment]). + +-spec assemble(emqx_ft:transfer(), emqx_ft:bytes()) -> + ok | {async, pid()} | {error, term()}. +assemble(Transfer, Size) -> + dispatch(assemble, [Transfer, Size]). + +-spec files() -> + {ok, page(file_info(), _)} | {error, term()}. +files() -> + files(#{}). + +-spec files(query(Cursor)) -> + {ok, page(file_info(), Cursor)} | {error, term()}. +files(Query) -> + dispatch(files, [Query]). + +-spec dispatch(atom(), list(term())) -> any(). +dispatch(Fun, Args) when is_atom(Fun) -> + {Type, Storage} = backend(), + apply(mod(Type), Fun, [Storage | Args]). + +%% + +-spec with_storage_type(atom(), atom() | function()) -> any(). +with_storage_type(Type, Fun) -> + with_storage_type(Type, Fun, []). + +-spec with_storage_type(atom(), atom() | function(), list(term())) -> any(). 
+with_storage_type(Type, Fun, Args) -> + case backend() of + {Type, Storage} when is_atom(Fun) -> + apply(mod(Type), Fun, [Storage | Args]); + {Type, Storage} when is_function(Fun) -> + apply(Fun, [Storage | Args]); + {_, _} = Backend -> + {error, {invalid_storage_backend, Backend}} + end. + +%% + +-spec backend() -> backend(). +backend() -> + backend(emqx_ft_conf:storage()). + +-spec on_config_update(_Old :: emqx_maybe:t(config()), _New :: emqx_maybe:t(config())) -> + ok. +on_config_update(ConfigOld, ConfigNew) -> + on_backend_update( + emqx_maybe:apply(fun backend/1, ConfigOld), + emqx_maybe:apply(fun backend/1, ConfigNew) + ). + +on_backend_update({Type, _} = Backend, {Type, _} = Backend) -> + ok; +on_backend_update({Type, StorageOld}, {Type, StorageNew}) -> + ok = (mod(Type)):on_config_update(StorageOld, StorageNew); +on_backend_update(BackendOld, BackendNew) when + (BackendOld =:= undefined orelse is_tuple(BackendOld)) andalso + (BackendNew =:= undefined orelse is_tuple(BackendNew)) +-> + _ = emqx_maybe:apply(fun on_storage_stop/1, BackendOld), + _ = emqx_maybe:apply(fun on_storage_start/1, BackendNew), + ok. + +%%-------------------------------------------------------------------- +%% Local API +%%-------------------------------------------------------------------- + +-spec backend(config()) -> backend(). +backend(#{local := Storage}) -> + {local, Storage}. + +on_storage_start({Type, Storage}) -> + (mod(Type)):start(Storage). + +on_storage_stop({Type, Storage}) -> + (mod(Type)):stop(Storage). + +mod(local) -> + emqx_ft_storage_fs. diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter.erl new file mode 100644 index 000000000..591173615 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter.erl @@ -0,0 +1,195 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Filesystem storage exporter +%% +%% This is conceptually a part of the Filesystem storage backend that defines +%% how and where complete transfers are assembled into files and stored. + +-module(emqx_ft_storage_exporter). + +%% Export API +-export([start_export/3]). +-export([write/2]). +-export([complete/1]). +-export([discard/1]). + +%% Listing API +-export([list/2]). + +%% Lifecycle API +-export([on_config_update/2]). + +%% Internal API +-export([exporter/1]). + +-export_type([export/0]). + +-type storage() :: emxt_ft_storage_fs:storage(). +-type transfer() :: emqx_ft:transfer(). +-type filemeta() :: emqx_ft:filemeta(). +-type checksum() :: emqx_ft:checksum(). + +-type exporter_conf() :: map(). +-type export_st() :: term(). +-type hash_state() :: term(). +-opaque export() :: #{ + mod := module(), + st := export_st(), + hash := hash_state(), + filemeta := filemeta() +}. + +%%------------------------------------------------------------------------------ +%% Behaviour +%%------------------------------------------------------------------------------ + +-callback start_export(exporter_conf(), transfer(), filemeta()) -> + {ok, export_st()} | {error, _Reason}. + +%% Exprter must discard the export itself in case of error +-callback write(ExportSt :: export_st(), iodata()) -> + {ok, ExportSt :: export_st()} | {error, _Reason}. 
+ +-callback complete(_ExportSt :: export_st(), _Checksum :: checksum()) -> + ok | {error, _Reason}. + +-callback discard(ExportSt :: export_st()) -> + ok | {error, _Reason}. + +-callback list(exporter_conf(), emqx_ft_storage:query(Cursor)) -> + {ok, emqx_ft_storage:page(emqx_ft_storage:file_info(), Cursor)} | {error, _Reason}. + +%% Lifecycle callbacks + +-callback start(exporter_conf()) -> + ok | {error, _Reason}. + +-callback stop(exporter_conf()) -> + ok. + +-callback update(exporter_conf(), exporter_conf()) -> + ok | {error, _Reason}. + +%%------------------------------------------------------------------------------ +%% API +%%------------------------------------------------------------------------------ + +-spec start_export(storage(), transfer(), filemeta()) -> + {ok, export()} | {error, _Reason}. +start_export(Storage, Transfer, Filemeta) -> + {ExporterMod, ExporterConf} = exporter(Storage), + case ExporterMod:start_export(ExporterConf, Transfer, Filemeta) of + {ok, ExportSt} -> + {ok, #{ + mod => ExporterMod, + st => ExportSt, + hash => init_checksum(Filemeta), + filemeta => Filemeta + }}; + {error, _} = Error -> + Error + end. + +-spec write(export(), iodata()) -> + {ok, export()} | {error, _Reason}. +write(#{mod := ExporterMod, st := ExportSt, hash := Hash} = Export, Content) -> + case ExporterMod:write(ExportSt, Content) of + {ok, ExportStNext} -> + {ok, Export#{ + st := ExportStNext, + hash := update_checksum(Hash, Content) + }}; + {error, _} = Error -> + Error + end. + +-spec complete(export()) -> + ok | {error, _Reason}. +complete(#{mod := ExporterMod, st := ExportSt, hash := Hash, filemeta := Filemeta}) -> + case verify_checksum(Hash, Filemeta) of + {ok, Checksum} -> + ExporterMod:complete(ExportSt, Checksum); + {error, _} = Error -> + _ = ExporterMod:discard(ExportSt), + Error + end. + +-spec discard(export()) -> + ok | {error, _Reason}. +discard(#{mod := ExporterMod, st := ExportSt}) -> + ExporterMod:discard(ExportSt). 
+ +-spec list(storage(), emqx_ft_storage:query(Cursor)) -> + {ok, emqx_ft_storage:page(emqx_ft_storage:file_info(), Cursor)} | {error, _Reason}. +list(Storage, Query) -> + {ExporterMod, ExporterOpts} = exporter(Storage), + ExporterMod:list(ExporterOpts, Query). + +%% Lifecycle + +-spec on_config_update(storage(), storage()) -> ok | {error, term()}. +on_config_update(StorageOld, StorageNew) -> + on_exporter_update( + emqx_maybe:apply(fun exporter/1, StorageOld), + emqx_maybe:apply(fun exporter/1, StorageNew) + ). + +on_exporter_update(Config, Config) -> + ok; +on_exporter_update({ExporterMod, ConfigOld}, {ExporterMod, ConfigNew}) -> + ExporterMod:update(ConfigOld, ConfigNew); +on_exporter_update(ExporterOld, ExporterNew) -> + _ = emqx_maybe:apply(fun stop/1, ExporterOld), + _ = emqx_maybe:apply(fun start/1, ExporterNew), + ok. + +start({ExporterMod, ExporterOpts}) -> + ok = ExporterMod:start(ExporterOpts). + +stop({ExporterMod, ExporterOpts}) -> + ok = ExporterMod:stop(ExporterOpts). + +%%------------------------------------------------------------------------------ +%% Internal functions +%%------------------------------------------------------------------------------ + +exporter(Storage) -> + case maps:get(exporter, Storage) of + #{local := Options} -> + {emqx_ft_storage_exporter_fs, Options}; + #{s3 := Options} -> + {emqx_ft_storage_exporter_s3, Options} + end. + +init_checksum(#{checksum := {Algo, _}}) -> + crypto:hash_init(Algo); +init_checksum(#{}) -> + crypto:hash_init(sha256). + +update_checksum(Ctx, IoData) -> + crypto:hash_update(Ctx, IoData). + +verify_checksum(Ctx, #{checksum := {Algo, Digest} = Checksum}) -> + case crypto:hash_final(Ctx) of + Digest -> + {ok, Checksum}; + Mismatch -> + {error, {checksum, Algo, binary:encode_hex(Mismatch)}} + end; +verify_checksum(Ctx, #{}) -> + Digest = crypto:hash_final(Ctx), + {ok, {sha256, Digest}}. 
diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl new file mode 100644 index 000000000..6738d6fef --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl @@ -0,0 +1,489 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_exporter_fs). + +-include_lib("kernel/include/file.hrl"). +-include_lib("emqx/include/logger.hrl"). + +%% Exporter API +-behaviour(emqx_ft_storage_exporter). + +-export([start_export/3]). +-export([write/2]). +-export([complete/2]). +-export([discard/1]). +-export([list/1]). + +-export([ + start/1, + stop/1, + update/2 +]). + +%% Internal API for RPC +-export([list_local/1]). +-export([list_local/2]). +-export([list_local_transfer/2]). +-export([start_reader/3]). + +-export([list/2]). + +-export_type([export_st/0]). +-export_type([options/0]). + +-type options() :: #{ + root => file:name(), + _ => _ +}. + +-type query() :: emqx_ft_storage:query(cursor()). +-type page(T) :: emqx_ft_storage:page(T, cursor()). +-type cursor() :: iodata(). + +-type transfer() :: emqx_ft:transfer(). +-type filemeta() :: emqx_ft:filemeta(). +-type exportinfo() :: emqx_ft_storage:file_info(). +-type file_error() :: emqx_ft_storage_fs:file_error(). 
+
+-type export_st() :: #{
+    path := file:name(),
+    handle := io:device(),
+    result := file:name(),
+    meta := filemeta()
+}.
+
+-type reader() :: pid().
+
+-define(TEMPDIR, "tmp").
+-define(MANIFEST, ".MANIFEST.json").
+
+%% NOTE
+%% Bucketing of resulting files to accommodate the storage backend for a considerably
+%% large (e.g. > 10s of millions) number of files.
+-define(BUCKET_HASH, sha).
+
+%% 2 symbols = at most 256 directories on the upper level
+-define(BUCKET1_LEN, 2).
+%% 2 symbols = at most 256 directories on the second level
+-define(BUCKET2_LEN, 2).
+
+%%--------------------------------------------------------------------
+%% Exporter behaviour
+%%--------------------------------------------------------------------
+
+-spec start_export(options(), transfer(), filemeta()) ->
+    {ok, export_st()} | {error, file_error()}.
+start_export(Options, Transfer, Filemeta = #{name := Filename}) ->
+    TempFilepath = mk_temp_absfilepath(Options, Transfer, Filename),
+    ResultFilepath = mk_absfilepath(Options, Transfer, result, Filename),
+    _ = filelib:ensure_dir(TempFilepath),
+    case file:open(TempFilepath, [write, raw, binary]) of
+        {ok, Handle} ->
+            {ok, #{
+                path => TempFilepath,
+                handle => Handle,
+                result => ResultFilepath,
+                meta => Filemeta
+            }};
+        {error, _} = Error ->
+            Error
+    end.
+
+-spec write(export_st(), iodata()) ->
+    {ok, export_st()} | {error, file_error()}.
+write(ExportSt = #{handle := Handle}, IoData) ->
+    case file:write(Handle, IoData) of
+        ok ->
+            {ok, ExportSt};
+        {error, _} = Error ->
+            _ = discard(ExportSt),
+            Error
+    end.
+
+-spec complete(export_st(), emqx_ft:checksum()) ->
+    ok | {error, {checksum, _Algo, _Computed}} | {error, file_error()}.
+complete(
+    #{
+        path := Filepath,
+        handle := Handle,
+        result := ResultFilepath,
+        meta := FilemetaIn
+    },
+    Checksum
+) ->
+    Filemeta = FilemetaIn#{checksum => Checksum},
+    ok = file:close(Handle),
+    _ = filelib:ensure_dir(ResultFilepath),
+    _ = file:write_file(mk_manifest_filename(ResultFilepath), encode_filemeta(Filemeta)),
+    file:rename(Filepath, ResultFilepath).
+
+-spec discard(export_st()) ->
+    ok.
+discard(#{path := Filepath, handle := Handle}) ->
+    ok = file:close(Handle),
+    file:delete(Filepath).
+
+%%--------------------------------------------------------------------
+%% Exporter behaviour (lifecycle)
+%%--------------------------------------------------------------------
+
+%% FS Exporter does not require any stateful entities,
+%% so lifecycle callbacks are no-op.
+
+-spec start(options()) -> ok.
+start(_Options) -> ok.
+
+-spec stop(options()) -> ok.
+stop(_Options) -> ok.
+
+-spec update(options(), options()) -> ok.
+update(_OldOptions, _NewOptions) -> ok.
+
+%%--------------------------------------------------------------------
+%% Internal API
+%%--------------------------------------------------------------------
+
+-type local_query() :: emqx_ft_storage:query({transfer(), file:name()}).
+
+-spec list_local_transfer(options(), transfer()) ->
+    {ok, [exportinfo()]} | {error, file_error()}.
+list_local_transfer(Options, Transfer) -> + It = emqx_ft_fs_iterator:new( + mk_absdir(Options, Transfer, result), + [fun filter_manifest/1] + ), + Result = emqx_ft_fs_iterator:fold( + fun + ({leaf, _Path, Fileinfo = #file_info{type = regular}, [Filename | _]}, Acc) -> + RelFilepath = filename:join(mk_result_reldir(Transfer) ++ [Filename]), + Info = mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo), + [Info | Acc]; + ({node, _Path, {error, Reason}, []}, []) -> + {error, Reason}; + (Entry, Acc) -> + ok = log_invalid_entry(Options, Entry), + Acc + end, + [], + It + ), + case Result of + Infos = [_ | _] -> + {ok, lists:reverse(Infos)}; + [] -> + {error, enoent}; + {error, Reason} -> + {error, Reason} + end. + +-spec list_local(options()) -> + {ok, [exportinfo()]} | {error, file_error()}. +list_local(Options) -> + list_local(Options, #{}). + +-spec list_local(options(), local_query()) -> + {ok, [exportinfo()]} | {error, file_error()}. +list_local(Options, #{transfer := Transfer}) -> + list_local_transfer(Options, Transfer); +list_local(Options, #{} = Query) -> + Root = get_storage_root(Options), + Glob = [ + _Bucket1 = '*', + _Bucket2 = '*', + _Rest = '*', + _ClientId = '*', + _FileId = '*', + fun filter_manifest/1 + ], + It = + case Query of + #{following := Cursor} -> + emqx_ft_fs_iterator:seek(mk_path_seek(Cursor), Root, Glob); + #{} -> + emqx_ft_fs_iterator:new(Root, Glob) + end, + % NOTE + % In the rare case when some transfer contain more than one file, the paging mechanic + % here may skip over some files, when the cursor is transfer-only. + Limit = maps:get(limit, Query, -1), + {Exports, _} = emqx_ft_fs_iterator:fold_n( + fun(Entry, Acc) -> read_exportinfo(Options, Entry, Acc) end, + [], + It, + Limit + ), + {ok, Exports}. + +mk_path_seek(#{transfer := Transfer, name := Filename}) -> + mk_result_reldir(Transfer) ++ [Filename]; +mk_path_seek(#{transfer := Transfer}) -> + % NOTE: Any bitstring is greater than any list. 
+ mk_result_reldir(Transfer) ++ [<<>>]. + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +filter_manifest(?MANIFEST) -> + % Filename equals `?MANIFEST`, there should also be a manifest for it. + false; +filter_manifest(Filename) -> + ?MANIFEST =/= string:find(Filename, ?MANIFEST, trailing). + +read_exportinfo( + Options, + {leaf, RelFilepath, Fileinfo = #file_info{type = regular}, [Filename, FileId, ClientId | _]}, + Acc +) -> + % NOTE + % There might be more than one file for a single transfer (though + % extremely bad luck is needed for that, e.g. concurrent assemblers with + % different filemetas from different nodes). This might be unexpected for a + % client given the current protocol, yet might be helpful in the future. + Transfer = dirnames_to_transfer(ClientId, FileId), + Info = mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo), + [Info | Acc]; +read_exportinfo(_Options, {node, _Root = "", {error, enoent}, []}, Acc) -> + % NOTE: Root directory does not exist, this is not an error. + Acc; +read_exportinfo(Options, Entry, Acc) -> + ok = log_invalid_entry(Options, Entry), + Acc. + +mk_exportinfo(Options, Filename, RelFilepath, Transfer, Fileinfo) -> + Root = get_storage_root(Options), + try_read_filemeta( + filename:join(Root, mk_manifest_filename(RelFilepath)), + #{ + transfer => Transfer, + name => Filename, + uri => mk_export_uri(RelFilepath), + timestamp => Fileinfo#file_info.mtime, + size => Fileinfo#file_info.size, + path => filename:join(Root, RelFilepath) + } + ). + +try_read_filemeta(Filepath, Info) -> + case emqx_ft_fs_util:read_decode_file(Filepath, fun decode_filemeta/1) of + {ok, Filemeta} -> + Info#{meta => Filemeta}; + {error, Reason} -> + ?SLOG(warning, "filemeta_inaccessible", #{ + path => Filepath, + reason => Reason + }), + Info + end. 
+ +mk_export_uri(RelFilepath) -> + emqx_ft_storage_exporter_fs_api:mk_export_uri(node(), RelFilepath). + +log_invalid_entry(Options, {_Type, RelFilepath, Fileinfo = #file_info{}, _Stack}) -> + ?SLOG(notice, "filesystem_object_unexpected", #{ + relpath => RelFilepath, + fileinfo => Fileinfo, + options => Options + }); +log_invalid_entry(Options, {_Type, RelFilepath, {error, Reason}, _Stack}) -> + ?SLOG(warning, "filesystem_object_inaccessible", #{ + relpath => RelFilepath, + reason => Reason, + options => Options + }). + +-spec start_reader(options(), file:name(), _Caller :: pid()) -> + {ok, reader()} | {error, enoent}. +start_reader(Options, RelFilepath, CallerPid) -> + Root = get_storage_root(Options), + case filelib:safe_relative_path(RelFilepath, Root) of + SafeFilepath when SafeFilepath /= unsafe -> + AbsFilepath = filename:join(Root, SafeFilepath), + emqx_ft_storage_fs_reader:start_supervised(CallerPid, AbsFilepath); + unsafe -> + {error, enoent} + end. + +%% + +-spec list(options(), query()) -> + {ok, page(exportinfo())} | {error, [{node(), _Reason}]}. +list(_Options, Query = #{transfer := _Transfer}) -> + case list(Query) of + #{items := Exports = [_ | _]} -> + {ok, #{items => Exports}}; + #{items := [], errors := NodeErrors} -> + {error, NodeErrors}; + #{items := []} -> + {ok, #{items => []}} + end; +list(_Options, Query) -> + Result = list(Query), + case Result of + #{errors := NodeErrors} -> + ?SLOG(warning, "list_exports_errors", #{ + query => Query, + errors => NodeErrors + }); + #{} -> + ok + end, + case Result of + #{items := Exports, cursor := Cursor} -> + {ok, #{items => lists:reverse(Exports), cursor => encode_cursor(Cursor)}}; + #{items := Exports} -> + {ok, #{items => lists:reverse(Exports)}} + end. + +list(QueryIn) -> + {Nodes, NodeQuery} = decode_query(QueryIn, lists:sort(mria_mnesia:running_nodes())), + list_nodes(NodeQuery, Nodes, #{items => []}). 
+ +list_nodes(Query, Nodes = [Node | Rest], Acc) -> + case emqx_ft_storage_exporter_fs_proto_v1:list_exports([Node], Query) of + [{ok, Result}] -> + list_accumulate(Result, Query, Nodes, Acc); + [Failure] -> + ?SLOG(warning, #{ + msg => "list_remote_exports_failed", + node => Node, + query => Query, + failure => Failure + }), + list_next(Query, Rest, Acc) + end; +list_nodes(_Query, [], Acc) -> + Acc. + +list_accumulate({ok, Exports}, Query, [Node | Rest], Acc = #{items := EAcc}) -> + NExports = length(Exports), + AccNext = Acc#{items := Exports ++ EAcc}, + case Query of + #{limit := Limit} when NExports < Limit -> + list_next(Query#{limit => Limit - NExports}, Rest, AccNext); + #{limit := _} -> + AccNext#{cursor => mk_cursor(Node, Exports)}; + #{} -> + list_next(Query, Rest, AccNext) + end; +list_accumulate({error, Reason}, Query, [Node | Rest], Acc) -> + EAcc = maps:get(errors, Acc, []), + list_next(Query, Rest, Acc#{errors => [{Node, Reason} | EAcc]}). + +list_next(Query, Nodes, Acc) -> + list_nodes(maps:remove(following, Query), Nodes, Acc). + +decode_query(Query = #{following := Cursor}, Nodes) -> + {Node, NodeCursor} = decode_cursor(Cursor), + {skip_query_nodes(Node, Nodes), Query#{following => NodeCursor}}; +decode_query(Query = #{}, Nodes) -> + {Nodes, Query}. + +skip_query_nodes(CNode, Nodes) -> + lists:dropwhile(fun(N) -> N < CNode end, Nodes). + +mk_cursor(Node, [_Last = #{transfer := Transfer, name := Name} | _]) -> + {Node, #{transfer => Transfer, name => Name}}. + +encode_cursor({Node, #{transfer := {ClientId, FileId}, name := Name}}) -> + emqx_utils_json:encode(#{ + <<"n">> => Node, + <<"cid">> => ClientId, + <<"fid">> => FileId, + <<"fn">> => unicode:characters_to_binary(Name) + }). 
+
+decode_cursor(Cursor) ->
+    try
+        #{
+            <<"n">> := NodeIn,
+            <<"cid">> := ClientId,
+            <<"fid">> := FileId,
+            <<"fn">> := NameIn
+        } = emqx_utils_json:decode(Cursor),
+        true = is_binary(ClientId),
+        true = is_binary(FileId),
+        Node = binary_to_existing_atom(NodeIn),
+        Name = unicode:characters_to_list(NameIn),
+        true = is_list(Name),
+        {Node, #{transfer => {ClientId, FileId}, name => Name}}
+    catch
+        error:{_, invalid_json} ->
+            error({badarg, cursor});
+        error:{badmatch, _} ->
+            error({badarg, cursor});
+        error:badarg ->
+            error({badarg, cursor})
+    end.
+
+%%
+
+-define(PRELUDE(Vsn, Meta), [<<"filemeta">>, Vsn, Meta]).
+
+encode_filemeta(Meta) ->
+    emqx_utils_json:encode(?PRELUDE(_Vsn = 1, emqx_ft:encode_filemeta(Meta))).
+
+decode_filemeta(Binary) when is_binary(Binary) ->
+    ?PRELUDE(_Vsn = 1, Map) = emqx_utils_json:decode(Binary, [return_maps]),
+    case emqx_ft:decode_filemeta(Map) of
+        {ok, Meta} ->
+            Meta;
+        {error, Reason} ->
+            error(Reason)
+    end.
+
+mk_manifest_filename(Filename) when is_list(Filename) ->
+    Filename ++ ?MANIFEST;
+mk_manifest_filename(Filename) when is_binary(Filename) ->
+    <<Filename/binary, ?MANIFEST>>.
+
+mk_temp_absfilepath(Options, Transfer, Filename) ->
+    Unique = erlang:unique_integer([positive]),
+    TempFilename = integer_to_list(Unique) ++ "." ++ Filename,
+    filename:join(mk_absdir(Options, Transfer, temporary), TempFilename).
+
+mk_absdir(Options, _Transfer, temporary) ->
+    filename:join([get_storage_root(Options), ?TEMPDIR]);
+mk_absdir(Options, Transfer, result) ->
+    filename:join([get_storage_root(Options) | mk_result_reldir(Transfer)]).
+
+mk_absfilepath(Options, Transfer, What, Filename) ->
+    filename:join(mk_absdir(Options, Transfer, What), Filename).
+ +mk_result_reldir(Transfer = {ClientId, FileId}) -> + Hash = mk_transfer_hash(Transfer), + << + Bucket1:?BUCKET1_LEN/binary, + Bucket2:?BUCKET2_LEN/binary, + BucketRest/binary + >> = binary:encode_hex(Hash), + [ + binary_to_list(Bucket1), + binary_to_list(Bucket2), + binary_to_list(BucketRest), + emqx_ft_fs_util:escape_filename(ClientId), + emqx_ft_fs_util:escape_filename(FileId) + ]. + +dirnames_to_transfer(ClientId, FileId) -> + {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}. + +mk_transfer_hash(Transfer) -> + crypto:hash(?BUCKET_HASH, term_to_binary(Transfer)). + +get_storage_root(Options) -> + maps:get(root, Options, filename:join([emqx:data_dir(), file_transfer, exports])). diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_api.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_api.erl new file mode 100644 index 000000000..abb774f82 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_api.erl @@ -0,0 +1,182 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_exporter_fs_api). + +-behaviour(minirest_api). + +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx/include/logger.hrl"). 
+ +%% Swagger specs from hocon schema +-export([ + api_spec/0, + paths/0, + schema/1, + namespace/0 +]). + +-export([ + fields/1, + roots/0 +]). + +%% API callbacks +-export([ + '/file_transfer/file'/2 +]). + +-export([mk_export_uri/2]). + +%% + +namespace() -> "file_transfer". + +api_spec() -> + emqx_dashboard_swagger:spec(?MODULE, #{ + check_schema => true, filter => fun emqx_ft_api:check_ft_enabled/2 + }). + +paths() -> + [ + "/file_transfer/file" + ]. + +schema("/file_transfer/file") -> + #{ + 'operationId' => '/file_transfer/file', + get => #{ + tags => [<<"file_transfer">>], + summary => <<"Download a particular file">>, + description => ?DESC("file_get"), + parameters => [ + hoconsc:ref(file_node), + hoconsc:ref(file_ref) + ], + responses => #{ + 200 => <<"Operation success">>, + 404 => emqx_dashboard_swagger:error_codes(['NOT_FOUND'], <<"Not found">>), + 503 => emqx_dashboard_swagger:error_codes( + ['SERVICE_UNAVAILABLE'], <<"Service unavailable">> + ) + } + } + }. + +roots() -> + [ + file_node, + file_ref + ]. + +-spec fields(hocon_schema:name()) -> hocon_schema:fields(). +fields(file_ref) -> + [ + {fileref, + hoconsc:mk(binary(), #{ + in => query, + desc => <<"File reference">>, + example => <<"file1">>, + required => true + })} + ]; +fields(file_node) -> + [ + {node, + hoconsc:mk(binary(), #{ + in => query, + desc => <<"Node under which the file is located">>, + example => atom_to_list(node()), + required => true + })} + ]. 
+ +'/file_transfer/file'(get, #{query_string := Query}) -> + try + Node = parse_node(maps:get(<<"node">>, Query)), + Filepath = parse_filepath(maps:get(<<"fileref">>, Query)), + case emqx_ft_storage_exporter_fs_proto_v1:read_export_file(Node, Filepath, self()) of + {ok, ReaderPid} -> + FileData = emqx_ft_storage_fs_reader:table(ReaderPid), + {200, + #{ + <<"content-type">> => <<"application/data">>, + <<"content-disposition">> => <<"attachment">> + }, + FileData}; + {error, enoent} -> + {404, error_msg('NOT_FOUND', <<"Not found">>)}; + {error, Error} -> + ?SLOG(warning, #{msg => "get_ready_transfer_fail", error => Error}), + {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)} + end + catch + throw:{invalid, Param} -> + {404, + error_msg( + 'NOT_FOUND', + iolist_to_binary(["Invalid query parameter: ", Param]) + )}; + error:{erpc, noconnection} -> + {503, error_msg('SERVICE_UNAVAILABLE', <<"Service unavailable">>)} + end. + +error_msg(Code, Msg) -> + #{code => Code, message => emqx_utils:readable_error_msg(Msg)}. + +-spec mk_export_uri(node(), file:name()) -> + uri_string:uri_string(). +mk_export_uri(Node, Filepath) -> + emqx_dashboard_swagger:relative_uri([ + "/file_transfer/file?", + uri_string:compose_query([ + {"node", atom_to_list(Node)}, + {"fileref", Filepath} + ]) + ]). + +%% + +parse_node(NodeBin) -> + case emqx_utils:safe_to_existing_atom(NodeBin) of + {ok, Node} -> + Node; + {error, _} -> + throw({invalid, NodeBin}) + end. + +parse_filepath(PathBin) -> + case filename:pathtype(PathBin) of + relative -> + ok; + absolute -> + throw({invalid, PathBin}) + end, + PathComponents = filename:split(PathBin), + case lists:any(fun is_special_component/1, PathComponents) of + false -> + filename:join(PathComponents); + true -> + throw({invalid, PathBin}) + end. + +is_special_component(<<".", _/binary>>) -> + true; +is_special_component([$. | _]) -> + true; +is_special_component(_) -> + false. 
diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_proxy.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_proxy.erl
new file mode 100644
index 000000000..50e02db6f
--- /dev/null
+++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs_proxy.erl
@@ -0,0 +1,50 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+%% These functions are called via RPC by `emqx_ft_storage_exporter_fs`.
+%% They populate the call with the actual storage, which may be configured
+%% differently on a concrete node.
+
+-module(emqx_ft_storage_exporter_fs_proxy).
+
+-export([
+    list_exports_local/1,
+    read_export_file_local/2
+]).
+
+list_exports_local(Query) ->
+    emqx_ft_storage:with_storage_type(local, fun(Storage) ->
+        case emqx_ft_storage_exporter:exporter(Storage) of
+            {emqx_ft_storage_exporter_fs, Options} ->
+                emqx_ft_storage_exporter_fs:list_local(Options, Query)
+            % NOTE
+            % This case clause is currently deemed unreachable by dialyzer.
+            % InvalidExporter ->
+            %     {error, {invalid_exporter, InvalidExporter}}
+        end
+    end).
+ +read_export_file_local(Filepath, CallerPid) -> + emqx_ft_storage:with_storage_type(local, fun(Storage) -> + case emqx_ft_storage_exporter:exporter(Storage) of + {emqx_ft_storage_exporter_fs, Options} -> + emqx_ft_storage_exporter_fs:start_reader(Options, Filepath, CallerPid) + % NOTE + % This case clause is currently deemed unreachable by dialyzer. + % InvalidExporter -> + % {error, {invalid_exporter, InvalidExporter}} + end + end). diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_s3.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_s3.erl new file mode 100644 index 000000000..b9f07d5c0 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_s3.erl @@ -0,0 +1,251 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_exporter_s3). + +-include_lib("emqx/include/logger.hrl"). + +%% Exporter API +-export([start_export/3]). +-export([write/2]). +-export([complete/2]). +-export([discard/1]). +-export([list/2]). + +-export([ + start/1, + stop/1, + update/2 +]). + +-type options() :: emqx_s3:profile_config(). +-type transfer() :: emqx_ft:transfer(). +-type filemeta() :: emqx_ft:filemeta(). 
+-type exportinfo() :: #{ + transfer := transfer(), + name := file:name(), + uri := uri_string:uri_string(), + timestamp := emqx_datetime:epoch_second(), + size := _Bytes :: non_neg_integer(), + filemeta => filemeta() +}. + +-type query() :: emqx_ft_storage:query(cursor()). +-type page(T) :: emqx_ft_storage:page(T, cursor()). +-type cursor() :: iodata(). + +-type export_st() :: #{ + pid := pid(), + filemeta := filemeta(), + transfer := transfer() +}. + +-define(S3_PROFILE_ID, ?MODULE). +-define(FILEMETA_VSN, <<"1">>). +-define(S3_LIST_LIMIT, 500). + +%%-------------------------------------------------------------------- +%% Exporter behaviour +%%-------------------------------------------------------------------- + +-spec start_export(options(), transfer(), filemeta()) -> + {ok, export_st()} | {error, term()}. +start_export(_Options, Transfer, Filemeta) -> + Options = #{ + key => s3_key(Transfer, Filemeta), + headers => s3_headers(Transfer, Filemeta) + }, + case emqx_s3:start_uploader(?S3_PROFILE_ID, Options) of + {ok, Pid} -> + true = erlang:link(Pid), + {ok, #{filemeta => Filemeta, pid => Pid}}; + {error, _Reason} = Error -> + Error + end. + +-spec write(export_st(), iodata()) -> + {ok, export_st()} | {error, term()}. +write(#{pid := Pid} = ExportSt, IoData) -> + case emqx_s3_uploader:write(Pid, IoData) of + ok -> + {ok, ExportSt}; + {error, _Reason} = Error -> + Error + end. + +-spec complete(export_st(), emqx_ft:checksum()) -> + ok | {error, term()}. +complete(#{pid := Pid} = _ExportSt, _Checksum) -> + emqx_s3_uploader:complete(Pid). + +-spec discard(export_st()) -> + ok. +discard(#{pid := Pid} = _ExportSt) -> + emqx_s3_uploader:abort(Pid). + +-spec list(options(), query()) -> + {ok, page(exportinfo())} | {error, term()}. +list(Options, Query) -> + emqx_s3:with_client(?S3_PROFILE_ID, fun(Client) -> list(Client, Options, Query) end). 
+ +%%-------------------------------------------------------------------- +%% Exporter behaviour (lifecycle) +%%-------------------------------------------------------------------- + +-spec start(options()) -> ok | {error, term()}. +start(Options) -> + emqx_s3:start_profile(?S3_PROFILE_ID, Options). + +-spec stop(options()) -> ok. +stop(_Options) -> + ok = emqx_s3:stop_profile(?S3_PROFILE_ID). + +-spec update(options(), options()) -> ok. +update(_OldOptions, NewOptions) -> + emqx_s3:update_profile(?S3_PROFILE_ID, NewOptions). + +%%-------------------------------------------------------------------- +%% Internal functions +%% ------------------------------------------------------------------- + +s3_key(Transfer, #{name := Filename}) -> + s3_prefix(Transfer) ++ "/" ++ Filename. + +s3_prefix({ClientId, FileId} = _Transfer) -> + emqx_ft_fs_util:escape_filename(ClientId) ++ "/" ++ emqx_ft_fs_util:escape_filename(FileId). + +s3_headers({ClientId, FileId}, Filemeta) -> + #{ + %% The ClientID MUST be a UTF-8 Encoded String + <<"x-amz-meta-clientid">> => ClientId, + %% It [Topic Name] MUST be a UTF-8 Encoded String + <<"x-amz-meta-fileid">> => FileId, + <<"x-amz-meta-filemeta">> => s3_header_filemeta(Filemeta), + <<"x-amz-meta-filemeta-vsn">> => ?FILEMETA_VSN + }. + +s3_header_filemeta(Filemeta) -> + emqx_utils_json:encode(emqx_ft:encode_filemeta(Filemeta), [force_utf8, uescape]). 
+ +list(Client, _Options, #{transfer := Transfer}) -> + case list_key_info(Client, [{prefix, s3_prefix(Transfer)}, {max_keys, ?S3_LIST_LIMIT}]) of + {ok, {Exports, _Marker}} -> + {ok, #{items => Exports}}; + {error, _Reason} = Error -> + Error + end; +list(Client, _Options, Query) -> + Limit = maps:get(limit, Query, undefined), + Marker = emqx_maybe:apply(fun decode_cursor/1, maps:get(following, Query, undefined)), + case list_pages(Client, Marker, Limit, []) of + {ok, {Exports, undefined}} -> + {ok, #{items => Exports}}; + {ok, {Exports, NextMarker}} -> + {ok, #{items => Exports, cursor => encode_cursor(NextMarker)}}; + {error, _Reason} = Error -> + Error + end. + +list_pages(Client, Marker, Limit, Acc) -> + MaxKeys = min(?S3_LIST_LIMIT, Limit), + ListOptions = [{marker, Marker} || Marker =/= undefined], + case list_key_info(Client, [{max_keys, MaxKeys} | ListOptions]) of + {ok, {Exports, NextMarker}} -> + list_accumulate(Client, Limit, NextMarker, [Exports | Acc]); + {error, _Reason} = Error -> + Error + end. + +list_accumulate(_Client, _Limit, undefined, Acc) -> + {ok, {flatten_pages(Acc), undefined}}; +list_accumulate(Client, undefined, Marker, Acc) -> + list_pages(Client, Marker, undefined, Acc); +list_accumulate(Client, Limit, Marker, Acc = [Exports | _]) -> + case Limit - length(Exports) of + 0 -> + {ok, {flatten_pages(Acc), Marker}}; + Left -> + list_pages(Client, Marker, Left, Acc) + end. + +flatten_pages(Pages) -> + lists:append(lists:reverse(Pages)). 
+ +list_key_info(Client, ListOptions) -> + case emqx_s3_client:list(Client, ListOptions) of + {ok, Result} -> + ?SLOG(debug, #{msg => "list_key_info", result => Result}), + KeyInfos = proplists:get_value(contents, Result, []), + Exports = lists:filtermap( + fun(KeyInfo) -> key_info_to_exportinfo(Client, KeyInfo) end, KeyInfos + ), + Marker = + case proplists:get_value(is_truncated, Result, false) of + true -> + next_marker(KeyInfos); + false -> + undefined + end, + {ok, {Exports, Marker}}; + {error, _Reason} = Error -> + Error + end. + +encode_cursor(Key) -> + unicode:characters_to_binary(Key). + +decode_cursor(Cursor) -> + case unicode:characters_to_list(Cursor) of + Key when is_list(Key) -> + Key; + _ -> + error({badarg, cursor}) + end. + +next_marker(KeyInfos) -> + proplists:get_value(key, lists:last(KeyInfos)). + +key_info_to_exportinfo(Client, KeyInfo) -> + Key = proplists:get_value(key, KeyInfo), + case parse_transfer_and_name(Key) of + {ok, {Transfer, Name}} -> + {true, #{ + transfer => Transfer, + name => unicode:characters_to_binary(Name), + uri => emqx_s3_client:uri(Client, Key), + timestamp => datetime_to_epoch_second(proplists:get_value(last_modified, KeyInfo)), + size => proplists:get_value(size, KeyInfo) + }}; + {error, _Reason} -> + false + end. + +-define(EPOCH_START, 62167219200). + +datetime_to_epoch_second(DateTime) -> + calendar:datetime_to_gregorian_seconds(DateTime) - ?EPOCH_START. + +parse_transfer_and_name(Key) -> + case string:split(Key, "/", all) of + [ClientId, FileId, Name] -> + Transfer = { + emqx_ft_fs_util:unescape_filename(ClientId), + emqx_ft_fs_util:unescape_filename(FileId) + }, + {ok, {Transfer, Name}}; + _ -> + {error, invalid_key} + end. 
diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_fs.erl new file mode 100644 index 000000000..010d004a1 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_fs.erl @@ -0,0 +1,506 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Filesystem storage backend +%% +%% NOTE +%% If you plan to change storage layout please consult `emqx_ft_storage_fs_gc` +%% to see how much it would break or impair GC. + +-module(emqx_ft_storage_fs). + +-behaviour(emqx_ft_storage). + +-include_lib("emqx/include/logger.hrl"). +-include_lib("snabbkaffe/include/trace.hrl"). + +-export([child_spec/1]). + +% Segments-related API +-export([store_filemeta/3]). +-export([store_segment/3]). +-export([read_filemeta/2]). +-export([list/3]). +-export([pread/5]). +-export([lookup_local_assembler/1]). +-export([assemble/3]). + +-export([transfers/1]). + +% GC API +% TODO: This is quickly becomes hairy. +-export([get_root/1]). +-export([get_subdir/2]). +-export([get_subdir/3]). + +-export([files/2]). + +-export([on_config_update/2]). +-export([start/1]). +-export([stop/1]). + +-export_type([storage/0]). +-export_type([filefrag/1]). +-export_type([filefrag/0]). +-export_type([transferinfo/0]). + +-export_type([file_error/0]). 
+ +-type transfer() :: emqx_ft:transfer(). +-type offset() :: emqx_ft:offset(). +-type filemeta() :: emqx_ft:filemeta(). +-type segment() :: emqx_ft:segment(). + +-type segmentinfo() :: #{ + offset := offset(), + size := _Bytes :: non_neg_integer() +}. + +-type transferinfo() :: #{ + filemeta => filemeta() +}. + +% TODO naming +-type filefrag(T) :: #{ + path := file:name(), + timestamp := emqx_datetime:epoch_second(), + size := _Bytes :: non_neg_integer(), + fragment := T +}. + +-type filefrag() :: filefrag( + {filemeta, filemeta()} + | {segment, segmentinfo()} +). + +-define(FRAGDIR, frags). +-define(TEMPDIR, tmp). +-define(MANIFEST, "MANIFEST.json"). +-define(SEGMENT, "SEG"). + +-type segments() :: #{ + root := file:name(), + gc := #{ + interval := non_neg_integer(), + maximum_segments_ttl := non_neg_integer(), + minimum_segments_ttl := non_neg_integer() + } +}. + +-type storage() :: #{ + type := 'local', + segments := segments(), + exporter := emqx_ft_storage_exporter:exporter() +}. + +-type file_error() :: + file:posix() + %% Filename is incompatible with the backing filesystem. + | badarg + %% System limit (e.g. number of ports) reached. + | system_limit. + +%% Related resources childspecs +-spec child_spec(storage()) -> + [supervisor:child_spec()]. +child_spec(Storage) -> + [ + #{ + id => emqx_ft_storage_fs_gc, + start => {emqx_ft_storage_fs_gc, start_link, [Storage]}, + restart => permanent + } + ]. + +%% Store manifest in the backing filesystem. +%% Atomic operation. +-spec store_filemeta(storage(), transfer(), filemeta()) -> + % Quota? Some lower level errors? + ok | {error, conflict} | {error, file_error()}. 
+store_filemeta(Storage, Transfer, Meta) -> + Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), ?MANIFEST), + case read_file(Filepath, fun decode_filemeta/1) of + {ok, Meta} -> + _ = touch_file(Filepath), + ok; + {ok, Conflict} -> + ?SLOG(warning, #{ + msg => "filemeta_conflict", transfer => Transfer, new => Meta, old => Conflict + }), + % TODO + % We won't see conflicts in case of concurrent `store_filemeta` + % requests. It's rather odd scenario so it's fine not to worry + % about it too much now. + {error, conflict}; + {error, Reason} when Reason =:= notfound; Reason =:= corrupted; Reason =:= enoent -> + write_file_atomic(Storage, Transfer, Filepath, encode_filemeta(Meta)); + {error, _} = Error -> + Error + end. + +%% Store a segment in the backing filesystem. +%% Atomic operation. +-spec store_segment(storage(), transfer(), segment()) -> + % Where is the checksum gets verified? Upper level probably. + % Quota? Some lower level errors? + ok | {error, file_error()}. +store_segment(Storage, Transfer, Segment = {_Offset, Content}) -> + Filename = mk_segment_filename(Segment), + Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), Filename), + write_file_atomic(Storage, Transfer, Filepath, Content). + +-spec read_filemeta(storage(), transfer()) -> + {ok, filemeta()} | {error, corrupted} | {error, file_error()}. +read_filemeta(Storage, Transfer) -> + Filepath = mk_filepath(Storage, Transfer, get_subdirs_for(fragment), ?MANIFEST), + read_file(Filepath, fun decode_filemeta/1). + +-spec list(storage(), transfer(), _What :: fragment) -> + % Some lower level errors? {error, notfound}? + % Result will contain zero or only one filemeta. + {ok, [filefrag({filemeta, filemeta()} | {segment, segmentinfo()})]} + | {error, file_error()}. 
+list(Storage, Transfer, What = fragment) -> + Dirname = mk_filedir(Storage, Transfer, get_subdirs_for(What)), + case file:list_dir(Dirname) of + {ok, Filenames} -> + % TODO + % In case of `What = result` there might be more than one file (though + % extremely bad luck is needed for that, e.g. concurrent assemblers with + % different filemetas from different nodes). This might be unexpected for a + % client given the current protocol, yet might be helpful in the future. + {ok, filtermap_files(fun mk_filefrag/2, Dirname, Filenames)}; + {error, enoent} -> + {ok, []}; + {error, _} = Error -> + Error + end. + +-spec pread(storage(), transfer(), filefrag(), offset(), _Size :: non_neg_integer()) -> + {ok, _Content :: iodata()} | {error, eof} | {error, file_error()}. +pread(_Storage, _Transfer, Frag, Offset, Size) -> + Filepath = maps:get(path, Frag), + case file:open(Filepath, [read, raw, binary]) of + {ok, IoDevice} -> + % NOTE + % Reading empty file is always `eof`. + Read = file:pread(IoDevice, Offset, Size), + ok = file:close(IoDevice), + case Read of + {ok, Content} -> + {ok, Content}; + eof -> + {error, eof}; + {error, Reason} -> + {error, Reason} + end; + {error, Reason} -> + {error, Reason} + end. + +-spec assemble(storage(), transfer(), emqx_ft:bytes()) -> + {async, _Assembler :: pid()} | ok | {error, _TODO}. +assemble(Storage, Transfer, Size) -> + LookupSources = [ + fun() -> lookup_local_assembler(Transfer) end, + fun() -> lookup_remote_assembler(Transfer) end, + fun() -> check_if_already_exported(Storage, Transfer) end, + fun() -> ensure_local_assembler(Storage, Transfer, Size) end + ], + lookup_assembler(LookupSources). + +%% + +files(Storage, Query) -> + emqx_ft_storage_exporter:list(Storage, Query). + +%% + +on_config_update(StorageOld, StorageNew) -> + % NOTE: this will reset GC timer, frequent changes would postpone GC indefinitely + ok = emqx_ft_storage_fs_gc:reset(StorageNew), + emqx_ft_storage_exporter:on_config_update(StorageOld, StorageNew). 
+ +start(Storage) -> + ok = lists:foreach( + fun(ChildSpec) -> + {ok, _Child} = supervisor:start_child(emqx_ft_sup, ChildSpec) + end, + child_spec(Storage) + ), + ok = emqx_ft_storage_exporter:on_config_update(undefined, Storage), + ok. + +stop(Storage) -> + ok = emqx_ft_storage_exporter:on_config_update(Storage, undefined), + ok = lists:foreach( + fun(#{id := ChildId}) -> + _ = supervisor:terminate_child(emqx_ft_sup, ChildId), + ok = supervisor:delete_child(emqx_ft_sup, ChildId) + end, + child_spec(Storage) + ), + ok. + +%% + +lookup_assembler([LastSource]) -> + LastSource(); +lookup_assembler([Source | Sources]) -> + case Source() of + {error, not_found} -> lookup_assembler(Sources); + Result -> Result + end. + +check_if_already_exported(Storage, Transfer) -> + case files(Storage, #{transfer => Transfer}) of + {ok, #{items := [_ | _]}} -> ok; + _ -> {error, not_found} + end. + +lookup_local_assembler(Transfer) -> + case emqx_ft_assembler:where(Transfer) of + Pid when is_pid(Pid) -> {async, Pid}; + _ -> {error, not_found} + end. + +lookup_remote_assembler(Transfer) -> + Nodes = emqx:running_nodes() -- [node()], + Assemblers = lists:flatmap( + fun + ({ok, {async, Pid}}) -> [Pid]; + (_) -> [] + end, + emqx_ft_storage_fs_proto_v1:list_assemblers(Nodes, Transfer) + ), + case Assemblers of + [Pid | _] -> {async, Pid}; + _ -> {error, not_found} + end. + +ensure_local_assembler(Storage, Transfer, Size) -> + {ok, Pid} = emqx_ft_assembler_sup:ensure_child(Storage, Transfer, Size), + {async, Pid}. + +-spec transfers(storage()) -> + {ok, #{transfer() => transferinfo()}}. +transfers(Storage) -> + % TODO `Continuation` + % There might be millions of transfers on the node, we need a protocol and + % storage schema to iterate through them effectively. + ClientIds = try_list_dir(get_root(Storage)), + {ok, + lists:foldl( + fun(ClientId, Acc) -> transfers(Storage, ClientId, Acc) end, + #{}, + ClientIds + )}. 
+ +transfers(Storage, ClientId, AccIn) -> + Dirname = filename:join(get_root(Storage), ClientId), + case file:list_dir(Dirname) of + {ok, FileIds} -> + lists:foldl( + fun(FileId, Acc) -> + Transfer = dirnames_to_transfer(ClientId, FileId), + read_transferinfo(Storage, Transfer, Acc) + end, + AccIn, + FileIds + ); + {error, _Reason} -> + ?tp(warning, "list_dir_failed", #{ + storage => Storage, + directory => Dirname + }), + AccIn + end. + +read_transferinfo(Storage, Transfer, Acc) -> + case read_filemeta(Storage, Transfer) of + {ok, Filemeta} -> + Acc#{Transfer => #{filemeta => Filemeta}}; + {error, enoent} -> + Acc#{Transfer => #{}}; + {error, Reason} -> + ?tp(warning, "read_transferinfo_failed", #{ + storage => Storage, + transfer => Transfer, + reason => Reason + }), + Acc + end. + +-spec get_root(storage()) -> + file:name(). +get_root(Storage) -> + case emqx_utils_maps:deep_find([segments, root], Storage) of + {ok, Root} -> + Root; + {not_found, _, _} -> + filename:join([emqx:data_dir(), file_transfer, segments]) + end. + +-spec get_subdir(storage(), transfer()) -> + file:name(). +get_subdir(Storage, Transfer) -> + mk_filedir(Storage, Transfer, []). + +-spec get_subdir(storage(), transfer(), fragment | temporary) -> + file:name(). +get_subdir(Storage, Transfer, What) -> + mk_filedir(Storage, Transfer, get_subdirs_for(What)). + +get_subdirs_for(fragment) -> + [?FRAGDIR]; +get_subdirs_for(temporary) -> + [?TEMPDIR]. + +-define(PRELUDE(Vsn, Meta), [<<"filemeta">>, Vsn, Meta]). + +encode_filemeta(Meta) -> + emqx_utils_json:encode(?PRELUDE(_Vsn = 1, emqx_ft:encode_filemeta(Meta))). + +decode_filemeta(Binary) when is_binary(Binary) -> + ?PRELUDE(_Vsn = 1, Map) = emqx_utils_json:decode(Binary, [return_maps]), + case emqx_ft:decode_filemeta(Map) of + {ok, Meta} -> + Meta; + {error, Reason} -> + error(Reason) + end. + +mk_segment_filename({Offset, Content}) -> + lists:concat([?SEGMENT, ".", Offset, ".", byte_size(Content)]). 
+ +break_segment_filename(Filename) -> + Regex = "^" ?SEGMENT "[.]([0-9]+)[.]([0-9]+)$", + Result = re:run(Filename, Regex, [{capture, all_but_first, list}]), + case Result of + {match, [Offset, Size]} -> + {ok, #{offset => list_to_integer(Offset), size => list_to_integer(Size)}}; + nomatch -> + {error, invalid} + end. + +mk_filedir(Storage, {ClientId, FileId}, SubDirs) -> + filename:join([ + get_root(Storage), + emqx_ft_fs_util:escape_filename(ClientId), + emqx_ft_fs_util:escape_filename(FileId) + | SubDirs + ]). + +dirnames_to_transfer(ClientId, FileId) -> + {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}. + +mk_filepath(Storage, Transfer, SubDirs, Filename) -> + filename:join(mk_filedir(Storage, Transfer, SubDirs), Filename). + +try_list_dir(Dirname) -> + case file:list_dir(Dirname) of + {ok, List} -> List; + {error, _} -> [] + end. + +-include_lib("kernel/include/file.hrl"). + +read_file(Filepath, DecodeFun) -> + emqx_ft_fs_util:read_decode_file(Filepath, DecodeFun). + +write_file_atomic(Storage, Transfer, Filepath, Content) when is_binary(Content) -> + TempFilepath = mk_temp_filepath(Storage, Transfer, filename:basename(Filepath)), + Result = emqx_utils:pipeline( + [ + fun filelib:ensure_dir/1, + fun write_contents/2, + fun(_) -> mv_temp_file(TempFilepath, Filepath) end + ], + TempFilepath, + Content + ), + case Result of + {ok, _, _} -> + _ = file:delete(TempFilepath), + ok; + {error, Reason, _} -> + {error, Reason} + end. + +mk_temp_filepath(Storage, Transfer, Filename) -> + Unique = erlang:unique_integer([positive]), + filename:join(get_subdir(Storage, Transfer, temporary), mk_filename([Unique, ".", Filename])). + +mk_filename(Comps) -> + lists:append(lists:map(fun mk_filename_component/1, Comps)). 
+ +mk_filename_component(I) when is_integer(I) -> integer_to_list(I); +mk_filename_component(A) when is_atom(A) -> atom_to_list(A); +mk_filename_component(B) when is_binary(B) -> unicode:characters_to_list(B); +mk_filename_component(S) when is_list(S) -> S. + +write_contents(Filepath, Content) -> + file:write_file(Filepath, Content). + +mv_temp_file(TempFilepath, Filepath) -> + _ = filelib:ensure_dir(Filepath), + file:rename(TempFilepath, Filepath). + +touch_file(Filepath) -> + Now = erlang:localtime(), + file:change_time(Filepath, _Mtime = Now, _Atime = Now). + +filtermap_files(Fun, Dirname, Filenames) -> + lists:filtermap(fun(Filename) -> Fun(Dirname, Filename) end, Filenames). + +mk_filefrag(Dirname, Filename = ?MANIFEST) -> + mk_filefrag(Dirname, Filename, filemeta, fun read_frag_filemeta/2); +mk_filefrag(Dirname, Filename = ?SEGMENT ++ _) -> + mk_filefrag(Dirname, Filename, segment, fun read_frag_segmentinfo/2); +mk_filefrag(_Dirname, _Filename) -> + ?tp(warning, "rogue_file_found", #{ + directory => _Dirname, + filename => _Filename + }), + false. + +mk_filefrag(Dirname, Filename, Tag, Fun) -> + Filepath = filename:join(Dirname, Filename), + % TODO error handling? + {ok, Fileinfo} = file:read_file_info(Filepath), + case Fun(Filename, Filepath) of + {ok, Frag} -> + {true, #{ + path => Filepath, + timestamp => Fileinfo#file_info.mtime, + size => Fileinfo#file_info.size, + fragment => {Tag, Frag} + }}; + {error, _Reason} -> + ?tp(warning, "mk_filefrag_failed", #{ + directory => Dirname, + filename => Filename, + type => Tag, + reason => _Reason + }), + false + end. + +read_frag_filemeta(_Filename, Filepath) -> + read_file(Filepath, fun decode_filemeta/1). + +read_frag_segmentinfo(Filename, _Filepath) -> + break_segment_filename(Filename). 
diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs_gc.erl b/apps/emqx_ft/src/emqx_ft_storage_fs_gc.erl new file mode 100644 index 000000000..4e9a6d56c --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_fs_gc.erl @@ -0,0 +1,393 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Filesystem storage GC +%% +%% This is conceptually a part of the Filesystem storage backend, even +%% though it's tied to the backend module with somewhat narrow interface. + +-module(emqx_ft_storage_fs_gc). + +-include_lib("emqx_ft/include/emqx_ft_storage_fs.hrl"). +-include_lib("emqx/include/logger.hrl"). +-include_lib("emqx/include/types.hrl"). +-include_lib("kernel/include/file.hrl"). +-include_lib("snabbkaffe/include/trace.hrl"). + +-export([start_link/1]). + +-export([collect/0]). +-export([collect/3]). +-export([reset/0]). +-export([reset/1]). + +-behaviour(gen_server). +-export([init/1]). +-export([handle_call/3]). +-export([handle_cast/2]). +-export([handle_info/2]). + +-record(st, { + next_gc_timer :: maybe(reference()), + last_gc :: maybe(gcstats()) +}). + +-type gcstats() :: #gcstats{}. + +-define(IS_ENABLED(INTERVAL), (is_integer(INTERVAL) andalso INTERVAL > 0)). 
+ +%% + +start_link(Storage) -> + gen_server:start_link(mk_server_ref(global), ?MODULE, Storage, []). + +-spec collect() -> gcstats(). +collect() -> + gen_server:call(mk_server_ref(global), {collect, erlang:system_time()}, infinity). + +-spec reset() -> ok | {error, _}. +reset() -> + emqx_ft_storage:with_storage_type(local, fun reset/1). + +-spec reset(emqx_ft_storage_fs:storage()) -> ok. +reset(Storage) -> + gen_server:cast(mk_server_ref(global), {reset, gc_interval(Storage)}). + +collect(Storage, Transfer, Nodes) -> + gc_enabled(Storage) andalso cast_collect(mk_server_ref(global), Storage, Transfer, Nodes). + +mk_server_ref(Name) -> + % TODO + {via, gproc, {n, l, {?MODULE, Name}}}. + +%% + +init(Storage) -> + St = #st{}, + {ok, start_timer(gc_interval(Storage), St)}. + +handle_call({collect, CalledAt}, _From, St) -> + StNext = maybe_collect_garbage(CalledAt, St), + {reply, StNext#st.last_gc, StNext}; +handle_call(Call, From, St) -> + ?SLOG(error, #{msg => "unexpected_call", call => Call, from => From}), + {noreply, St}. + +handle_cast({collect, Storage, Transfer, [Node | Rest]}, St) -> + ok = do_collect_transfer(Storage, Transfer, Node, St), + case Rest of + [_ | _] -> + cast_collect(self(), Storage, Transfer, Rest); + [] -> + ok + end, + {noreply, St}; +handle_cast({reset, Interval}, St) -> + {noreply, start_timer(Interval, cancel_timer(St))}; +handle_cast(Cast, St) -> + ?SLOG(error, #{msg => "unexpected_cast", cast => Cast}), + {noreply, St}. + +handle_info({timeout, TRef, collect}, St = #st{next_gc_timer = TRef}) -> + StNext = do_collect_garbage(St), + {noreply, start_timer(StNext#st{next_gc_timer = undefined})}. + +do_collect_transfer(Storage, Transfer, Node, St = #st{}) when Node == node() -> + Stats = try_collect_transfer(Storage, Transfer, complete, init_gcstats()), + ok = maybe_report(Stats, St), + ok; +do_collect_transfer(_Storage, _Transfer, _Node, _St = #st{}) -> + % TODO + ok. 
+ +cast_collect(Ref, Storage, Transfer, Nodes) -> + gen_server:cast(Ref, {collect, Storage, Transfer, Nodes}). + +maybe_collect_garbage(_CalledAt, St = #st{last_gc = undefined}) -> + do_collect_garbage(St); +maybe_collect_garbage(CalledAt, St = #st{last_gc = #gcstats{finished_at = FinishedAt}}) -> + case FinishedAt > CalledAt of + true -> + St; + false -> + start_timer(do_collect_garbage(cancel_timer(St))) + end. + +do_collect_garbage(St = #st{}) -> + emqx_ft_storage:with_storage_type(local, fun(Storage) -> + Stats = collect_garbage(Storage), + ok = maybe_report(Stats, Storage), + St#st{last_gc = Stats} + end). + +maybe_report(#gcstats{errors = Errors}, Storage) when map_size(Errors) > 0 -> + ?tp(warning, "garbage_collection_errors", #{errors => Errors, storage => Storage}); +maybe_report(#gcstats{} = _Stats, _Storage) -> + ?tp(garbage_collection, #{stats => _Stats, storage => _Storage}). + +start_timer(St) -> + Interval = emqx_ft_storage:with_storage_type(local, fun gc_interval/1), + start_timer(Interval, St). + +start_timer(Interval, St = #st{next_gc_timer = undefined}) when ?IS_ENABLED(Interval) -> + St#st{next_gc_timer = emqx_utils:start_timer(Interval, collect)}; +start_timer(Interval, St) -> + ?SLOG(warning, #{msg => "periodic_gc_disabled", interval => Interval}), + St. + +cancel_timer(St = #st{next_gc_timer = undefined}) -> + St; +cancel_timer(St = #st{next_gc_timer = TRef}) -> + ok = emqx_utils:cancel_timer(TRef), + St#st{next_gc_timer = undefined}. + +gc_enabled(Storage) -> + ?IS_ENABLED(gc_interval(Storage)). + +gc_interval(Storage) -> + emqx_ft_conf:gc_interval(Storage). + +%% + +collect_garbage(Storage) -> + Stats = init_gcstats(), + {ok, Transfers} = emqx_ft_storage_fs:transfers(Storage), + collect_garbage(Storage, Transfers, Stats). + +collect_garbage(Storage, Transfers, Stats) -> + finish_gcstats( + maps:fold( + fun(Transfer, TransferInfo, StatsAcc) -> + % TODO: throttling? 
+ try_collect_transfer(Storage, Transfer, TransferInfo, StatsAcc) + end, + Stats, + Transfers + ) + ). + +try_collect_transfer(Storage, Transfer, TransferInfo = #{}, Stats) -> + % File transfer might still be incomplete. + % Any outdated fragments and temporary files should be collectable. As a kind of + % heuristic, we delete the transfer directory itself only if it is also outdated + % _and was empty at the start of GC_, as a precaution against races between + % writers and GCs. + Cutoff = + case get_segments_ttl(Storage, TransferInfo) of + TTL when is_integer(TTL) -> + erlang:system_time(second) - TTL; + undefined -> + 0 + end, + {FragCleaned, Stats1} = collect_outdated_fragments(Storage, Transfer, Cutoff, Stats), + {TempCleaned, Stats2} = collect_outdated_tempfiles(Storage, Transfer, Cutoff, Stats1), + % TODO: collect empty directories separately + case FragCleaned and TempCleaned of + true -> + collect_transfer_directory(Storage, Transfer, Cutoff, Stats2); + false -> + Stats2 + end; +try_collect_transfer(Storage, Transfer, complete, Stats) -> + % File transfer is complete. + % We should be good to delete fragments and temporary files with their respective + % directories altogether. + {_, Stats1} = collect_fragments(Storage, Transfer, Stats), + {_, Stats2} = collect_tempfiles(Storage, Transfer, Stats1), + Stats2. + +collect_fragments(Storage, Transfer, Stats) -> + Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, fragment), + maybe_collect_directory(Dirname, true, Stats). + +collect_tempfiles(Storage, Transfer, Stats) -> + Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, temporary), + maybe_collect_directory(Dirname, true, Stats). + +collect_outdated_fragments(Storage, Transfer, Cutoff, Stats) -> + Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, fragment), + maybe_collect_directory(Dirname, filter_older_than(Cutoff), Stats).
+ +collect_outdated_tempfiles(Storage, Transfer, Cutoff, Stats) -> + Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer, temporary), + maybe_collect_directory(Dirname, filter_older_than(Cutoff), Stats). + +collect_transfer_directory(Storage, Transfer, Cutoff, Stats) -> + Dirname = emqx_ft_storage_fs:get_subdir(Storage, Transfer), + Filter = + case Stats of + #gcstats{directories = 0} -> + % Nothing was collected; this is a leftover from a past complete transfer GC. + filter_older_than(Cutoff); + #gcstats{} -> + % Usual incomplete transfer GC, collect directories unconditionally. + true + end, + case collect_empty_directory(Dirname, Filter, Stats) of + {true, StatsNext} -> + collect_parents(Dirname, get_segments_root(Storage), StatsNext); + {false, StatsNext} -> + StatsNext + end. + +filter_older_than(Cutoff) -> + fun(_Filepath, #file_info{mtime = ModifiedAt}) -> ModifiedAt =< Cutoff end. + +collect_parents(Dirname, Until, Stats) -> + Parent = filename:dirname(Dirname), + case is_same_filepath(Parent, Until) orelse file:del_dir(Parent) of + true -> + Stats; + ok -> + ?tp(garbage_collected_directory, #{path => Dirname}), + collect_parents(Parent, Until, account_gcstat_directory(Stats)); + {error, eexist} -> + Stats; + {error, Reason} -> + register_gcstat_error({directory, Parent}, Reason, Stats) + end. + +maybe_collect_directory(Dirpath, Filter, Stats) -> + case filelib:is_dir(Dirpath) of + true -> + collect_filepath(Dirpath, Filter, Stats); + false -> + {true, Stats} + end. + +-spec collect_filepath(file:name(), Filter, gcstats()) -> {boolean(), gcstats()} when + Filter :: boolean() | fun((file:name(), file:file_info()) -> boolean()). +collect_filepath(Filepath, Filter, Stats) -> + case file:read_link_info(Filepath, [{time, posix}, raw]) of + {ok, Fileinfo} -> + collect_filepath(Filepath, Fileinfo, Filter, Stats); + {error, Reason} -> + {Reason == enoent, register_gcstat_error({path, Filepath}, Reason, Stats)} + end.
+ +collect_filepath(Filepath, #file_info{type = directory} = Fileinfo, Filter, Stats) -> + collect_directory(Filepath, Fileinfo, Filter, Stats); +collect_filepath(Filepath, #file_info{type = regular} = Fileinfo, Filter, Stats) -> + case filter_filepath(Filter, Filepath, Fileinfo) andalso file:delete(Filepath, [raw]) of + false -> + {false, Stats}; + ok -> + ?tp(garbage_collected_file, #{path => Filepath}), + {true, account_gcstat(Fileinfo, Stats)}; + {error, Reason} -> + {Reason == enoent, register_gcstat_error({file, Filepath}, Reason, Stats)} + end; +collect_filepath(Filepath, Fileinfo, _Filter, Stats) -> + {false, register_gcstat_error({file, Filepath}, {unexpected, Fileinfo}, Stats)}. + +collect_directory(Dirpath, Fileinfo, Filter, Stats) -> + case file:list_dir(Dirpath) of + {ok, Filenames} -> + {Clean, StatsNext} = collect_files(Dirpath, Filenames, Filter, Stats), + case Clean of + true -> + collect_empty_directory(Dirpath, Fileinfo, Filter, StatsNext); + false -> + {false, StatsNext} + end; + {error, Reason} -> + {false, register_gcstat_error({directory, Dirpath}, Reason, Stats)} + end. + +collect_files(Dirname, Filenames, Filter, Stats) -> + lists:foldl( + fun(Filename, {Complete, StatsAcc}) -> + Filepath = filename:join(Dirname, Filename), + {Collected, StatsNext} = collect_filepath(Filepath, Filter, StatsAcc), + {Collected andalso Complete, StatsNext} + end, + {true, Stats}, + Filenames + ). + +collect_empty_directory(Dirpath, Filter, Stats) -> + case file:read_link_info(Dirpath, [{time, posix}, raw]) of + {ok, Dirinfo} -> + collect_empty_directory(Dirpath, Dirinfo, Filter, Stats); + {error, Reason} -> + {Reason == enoent, register_gcstat_error({directory, Dirpath}, Reason, Stats)} + end. 
+ +collect_empty_directory(Dirpath, Dirinfo, Filter, Stats) -> + case filter_filepath(Filter, Dirpath, Dirinfo) andalso file:del_dir(Dirpath) of + false -> + {false, Stats}; + ok -> + ?tp(garbage_collected_directory, #{path => Dirpath}), + {true, account_gcstat_directory(Stats)}; + {error, Reason} -> + {false, register_gcstat_error({directory, Dirpath}, Reason, Stats)} + end. + +filter_filepath(Filter, _, _) when is_boolean(Filter) -> + Filter; +filter_filepath(Filter, Filepath, Fileinfo) when is_function(Filter) -> + Filter(Filepath, Fileinfo). + +is_same_filepath(P1, P2) when is_binary(P1) andalso is_binary(P2) -> + filename:absname(P1) == filename:absname(P2); +is_same_filepath(P1, P2) when is_list(P1) andalso is_list(P2) -> + filename:absname(P1) == filename:absname(P2); +is_same_filepath(P1, P2) when is_binary(P1) -> + is_same_filepath(P1, filepath_to_binary(P2)). + +filepath_to_binary(S) -> + unicode:characters_to_binary(S, unicode, file:native_name_encoding()). + +get_segments_ttl(Storage, TransferInfo) -> + clamp(emqx_ft_conf:segments_ttl(Storage), try_get_filemeta_ttl(TransferInfo)). + +try_get_filemeta_ttl(#{filemeta := Filemeta}) -> + maps:get(segments_ttl, Filemeta, undefined); +try_get_filemeta_ttl(#{}) -> + undefined. + +clamp({Min, Max}, V) -> + min(Max, max(Min, V)); +clamp(undefined, V) -> + V. + +%% + +init_gcstats() -> + #gcstats{started_at = erlang:system_time()}. + +finish_gcstats(Stats) -> + Stats#gcstats{finished_at = erlang:system_time()}. + +account_gcstat(Fileinfo, Stats = #gcstats{files = Files, space = Space}) -> + Stats#gcstats{ + files = Files + 1, + space = Space + Fileinfo#file_info.size + }. + +account_gcstat_directory(Stats = #gcstats{directories = Directories}) -> + Stats#gcstats{ + directories = Directories + 1 + }. + +register_gcstat_error(Subject, Error, Stats = #gcstats{errors = Errors}) -> + Stats#gcstats{errors = Errors#{Subject => Error}}. + +%% + +get_segments_root(Storage) -> + emqx_ft_storage_fs:get_root(Storage). 
diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs_proxy.erl b/apps/emqx_ft/src/emqx_ft_storage_fs_proxy.erl new file mode 100644 index 000000000..e6358cb14 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_fs_proxy.erl @@ -0,0 +1,36 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% These functions are called via RPC by `emqx_ft_storage_fs` +%% They populate the call with actual storage which may be configured differently +%% on a concrete node. + +-module(emqx_ft_storage_fs_proxy). + +-export([ + list_local/2, + pread_local/4, + lookup_local_assembler/1 +]). + +list_local(Transfer, What) -> + emqx_ft_storage:with_storage_type(local, list, [Transfer, What]). + +pread_local(Transfer, Frag, Offset, Size) -> + emqx_ft_storage:with_storage_type(local, pread, [Transfer, Frag, Offset, Size]). + +lookup_local_assembler(Transfer) -> + emqx_ft_storage:with_storage_type(local, lookup_local_assembler, [Transfer]).
diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs_reader.erl b/apps/emqx_ft/src/emqx_ft_storage_fs_reader.erl new file mode 100644 index 000000000..513872edd --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_fs_reader.erl @@ -0,0 +1,139 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_reader). + +-behaviour(gen_server). + +-include_lib("emqx/include/logger.hrl"). +-include_lib("emqx/include/types.hrl"). + +%% API +-export([ + start_link/2, + start_supervised/2, + table/1, + table/2, + read/2 +]). + +%% gen_server callbacks +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + +-define(DEFAULT_CHUNK_SIZE, 1024). +-define(IS_FILENAME(Filename), (is_list(Filename) or is_binary(Filename))). + +%%-------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------- + +-spec table(pid()) -> qlc:query_handle(). +table(ReaderPid) when is_pid(ReaderPid) -> + table(ReaderPid, ?DEFAULT_CHUNK_SIZE). + +-spec table(pid(), pos_integer()) -> qlc:query_handle(). 
+table(ReaderPid, Bytes) when is_pid(ReaderPid) andalso is_integer(Bytes) andalso Bytes > 0 -> + NextFun = fun NextFun(Pid) -> + case emqx_ft_storage_fs_reader_proto_v1:read(node(Pid), Pid, Bytes) of + eof -> + []; + {ok, Data} -> + [Data] ++ fun() -> NextFun(Pid) end; + {ErrorKind, Reason} when ErrorKind =:= badrpc; ErrorKind =:= error -> + ?SLOG(warning, #{msg => "file_read_error", kind => ErrorKind, reason => Reason}), + [] + end + end, + qlc:table(fun() -> NextFun(ReaderPid) end, []). + +-spec start_link(pid(), filename:filename()) -> startlink_ret(). +start_link(CallerPid, Filename) when + is_pid(CallerPid) andalso + ?IS_FILENAME(Filename) +-> + gen_server:start_link(?MODULE, [CallerPid, Filename], []). + +-spec start_supervised(pid(), filename:filename()) -> startlink_ret(). +start_supervised(CallerPid, Filename) when + is_pid(CallerPid) andalso + ?IS_FILENAME(Filename) +-> + emqx_ft_storage_fs_reader_sup:start_child(CallerPid, Filename). + +-spec read(pid(), pos_integer()) -> {ok, binary()} | eof | {error, term()}. +read(Pid, Bytes) when + is_pid(Pid) andalso + is_integer(Bytes) andalso + Bytes > 0 +-> + gen_server:call(Pid, {read, Bytes}). + +%%-------------------------------------------------------------------- +%% gen_server callbacks +%%-------------------------------------------------------------------- + +init([CallerPid, Filename]) -> + MRef = erlang:monitor(process, CallerPid), + case file:open(Filename, [read, raw, binary]) of + {ok, File} -> + {ok, #{ + filename => Filename, + file => File, + caller_pid => CallerPid, + mref => MRef + }}; + {error, Reason} -> + {stop, Reason} + end. 
+ +handle_call({read, Bytes}, _From, #{file := File} = State) -> + case file:read(File, Bytes) of + {ok, Data} -> + ?SLOG(debug, #{msg => "read", bytes => byte_size(Data)}), + {reply, {ok, Data}, State}; + eof -> + ?SLOG(debug, #{msg => "read", eof => true}), + {stop, normal, eof, State}; + {error, Reason} = Error -> + {stop, Reason, Error, State} + end; +handle_call(Msg, _From, State) -> + {reply, {error, {bad_call, Msg}}, State}. + +handle_info( + {'DOWN', MRef, process, CallerPid, _Reason}, #{mref := MRef, caller_pid := CallerPid} = State +) -> + {stop, {caller_down, CallerPid}, State}; +handle_info(Msg, State) -> + ?SLOG(warning, #{msg => "unexpected_message", info_msg => Msg}), + {noreply, State}. + +handle_cast(Msg, State) -> + ?SLOG(warning, #{msg => "unexpected_message", case_msg => Msg}), + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs_reader_sup.erl b/apps/emqx_ft/src/emqx_ft_storage_fs_reader_sup.erl new file mode 100644 index 000000000..8c8aea6b3 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_storage_fs_reader_sup.erl @@ -0,0 +1,49 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_reader_sup). + +-behaviour(supervisor). + +-export([ + init/1, + start_link/0, + start_child/2 +]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +start_child(CallerPid, Filename) -> + Childspec = #{ + id => {CallerPid, Filename}, + start => {emqx_ft_storage_fs_reader, start_link, [CallerPid, Filename]}, + restart => temporary + }, + case supervisor:start_child(?MODULE, Childspec) of + {ok, Pid} -> + {ok, Pid}; + {error, {Reason, _Child}} -> + {error, Reason} + end. + +init(_) -> + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 1000 + }, + {ok, {SupFlags, []}}. diff --git a/apps/emqx_ft/src/emqx_ft_sup.erl b/apps/emqx_ft/src/emqx_ft_sup.erl new file mode 100644 index 000000000..0308668ab --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_sup.erl @@ -0,0 +1,65 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_sup). + +-behaviour(supervisor). + +-export([start_link/0]). + +-export([init/1]). + +-define(SERVER, ?MODULE). + +start_link() -> + supervisor:start_link({local, ?SERVER}, ?MODULE, []). 
+ +init([]) -> + SupFlags = #{ + strategy => one_for_one, + intensity => 100, + period => 10 + }, + + AssemblerSup = #{ + id => emqx_ft_assembler_sup, + start => {emqx_ft_assembler_sup, start_link, []}, + restart => permanent, + shutdown => infinity, + type => supervisor, + modules => [emqx_ft_assembler_sup] + }, + + FileReaderSup = #{ + id => emqx_ft_storage_fs_reader_sup, + start => {emqx_ft_storage_fs_reader_sup, start_link, []}, + restart => permanent, + shutdown => infinity, + type => supervisor, + modules => [emqx_ft_storage_fs_reader_sup] + }, + + Responder = #{ + id => emqx_ft_responder_sup, + start => {emqx_ft_responder_sup, start_link, []}, + restart => permanent, + shutdown => infinity, + type => worker, + modules => [emqx_ft_responder_sup] + }, + + ChildSpecs = [Responder, AssemblerSup, FileReaderSup], + {ok, {SupFlags, ChildSpecs}}. diff --git a/apps/emqx_ft/src/proto/emqx_ft_storage_exporter_fs_proto_v1.erl b/apps/emqx_ft/src/proto/emqx_ft_storage_exporter_fs_proto_v1.erl new file mode 100644 index 000000000..222891f54 --- /dev/null +++ b/apps/emqx_ft/src/proto/emqx_ft_storage_exporter_fs_proto_v1.erl @@ -0,0 +1,54 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_exporter_fs_proto_v1). 
+ +-behaviour(emqx_bpapi). + +-export([introduced_in/0]). + +-export([list_exports/2]). +-export([read_export_file/3]). + +-include_lib("emqx/include/bpapi.hrl"). + +introduced_in() -> + "5.0.17". + +-spec list_exports([node()], emqx_ft_storage:query(_LocalCursor)) -> + emqx_rpc:erpc_multicall( + {ok, [emqx_ft_storage:file_info()]} + | {error, file:posix() | disabled | {invalid_storage_type, _}} + ). +list_exports(Nodes, Query) -> + erpc:multicall( + Nodes, + emqx_ft_storage_exporter_fs_proxy, + list_exports_local, + [Query] + ). + +-spec read_export_file(node(), file:name(), pid()) -> + {ok, emqx_ft_storage:reader()} + | {error, term()} + | no_return(). +read_export_file(Node, Filepath, CallerPid) -> + erpc:call( + Node, + emqx_ft_storage_exporter_fs_proxy, + read_export_file_local, + [Filepath, CallerPid] + ). diff --git a/apps/emqx_ft/src/proto/emqx_ft_storage_fs_proto_v1.erl b/apps/emqx_ft/src/proto/emqx_ft_storage_fs_proto_v1.erl new file mode 100644 index 000000000..989a48555 --- /dev/null +++ b/apps/emqx_ft/src/proto/emqx_ft_storage_fs_proto_v1.erl @@ -0,0 +1,49 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_proto_v1). + +-behaviour(emqx_bpapi). + +-export([introduced_in/0]). + +-export([multilist/3]). 
+-export([pread/5]). +-export([list_assemblers/2]). + +-type offset() :: emqx_ft:offset(). +-type transfer() :: emqx_ft:transfer(). +-type filefrag() :: emqx_ft_storage_fs:filefrag(). + +-include_lib("emqx/include/bpapi.hrl"). + +introduced_in() -> + "5.0.17". + +-spec multilist([node()], transfer(), fragment | result) -> + emqx_rpc:erpc_multicall({ok, [filefrag()]} | {error, term()}). +multilist(Nodes, Transfer, What) -> + erpc:multicall(Nodes, emqx_ft_storage_fs_proxy, list_local, [Transfer, What]). + +-spec pread(node(), transfer(), filefrag(), offset(), _Size :: non_neg_integer()) -> + {ok, [filefrag()]} | {error, term()} | no_return(). +pread(Node, Transfer, Frag, Offset, Size) -> + erpc:call(Node, emqx_ft_storage_fs_proxy, pread_local, [Transfer, Frag, Offset, Size]). + +-spec list_assemblers([node()], transfer()) -> + emqx_rpc:erpc_multicall([pid()]). +list_assemblers(Nodes, Transfer) -> + erpc:multicall(Nodes, emqx_ft_storage_fs_proxy, lookup_local_assembler, [Transfer]). diff --git a/apps/emqx_ft/src/proto/emqx_ft_storage_fs_reader_proto_v1.erl b/apps/emqx_ft/src/proto/emqx_ft_storage_fs_reader_proto_v1.erl new file mode 100644 index 000000000..ea089111d --- /dev/null +++ b/apps/emqx_ft/src/proto/emqx_ft_storage_fs_reader_proto_v1.erl @@ -0,0 +1,35 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_reader_proto_v1). + +-behaviour(emqx_bpapi). + +-export([introduced_in/0]). + +-export([read/3]). + +-include_lib("emqx/include/bpapi.hrl"). + +introduced_in() -> + "5.0.17". + +-spec read(node(), pid(), pos_integer()) -> + {ok, binary()} | eof | {error, term()} | no_return(). +read(Node, Pid, Bytes) when + is_atom(Node) andalso is_pid(Pid) andalso is_integer(Bytes) andalso Bytes > 0 +-> + emqx_rpc:call(Node, emqx_ft_storage_fs_reader, read, [Pid, Bytes]). diff --git a/apps/emqx_ft/test/emqx_ft_SUITE.erl b/apps/emqx_ft/test/emqx_ft_SUITE.erl new file mode 100644 index 000000000..7d64f9716 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_SUITE.erl @@ -0,0 +1,782 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +-define(assertRCName(RCName, PublishRes), + ?assertMatch( + {ok, #{reason_code_name := RCName}}, + PublishRes + ) +). + +all() -> + [ + {group, single_node}, + {group, cluster} + ]. 
+ +groups() -> + [ + {single_node, [parallel], [ + t_assemble_crash, + t_corrupted_segment_retry, + t_invalid_checksum, + t_invalid_fileid, + t_invalid_filename, + t_invalid_meta, + t_invalid_topic_format, + t_meta_conflict, + t_nasty_clientids_fileids, + t_no_meta, + t_no_segment, + t_simple_transfer + ]}, + {cluster, [], [ + t_switch_node, + t_unreliable_migrating_client, + {g_concurrent_fins, [{repeat_until_any_fail, 8}], [ + t_concurrent_fins + ]} + ]} + ]. + +init_per_suite(Config) -> + ok = emqx_common_test_helpers:start_apps([emqx_ft], set_special_configs(Config)), + Config. + +end_per_suite(_Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft]), + ok. + +set_special_configs(Config) -> + fun + (emqx_ft) -> + % NOTE + % Inhibit local fs GC to simulate it isn't fast enough to collect + % complete transfers. + Storage = emqx_utils_maps:deep_merge( + emqx_ft_test_helpers:local_storage(Config), + #{<<"local">> => #{<<"segments">> => #{<<"gc">> => #{<<"interval">> => 0}}}} + ), + emqx_ft_test_helpers:load_config(#{ + <<"enable">> => true, + <<"storage">> => Storage + }); + (_) -> + ok + end. + +init_per_testcase(Case, Config) -> + ClientId = atom_to_binary(Case), + case ?config(group, Config) of + cluster -> + [{clientid, ClientId} | Config]; + _ -> + {ok, C} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}]), + {ok, _} = emqtt:connect(C), + [{client, C}, {clientid, ClientId} | Config] + end. +end_per_testcase(_Case, Config) -> + _ = [ok = emqtt:stop(C) || {client, C} <- Config], + ok. + +init_per_group(Group = cluster, Config) -> + Cluster = mk_cluster_specs(Config), + ct:pal("Starting ~p", [Cluster]), + Nodes = [ + emqx_common_test_helpers:start_slave(Name, Opts#{join_to => node()}) + || {Name, Opts} <- Cluster + ], + [{group, Group}, {cluster_nodes, Nodes} | Config]; +init_per_group(Group, Config) -> + [{group, Group} | Config]. 
+ +end_per_group(cluster, Config) -> + ok = lists:foreach( + fun emqx_ft_test_helpers:stop_additional_node/1, + ?config(cluster_nodes, Config) + ); +end_per_group(_Group, _Config) -> + ok. + +mk_cluster_specs(Config) -> + Specs = [ + {core, emqx_ft_SUITE1, #{listener_ports => [{tcp, 2883}]}}, + {core, emqx_ft_SUITE2, #{listener_ports => [{tcp, 3883}]}} + ], + CommOpts = [ + {env, [{emqx, boot_modules, [broker, listeners]}]}, + {apps, [emqx_ft]}, + {conf, [{[listeners, Proto, default, enabled], false} || Proto <- [ssl, ws, wss]]}, + {env_handler, set_special_configs(Config)} + ], + emqx_common_test_helpers:emqx_cluster( + Specs, + CommOpts + ). + +%%-------------------------------------------------------------------- +%% Tests +%%-------------------------------------------------------------------- + +t_invalid_topic_format(Config) -> + C = ?config(client, Config), + + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/fileid">>, <<>>, 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/fileid/">>, <<>>, 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/fileid/offset">>, <<>>, 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/fileid/fin/offset">>, <<>>, 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/">>, <<>>, 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file/X/Y/Z">>, <<>>, 1) + ), + %% should not be handled by `emqx_ft` + ?assertRCName( + no_matching_subscribers, + emqtt:publish(C, <<"$file">>, <<>>, 1) + ). + +t_invalid_fileid(Config) -> + C = ?config(client, Config), + ?assertRCName( + unspecified_error, + emqtt:publish(C, <<"$file//init">>, <<>>, 1) + ). 
+ +t_invalid_filename(Config) -> + C = ?config(client, Config), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f1">>), encode_meta(meta(".", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f2">>), encode_meta(meta("..", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f2">>), encode_meta(meta("../nice", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f3">>), encode_meta(meta("/etc/passwd", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish( + C, + mk_init_topic(<<"f4">>), + encode_meta(meta(lists:duplicate(1000, $A), <<>>)), + 1 + ) + ), + ?assertRCName( + success, + emqtt:publish(C, mk_init_topic(<<"f5">>), encode_meta(meta("146%", <<>>)), 1) + ). + +t_simple_transfer(Config) -> + C = ?config(client, Config), + + Filename = "topsecret.pdf", + FileId = <<"f1">>, + + Data = [<<"first">>, <<"second">>, <<"third">>], + + Meta = #{size := Filesize} = meta(Filename, Data), + + ?assertRCName( + success, + emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1) + ), + + lists:foreach( + fun({Chunk, Offset}) -> + ?assertRCName( + success, + emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1) + ) + end, + with_offsets(Data) + ), + + ?assertRCName( + success, + emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1) + ), + + [Export] = list_files(?config(clientid, Config)), + ?assertEqual( + {ok, iolist_to_binary(Data)}, + read_export(Export) + ). 
+
+t_nasty_clientids_fileids(_Config) ->
+    Transfers = [
+        {<<".">>, <<".">>},
+        {<<"🌚"/utf8>>, <<"🌝"/utf8>>},
+        {<<"../..">>, <<"😤"/utf8>>},
+        {<<"/etc/passwd">>, <<"whitehat">>},
+        {<<"; rm -rf / ;">>, <<"whitehat">>}
+    ],
+
+    ok = lists:foreach(
+        fun({ClientId, FileId}) ->
+            ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "justfile", ClientId),
+            [Export] = list_files(ClientId),
+            ?assertEqual({ok, ClientId}, read_export(Export))
+        end,
+        Transfers
+    ).
+
+t_meta_conflict(Config) ->
+    C = ?config(client, Config),
+
+    Filename = "topsecret.pdf",
+    FileId = <<"f1">>,
+
+    Meta = meta(Filename, [<<"x">>]),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)
+    ),
+
+    ConflictMeta = Meta#{name => "conflict.pdf"},
+
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_init_topic(FileId), encode_meta(ConflictMeta), 1)
+    ).
+
+t_no_meta(Config) ->
+    C = ?config(client, Config),
+
+    FileId = <<"f1">>,
+    Data = <<"first">>,
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_segment_topic(FileId, 0), Data, 1)
+    ),
+
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_fin_topic(FileId, 42), <<>>, 1)
+    ).
+
+t_no_segment(Config) ->
+    C = ?config(client, Config),
+
+    Filename = "topsecret.pdf",
+    FileId = <<"f1">>,
+
+    Data = [<<"first">>, <<"second">>, <<"third">>],
+
+    Meta = #{size := Filesize} = meta(Filename, Data),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)
+    ),
+
+    lists:foreach(
+        fun({Chunk, Offset}) ->
+            ?assertRCName(
+                success,
+                emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1)
+            )
+        end,
+        %% Skip the first segment
+        tl(with_offsets(Data))
+    ),
+
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
+    ).
+ +t_invalid_meta(Config) -> + C = ?config(client, Config), + + FileId = <<"f1">>, + + %% Invalid schema + Meta = #{foo => <<"bar">>}, + MetaPayload = emqx_utils_json:encode(Meta), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(FileId), MetaPayload, 1) + ), + + %% Invalid JSON + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(FileId), <<"{oops;">>, 1) + ). + +t_invalid_checksum(Config) -> + C = ?config(client, Config), + + Filename = "topsecret.pdf", + FileId = <<"f1">>, + + Data = [<<"first">>, <<"second">>, <<"third">>], + + Meta = #{size := Filesize} = meta(Filename, Data), + MetaPayload = encode_meta(Meta#{checksum => {sha256, sha256(<<"invalid">>)}}), + + ?assertRCName( + success, + emqtt:publish(C, mk_init_topic(FileId), MetaPayload, 1) + ), + + lists:foreach( + fun({Chunk, Offset}) -> + ?assertRCName( + success, + emqtt:publish(C, mk_segment_topic(FileId, Offset), Chunk, 1) + ) + end, + with_offsets(Data) + ), + + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1) + ). 
+
+t_corrupted_segment_retry(Config) ->
+    C = ?config(client, Config),
+
+    Filename = "corruption.pdf",
+    FileId = <<"4242-4242">>,
+
+    Data = [<<"first">>, <<"second">>, <<"third">>],
+    [
+        {Seg1, Offset1},
+        {Seg2, Offset2},
+        {Seg3, Offset3}
+    ] = with_offsets(Data),
+    [
+        Checksum1,
+        Checksum2,
+        Checksum3
+    ] = [binary:encode_hex(sha256(S)) || S <- Data],
+
+    Meta = #{size := Filesize} = meta(Filename, Data),
+
+    ?assertRCName(success, emqtt:publish(C, mk_init_topic(FileId), encode_meta(Meta), 1)),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_segment_topic(FileId, Offset1, Checksum1), Seg1, 1)
+    ),
+
+    % segment is corrupted
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_segment_topic(FileId, Offset2, Checksum2), <<Seg2/binary, 42>>, 1)
+    ),
+
+    % retry
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_segment_topic(FileId, Offset2, Checksum2), Seg2, 1)
+    ),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_segment_topic(FileId, Offset3, Checksum3), Seg3, 1)
+    ),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_fin_topic(FileId, Filesize), <<>>, 1)
+    ).
+
+t_switch_node(Config) ->
+    [Node | _] = ?config(cluster_nodes, Config),
+    AdditionalNodePort = emqx_ft_test_helpers:tcp_port(Node),
+
+    ClientId = <<"t_switch_node-migrating_client">>,
+
+    {ok, C1} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, AdditionalNodePort}]),
+    {ok, _} = emqtt:connect(C1),
+
+    Filename = "multinode_upload.txt",
+    FileId = <<"f1">>,
+
+    Data = [<<"first">>, <<"second">>, <<"third">>],
+    [{Data0, Offset0}, {Data1, Offset1}, {Data2, Offset2}] = with_offsets(Data),
+
+    %% First, publish metadata and the first segment to the additional node
+
+    Meta = #{size := Filesize} = meta(Filename, Data),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C1, mk_init_topic(FileId), encode_meta(Meta), 1)
+    ),
+    ?assertRCName(
+        success,
+        emqtt:publish(C1, mk_segment_topic(FileId, Offset0), Data0, 1)
+    ),
+
+    %% Then, switch the client to the main node
+    %% and publish the rest of the segments
+
+    ok = emqtt:stop(C1),
+    {ok, C2} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}]),
+    {ok, _} = emqtt:connect(C2),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C2, mk_segment_topic(FileId, Offset1), Data1, 1)
+    ),
+    ?assertRCName(
+        success,
+        emqtt:publish(C2, mk_segment_topic(FileId, Offset2), Data2, 1)
+    ),
+
+    ?assertRCName(
+        success,
+        emqtt:publish(C2, mk_fin_topic(FileId, Filesize), <<>>, 1)
+    ),
+
+    ok = emqtt:stop(C2),
+
+    %% Now check consistency of the file
+
+    [Export] = list_files(ClientId),
+    ?assertEqual(
+        {ok, iolist_to_binary(Data)},
+        read_export(Export)
+    ).
+
+t_assemble_crash(Config) ->
+    C = ?config(client, Config),
+
+    meck:new(emqx_ft_storage_fs),
+    meck:expect(emqx_ft_storage_fs, assemble, fun(_, _, _) -> meck:exception(error, oops) end),
+
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, <<"$file/someid/fin">>, <<>>, 1)
+    ).
+ +t_unreliable_migrating_client(Config) -> + NodeSelf = node(), + [Node1, Node2] = ?config(cluster_nodes, Config), + + ClientId = ?config(clientid, Config), + FileId = emqx_guid:to_hexstr(emqx_guid:gen()), + Filename = "migratory-birds-in-southern-hemisphere-2013.pdf", + Filesize = 1000, + Gen = emqx_ft_content_gen:new({{ClientId, FileId}, Filesize}, 16), + Payload = iolist_to_binary(emqx_ft_content_gen:consume(Gen, fun({Chunk, _, _}) -> Chunk end)), + Meta = meta(Filename, Payload), + + Context = #{ + clientid => ClientId, + fileid => FileId, + filesize => Filesize, + payload => Payload + }, + Commands = [ + % Connect to the broker on the current node + {fun connect_mqtt_client/2, [NodeSelf]}, + % Send filemeta and 3 initial segments + % (assuming client chose 100 bytes as a desired segment size) + {fun send_filemeta/2, [Meta]}, + {fun send_segment/3, [0, 100]}, + {fun send_segment/3, [100, 100]}, + {fun send_segment/3, [200, 100]}, + % Disconnect the client cleanly + {fun stop_mqtt_client/1, []}, + % Connect to the broker on `Node1` + {fun connect_mqtt_client/2, [Node1]}, + % Connect to the broker on `Node2` without first disconnecting from `Node1` + % Client forgot the state for some reason and started the transfer again. 
+ % (assuming this is usual for a client on a device that was rebooted) + {fun connect_mqtt_client/2, [Node2]}, + {fun send_filemeta/2, [Meta]}, + % This time it chose 200 bytes as a segment size + {fun send_segment/3, [0, 200]}, + {fun send_segment/3, [200, 200]}, + % But now it downscaled back to 100 bytes segments + {fun send_segment/3, [400, 100]}, + % Client lost connectivity and reconnected + % (also had last few segments unacked and decided to resend them) + {fun connect_mqtt_client/2, [Node2]}, + {fun send_segment/3, [200, 200]}, + {fun send_segment/3, [400, 200]}, + % Client lost connectivity and reconnected, this time to another node + % (also had last segment unacked and decided to resend it) + {fun connect_mqtt_client/2, [Node1]}, + {fun send_segment/3, [400, 200]}, + {fun send_segment/3, [600, eof]}, + {fun send_finish/1, []}, + % Client lost connectivity and reconnected, this time to the current node + % (client had `fin` unacked and decided to resend it) + {fun connect_mqtt_client/2, [NodeSelf]}, + {fun send_finish/1, []} + ], + _Context = run_commands(Commands, Context), + + Exports = list_files(?config(clientid, Config)), + + Node1Str = atom_to_list(Node1), + % TODO: this testcase is specific to local fs storage backend + ?assertMatch( + [#{"node" := Node1Str}], + fs_exported_file_attributes(Exports) + ), + + [ + ?assertEqual({ok, Payload}, read_export(Export)) + || Export <- Exports + ]. 
+ +t_concurrent_fins(Config) -> + ct:timetrap({seconds, 10}), + + NodeSelf = node(), + [Node1, Node2] = ?config(cluster_nodes, Config), + + ClientId = iolist_to_binary([ + ?config(clientid, Config), + integer_to_list(erlang:unique_integer()) + ]), + FileId = emqx_guid:to_hexstr(emqx_guid:gen()), + Filename = "migratory-birds-in-southern-hemisphere-2013.pdf", + Filesize = 100, + Gen = emqx_ft_content_gen:new({{ClientId, FileId}, Filesize}, 16), + Payload = iolist_to_binary(emqx_ft_content_gen:consume(Gen, fun({Chunk, _, _}) -> Chunk end)), + Meta = meta(Filename, Payload), + + %% Send filemeta and segments to Node1 + Context0 = #{ + clientid => ClientId, + fileid => FileId, + filesize => Filesize, + payload => Payload + }, + + Context1 = run_commands( + [ + {fun connect_mqtt_client/2, [Node1]}, + {fun send_filemeta/2, [Meta]}, + {fun send_segment/3, [0, 100]}, + {fun stop_mqtt_client/1, []} + ], + Context0 + ), + + %% Now send fins concurrently to the 3 nodes + Nodes = [Node1, Node2, NodeSelf], + SendFin = fun(Node) -> + run_commands( + [ + {fun connect_mqtt_client/2, [Node]}, + {fun send_finish/1, []} + ], + Context1 + ) + end, + + PidMons = lists:map( + fun(Node) -> + erlang:spawn_monitor(fun F() -> + _ = erlang:process_flag(trap_exit, true), + try + SendFin(Node) + catch + C:E -> + % NOTE: random delay to avoid livelock conditions + ct:pal("Node ~p did not send finish successfully: ~p:~p", [Node, C, E]), + ok = timer:sleep(rand:uniform(10)), + F() + end + end) + end, + Nodes + ), + ok = lists:foreach( + fun({Pid, MRef}) -> + receive + {'DOWN', MRef, process, Pid, normal} -> ok + end + end, + PidMons + ), + + %% Only one node should have the file + Exports = list_files(ClientId), + case fs_exported_file_attributes(Exports) of + [#{"node" := _Node}] -> + ok; + [#{"node" := _Node} | _] = Files -> + % ...But we can't really guarantee that + ct:comment({multiple_files_on_different_nodes, Files}) + end. 
+ +%%------------------------------------------------------------------------------ +%% Command helpers +%%------------------------------------------------------------------------------ + +%% Command runners + +run_commands(Commands, Context) -> + lists:foldl(fun run_command/2, Context, Commands). + +run_command({Command, Args}, Context) -> + ct:pal("COMMAND ~p ~p", [erlang:fun_info(Command, name), Args]), + erlang:apply(Command, Args ++ [Context]). + +%% Commands + +connect_mqtt_client(Node, ContextIn) -> + Context = #{clientid := ClientId} = disown_mqtt_client(ContextIn), + NodePort = emqx_ft_test_helpers:tcp_port(Node), + {ok, Client} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, NodePort}]), + {ok, _} = emqtt:connect(Client), + Context#{client => Client}. + +stop_mqtt_client(Context = #{client := Client}) -> + _ = emqtt:stop(Client), + maps:remove(client, Context). + +disown_mqtt_client(Context = #{client := Client}) -> + _ = erlang:unlink(Client), + maps:remove(client, Context); +disown_mqtt_client(Context = #{}) -> + Context. + +send_filemeta(Meta, Context = #{client := Client, fileid := FileId}) -> + ?assertRCName( + success, + emqtt:publish(Client, mk_init_topic(FileId), encode_meta(Meta), 1) + ), + Context. + +send_segment(Offset, Size, Context = #{client := Client, fileid := FileId, payload := Payload}) -> + Data = + case Size of + eof -> + binary:part(Payload, Offset, byte_size(Payload) - Offset); + N -> + binary:part(Payload, Offset, N) + end, + ?assertRCName( + success, + emqtt:publish(Client, mk_segment_topic(FileId, Offset), Data, 1) + ), + Context. + +send_finish(Context = #{client := Client, fileid := FileId, filesize := Filesize}) -> + ?assertRCName( + success, + emqtt:publish(Client, mk_fin_topic(FileId, Filesize), <<>>, 1) + ), + Context. 
+ +%%------------------------------------------------------------------------------ +%% Helpers +%%------------------------------------------------------------------------------ + +fs_exported_file_attributes(FSExports) -> + lists:map( + fun(#{uri := URIString}) -> + #{query := QS} = uri_string:parse(URIString), + maps:from_list(uri_string:dissect_query(QS)) + end, + lists:sort(FSExports) + ). + +mk_init_topic(FileId) -> + <<"$file/", FileId/binary, "/init">>. + +mk_segment_topic(FileId, Offset) when is_integer(Offset) -> + mk_segment_topic(FileId, integer_to_binary(Offset)); +mk_segment_topic(FileId, Offset) when is_binary(Offset) -> + <<"$file/", FileId/binary, "/", Offset/binary>>. + +mk_segment_topic(FileId, Offset, Checksum) when is_integer(Offset) -> + mk_segment_topic(FileId, integer_to_binary(Offset), Checksum); +mk_segment_topic(FileId, Offset, Checksum) when is_binary(Offset) -> + <<"$file/", FileId/binary, "/", Offset/binary, "/", Checksum/binary>>. + +mk_fin_topic(FileId, Size) when is_integer(Size) -> + mk_fin_topic(FileId, integer_to_binary(Size)); +mk_fin_topic(FileId, Size) when is_binary(Size) -> + <<"$file/", FileId/binary, "/fin/", Size/binary>>. + +with_offsets(Items) -> + {List, _} = lists:mapfoldl( + fun(Item, Offset) -> + {{Item, integer_to_binary(Offset)}, Offset + byte_size(Item)} + end, + 0, + Items + ), + List. + +sha256(Data) -> + crypto:hash(sha256, Data). + +meta(FileName, Data) -> + FullData = iolist_to_binary(Data), + #{ + name => FileName, + checksum => {sha256, sha256(FullData)}, + expire_at => erlang:system_time(_Unit = second) + 3600, + size => byte_size(FullData) + }. + +encode_meta(Meta) -> + emqx_utils_json:encode(emqx_ft:encode_filemeta(Meta)). + +list_files(ClientId) -> + {ok, #{items := Files}} = emqx_ft_storage:files(), + [File || File = #{transfer := {CId, _}} <- Files, CId == ClientId]. 
+ +read_export(#{path := AbsFilepath}) -> + % TODO: only works for the local filesystem exporter right now + file:read_file(AbsFilepath). diff --git a/apps/emqx_ft/test/emqx_ft_api_SUITE.erl b/apps/emqx_ft/test/emqx_ft_api_SUITE.erl new file mode 100644 index 000000000..f69e13a6d --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_api_SUITE.erl @@ -0,0 +1,304 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_api_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +-import(emqx_dashboard_api_test_helpers, [host/0, uri/1]). + +all() -> + [ + {group, single}, + {group, cluster} + ]. + +groups() -> + [ + {single, [], emqx_common_test_helpers:all(?MODULE)}, + {cluster, [], emqx_common_test_helpers:all(?MODULE)} + ]. + +init_per_suite(Config) -> + ok = emqx_mgmt_api_test_util:init_suite( + [emqx_conf, emqx_ft], emqx_ft_test_helpers:env_handler(Config) + ), + {ok, _} = emqx:update_config([rpc, port_discovery], manual), + Config. +end_per_suite(_Config) -> + ok = emqx_mgmt_api_test_util:end_suite([emqx_ft, emqx_conf]), + ok. 
+ +init_per_group(Group = cluster, Config) -> + Cluster = mk_cluster_specs(Config), + ct:pal("Starting ~p", [Cluster]), + Nodes = [ + emqx_common_test_helpers:start_slave(Name, Opts#{join_to => node()}) + || {Name, Opts} <- Cluster + ], + [{group, Group}, {cluster_nodes, Nodes} | Config]; +init_per_group(Group, Config) -> + [{group, Group} | Config]. + +end_per_group(cluster, Config) -> + ok = lists:foreach( + fun emqx_ft_test_helpers:stop_additional_node/1, + ?config(cluster_nodes, Config) + ); +end_per_group(_Group, _Config) -> + ok. + +mk_cluster_specs(Config) -> + Specs = [ + {core, emqx_ft_api_SUITE1, #{listener_ports => [{tcp, 2883}]}}, + {core, emqx_ft_api_SUITE2, #{listener_ports => [{tcp, 3883}]}} + ], + CommOpts = [ + {env, [{emqx, boot_modules, [broker, listeners]}]}, + {apps, [emqx_ft]}, + {conf, [{[listeners, Proto, default, enabled], false} || Proto <- [ssl, ws, wss]]}, + {env_handler, emqx_ft_test_helpers:env_handler(Config)} + ], + emqx_common_test_helpers:emqx_cluster( + Specs, + CommOpts + ). + +init_per_testcase(Case, Config) -> + [{tc, Case} | Config]. +end_per_testcase(t_ft_disabled, _Config) -> + emqx_config:put([file_transfer, enable], true); +end_per_testcase(_Case, _Config) -> + ok. 
+ +%%-------------------------------------------------------------------- +%% Tests +%%-------------------------------------------------------------------- + +t_list_files(Config) -> + ClientId = client_id(Config), + FileId = <<"f1">>, + + Node = lists:last(cluster(Config)), + ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "f1", <<"data">>, Node), + + {ok, 200, #{<<"files">> := Files}} = + request_json(get, uri(["file_transfer", "files"])), + + ?assertMatch( + [#{<<"clientid">> := ClientId, <<"fileid">> := <<"f1">>}], + [File || File = #{<<"clientid">> := CId} <- Files, CId == ClientId] + ), + + {ok, 200, #{<<"files">> := FilesTransfer}} = + request_json(get, uri(["file_transfer", "files", ClientId, FileId])), + + ?assertMatch( + [#{<<"clientid">> := ClientId, <<"fileid">> := <<"f1">>}], + FilesTransfer + ), + + ?assertMatch( + {ok, 404, #{<<"code">> := <<"FILES_NOT_FOUND">>}}, + request_json(get, uri(["file_transfer", "files", ClientId, <<"no-such-file">>])) + ). + +t_download_transfer(Config) -> + ClientId = client_id(Config), + FileId = <<"f1">>, + + Node = lists:last(cluster(Config)), + ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "f1", <<"data">>, Node), + + ?assertMatch( + {ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}}, + request_json( + get, + uri(["file_transfer", "file"]) ++ query(#{fileref => FileId}) + ) + ), + + ?assertMatch( + {ok, 503, _}, + request( + get, + uri(["file_transfer", "file"]) ++ + query(#{ + fileref => FileId, + node => <<"nonode@nohost">> + }) + ) + ), + + ?assertMatch( + {ok, 404, _}, + request( + get, + uri(["file_transfer", "file"]) ++ + query(#{ + fileref => <<"unknown_file">>, + node => node() + }) + ) + ), + + {ok, 200, #{<<"files">> := [File]}} = + request_json(get, uri(["file_transfer", "files", ClientId, FileId])), + + {ok, 200, Response} = request(get, host() ++ maps:get(<<"uri">>, File)), + + ?assertEqual( + <<"data">>, + Response + ). 
+ +t_list_files_paging(Config) -> + ClientId = client_id(Config), + NFiles = 20, + Nodes = cluster(Config), + Uploads = [ + {mk_file_id("file:", N), mk_file_name(N), pick(N, Nodes)} + || N <- lists:seq(1, NFiles) + ], + ok = lists:foreach( + fun({FileId, Name, Node}) -> + ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, <<"data">>, Node) + end, + Uploads + ), + + ?assertMatch( + {ok, 200, #{<<"files">> := [_, _, _], <<"cursor">> := _}}, + request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 3})) + ), + + {ok, 200, #{<<"files">> := Files}} = + request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 100})), + + ?assert(length(Files) >= NFiles), + + ?assertNotMatch( + {ok, 200, #{<<"cursor">> := _}}, + request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 100})) + ), + + ?assertMatch( + {ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}}, + request_json(get, uri(["file_transfer", "files"]) ++ query(#{limit => 0})) + ), + + ?assertMatch( + {ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}}, + request_json( + get, + uri(["file_transfer", "files"]) ++ query(#{following => <<"whatsthat!?">>}) + ) + ), + + PageThrough = fun PageThrough(Query, Acc) -> + case request_json(get, uri(["file_transfer", "files"]) ++ query(Query)) of + {ok, 200, #{<<"files">> := FilesPage, <<"cursor">> := Cursor}} -> + PageThrough(Query#{following => Cursor}, Acc ++ FilesPage); + {ok, 200, #{<<"files">> := FilesPage}} -> + Acc ++ FilesPage + end + end, + + ?assertEqual(Files, PageThrough(#{limit => 1}, [])), + ?assertEqual(Files, PageThrough(#{limit => 8}, [])), + ?assertEqual(Files, PageThrough(#{limit => NFiles}, [])). 
+ +t_ft_disabled(_Config) -> + ?assertMatch( + {ok, 200, _}, + request_json(get, uri(["file_transfer", "files"])) + ), + + ?assertMatch( + {ok, 400, _}, + request_json( + get, + uri(["file_transfer", "file"]) ++ query(#{fileref => <<"f1">>}) + ) + ), + + ok = emqx_config:put([file_transfer, enable], false), + + ?assertMatch( + {ok, 503, _}, + request_json(get, uri(["file_transfer", "files"])) + ), + + ?assertMatch( + {ok, 503, _}, + request_json( + get, + uri(["file_transfer", "file"]) ++ query(#{fileref => <<"f1">>, node => node()}) + ) + ). + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +cluster(Config) -> + [node() | proplists:get_value(cluster_nodes, Config, [])]. + +client_id(Config) -> + iolist_to_binary(io_lib:format("~s.~s", [?config(group, Config), ?config(tc, Config)])). + +mk_file_id(Prefix, N) -> + iolist_to_binary([Prefix, integer_to_list(N)]). + +mk_file_name(N) -> + "file." ++ integer_to_list(N). + +request(Method, Url) -> + emqx_mgmt_api_test_util:request(Method, Url, []). + +request_json(Method, Url) -> + case emqx_mgmt_api_test_util:request(Method, Url, []) of + {ok, Code, Body} -> + {ok, Code, json(Body)}; + Otherwise -> + Otherwise + end. + +json(Body) when is_binary(Body) -> + emqx_utils_json:decode(Body, [return_maps]). + +query(Params) -> + KVs = lists:map(fun({K, V}) -> uri_encode(K) ++ "=" ++ uri_encode(V) end, maps:to_list(Params)), + "?" ++ string:join(KVs, "&"). + +uri_encode(T) -> + emqx_http_lib:uri_encode(to_list(T)). + +to_list(A) when is_atom(A) -> + atom_to_list(A); +to_list(A) when is_integer(A) -> + integer_to_list(A); +to_list(B) when is_binary(B) -> + binary_to_list(B); +to_list(L) when is_list(L) -> + L. + +pick(N, List) -> + lists:nth(1 + (N rem length(List)), List). 
diff --git a/apps/emqx_ft/test/emqx_ft_assembler_SUITE.erl b/apps/emqx_ft/test/emqx_ft_assembler_SUITE.erl new file mode 100644 index 000000000..c1deeb3bc --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_assembler_SUITE.erl @@ -0,0 +1,265 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_assembler_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("kernel/include/file.hrl"). + +all() -> + [ + t_assemble_empty_transfer, + t_assemble_complete_local_transfer, + t_assemble_incomplete_transfer, + t_assemble_no_meta, + + % NOTE + % It depends on the side effects of all previous testcases. + t_list_transfers + ]. + +init_per_suite(Config) -> + Apps = application:ensure_all_started(gproc), + [{suite_apps, Apps} | Config]. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(TC, Config) -> + ok = snabbkaffe:start_trace(), + {ok, Pid} = emqx_ft_assembler_sup:start_link(), + [ + {storage_root, <<"file_transfer_root">>}, + {exports_root, <<"file_transfer_exports">>}, + {file_id, atom_to_binary(TC)}, + {assembler_sup, Pid} + | Config + ]. 
+ +end_per_testcase(_TC, Config) -> + ok = inspect_storage_root(Config), + ok = gen:stop(?config(assembler_sup, Config)), + ok = snabbkaffe:stop(), + ok. + +%% + +-define(CLIENTID1, <<"thatsme">>). +-define(CLIENTID2, <<"thatsnotme">>). + +t_assemble_empty_transfer(Config) -> + Storage = storage(Config), + Transfer = {?CLIENTID1, ?config(file_id, Config)}, + Filename = "important.pdf", + Meta = #{ + name => Filename, + size => 0, + expire_at => 42 + }, + ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta), + ?assertMatch( + {ok, [ + #{ + path := _, + timestamp := {{_, _, _}, {_, _, _}}, + fragment := {filemeta, Meta} + } + ]}, + emqx_ft_storage_fs:list(Storage, Transfer, fragment) + ), + Status = complete_assemble(Storage, Transfer, 0), + ?assertEqual({shutdown, ok}, Status), + {ok, [_Result = #{size := _Size = 0}]} = list_exports(Config, Transfer), + % ?assertEqual( + % {error, eof}, + % emqx_ft_storage_fs:pread(Storage, Transfer, Result, 0, Size) + % ), + ok. + +t_assemble_complete_local_transfer(Config) -> + Storage = storage(Config), + Transfer = {?CLIENTID2, ?config(file_id, Config)}, + Filename = "topsecret.pdf", + TransferSize = 10000 + rand:uniform(50000), + SegmentSize = 4096, + Gen = emqx_ft_content_gen:new({Transfer, TransferSize}, SegmentSize), + Hash = emqx_ft_content_gen:hash(Gen, crypto:hash_init(sha256)), + Meta = #{ + name => Filename, + checksum => {sha256, Hash}, + expire_at => 42 + }, + + ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta), + _ = emqx_ft_content_gen:consume( + Gen, + fun({Content, SegmentNum, _Meta}) -> + Offset = (SegmentNum - 1) * SegmentSize, + ?assertEqual( + ok, + emqx_ft_storage_fs:store_segment(Storage, Transfer, {Offset, Content}) + ) + end + ), + + {ok, Fragments} = emqx_ft_storage_fs:list(Storage, Transfer, fragment), + ?assertEqual((TransferSize div SegmentSize) + 1 + 1, length(Fragments)), + ?assertEqual( + [Meta], + [FM || #{fragment := {filemeta, FM}} <- Fragments], + Fragments + ), + + 
Status = complete_assemble(Storage, Transfer, TransferSize), + ?assertEqual({shutdown, ok}, Status), + + ?assertMatch( + {ok, [ + #{ + size := TransferSize, + meta := #{} + } + ]}, + list_exports(Config, Transfer) + ), + {ok, [#{path := AssemblyFilename}]} = list_exports(Config, Transfer), + ?assertMatch( + {ok, #file_info{type = regular, size = TransferSize}}, + file:read_file_info(AssemblyFilename) + ), + ok = emqx_ft_content_gen:check_file_consistency( + {Transfer, TransferSize}, + 100, + AssemblyFilename + ). + +t_assemble_incomplete_transfer(Config) -> + Storage = storage(Config), + Transfer = {?CLIENTID2, ?config(file_id, Config)}, + Filename = "incomplete.pdf", + TransferSize = 10000 + rand:uniform(50000), + SegmentSize = 4096, + Gen = emqx_ft_content_gen:new({Transfer, TransferSize}, SegmentSize), + Hash = emqx_ft_content_gen:hash(Gen, crypto:hash_init(sha256)), + Meta = #{ + name => Filename, + checksum => {sha256, Hash}, + size => TransferSize, + expire_at => 42 + }, + ok = emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta), + Status = complete_assemble(Storage, Transfer, TransferSize), + ?assertMatch({shutdown, {error, _}}, Status). + +t_assemble_no_meta(Config) -> + Storage = storage(Config), + Transfer = {?CLIENTID2, ?config(file_id, Config)}, + Status = complete_assemble(Storage, Transfer, 42), + ?assertMatch({shutdown, {error, {incomplete, _}}}, Status). + +complete_assemble(Storage, Transfer, Size) -> + complete_assemble(Storage, Transfer, Size, 1000). + +complete_assemble(Storage, Transfer, Size, Timeout) -> + {async, Pid} = emqx_ft_storage_fs:assemble(Storage, Transfer, Size), + MRef = erlang:monitor(process, Pid), + Pid ! kickoff, + receive + {'DOWN', MRef, process, Pid, Result} -> + Result + after Timeout -> + ct:fail("Assembler did not finish in time") + end. 
+ +%% + +t_list_transfers(Config) -> + {ok, Exports} = list_exports(Config), + ?assertMatch( + [ + #{ + transfer := {?CLIENTID2, <<"t_assemble_complete_local_transfer">>}, + path := _, + size := Size, + meta := #{name := "topsecret.pdf"} + }, + #{ + transfer := {?CLIENTID1, <<"t_assemble_empty_transfer">>}, + path := _, + size := 0, + meta := #{name := "important.pdf"} + } + ] when Size > 0, + lists:sort(Exports) + ). + +%% + +-include_lib("kernel/include/file.hrl"). + +inspect_storage_root(Config) -> + inspect_dir(?config(storage_root, Config)). + +inspect_dir(Dir) -> + FileInfos = filelib:fold_files( + Dir, + ".*", + true, + fun(Filename, Acc) -> orddict:store(Filename, inspect_file(Filename), Acc) end, + orddict:new() + ), + ct:pal("inspect '~s': ~p", [Dir, FileInfos]). + +inspect_file(Filename) -> + {ok, Info} = file:read_file_info(Filename), + {Info#file_info.type, Info#file_info.size, Info#file_info.mtime}. + +mk_fileid() -> + integer_to_binary(erlang:system_time(millisecond)). + +list_exports(Config) -> + {emqx_ft_storage_exporter_fs, Options} = exporter(Config), + emqx_ft_storage_exporter_fs:list_local(Options). + +list_exports(Config, Transfer) -> + {emqx_ft_storage_exporter_fs, Options} = exporter(Config), + emqx_ft_storage_exporter_fs:list_local_transfer(Options, Transfer). + +exporter(Config) -> + emqx_ft_storage_exporter:exporter(storage(Config)). + +storage(Config) -> + emqx_utils_maps:deep_get( + [storage, local], + emqx_ft_schema:translate(#{ + <<"storage">> => #{ + <<"local">> => #{ + <<"segments">> => #{ + <<"root">> => ?config(storage_root, Config) + }, + <<"exporter">> => #{ + <<"local">> => #{ + <<"root">> => ?config(exports_root, Config) + } + } + } + } + }) + ). 
diff --git a/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl new file mode 100644 index 000000000..1f53f88af --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl @@ -0,0 +1,249 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_conf_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("snabbkaffe/include/test_macros.hrl"). + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(_Case, Config) -> + _ = emqx_config:save_schema_mod_and_names(emqx_ft_schema), + ok = emqx_common_test_helpers:start_apps( + [emqx_conf, emqx_ft], fun + (emqx_ft) -> + emqx_ft_test_helpers:load_config(#{}); + (_) -> + ok + end + ), + {ok, _} = emqx:update_config([rpc, port_discovery], manual), + Config. + +end_per_testcase(_Case, _Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft, emqx_conf]), + ok = emqx_config:erase(file_transfer). 
%%--------------------------------------------------------------------
%% Tests
%%--------------------------------------------------------------------

%% Checks runtime (re)configuration of the `file_transfer` config subtree:
%% an unknown storage backend must be rejected by schema validation, a
%% complete local-storage config must be accepted, and the applied values
%% must be observable both via `emqx_config:get/1` and via the
%% `emqx_ft_conf` accessors.
t_update_config(_Config) ->
    % An unknown key under `storage` must fail schema validation.
    ?assertMatch(
        {error, #{kind := validation_error}},
        emqx_conf:update(
            [file_transfer],
            #{<<"storage">> => #{<<"unknown">> => #{<<"foo">> => 42}}},
            #{}
        )
    ),
    % A full, valid local-storage config (segments root + GC interval +
    % local exporter) is accepted.
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer],
            #{
                <<"enable">> => true,
                <<"storage">> => #{
                    <<"local">> => #{
                        <<"segments">> => #{
                            <<"root">> => <<"/tmp/path">>,
                            <<"gc">> => #{
                                <<"interval">> => <<"5m">>
                            }
                        },
                        <<"exporter">> => #{
                            <<"local">> => #{
                                <<"root">> => <<"/tmp/exports">>
                            }
                        }
                    }
                }
            },
            #{}
        )
    ),
    % The segments root round-trips through the raw config store.
    ?assertEqual(
        <<"/tmp/path">>,
        emqx_config:get([file_transfer, storage, local, segments, root])
    ),
    % "5m" duration string surfaces as milliseconds (5 * 60 * 1000).
    ?assertEqual(
        5 * 60 * 1000,
        emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:gc_interval/1)
    ),
    % Segment TTL bounds, in seconds.
    % NOTE(review): these values presumably come from schema defaults,
    % since the update above did not set them — confirm against the schema.
    ?assertEqual(
        {5 * 60, 24 * 60 * 60},
        emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:segments_ttl/1)
    ).
%% Checks that file transfer can be disabled and re-enabled at runtime:
%% transfers work while enabled, fail cleanly (no matching subscribers,
%% no `file_transfer_init` trace event) while disabled, and work again
%% after the local storage backend is restored. Also verifies that GC is
%% eventually triggered once a short GC interval is configured.
%%
%% NOTE(review): the `<<"data">>` payload arguments below were garbled to
%% `<>` by extraction; reconstructed from the identical
%% `emqx_ft_test_helpers:upload_file/5` calls in emqx_ft_api_SUITE.
t_disable_restore_config(Config) ->
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer],
            #{<<"enable">> => true, <<"storage">> => #{<<"local">> => #{}}},
            #{}
        )
    ),
    % Default GC interval: one hour, in milliseconds.
    ?assertEqual(
        60 * 60 * 1000,
        emqx_ft_storage:with_storage_type(local, fun emqx_ft_conf:gc_interval/1)
    ),
    % Verify that transfers work
    ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<"data">>),
    % Verify that clearing storage settings reverts config to defaults
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer],
            #{<<"enable">> => false, <<"storage">> => undefined},
            #{}
        )
    ),
    ?assertEqual(
        false,
        emqx_ft_conf:enabled()
    ),
    % Storage config falls back to the default local exporter.
    ?assertMatch(
        #{local := #{exporter := #{local := _}}},
        emqx_ft_conf:storage()
    ),
    ClientId = gen_clientid(),
    Client = emqx_ft_test_helpers:start_client(ClientId),
    % Verify that transfers fail cleanly when storage is disabled:
    % the `$file/...` topic has no subscribers and no transfer is initiated.
    ?check_trace(
        ?assertMatch(
            {ok, #{reason_code_name := no_matching_subscribers}},
            emqtt:publish(
                Client,
                <<"$file/f2/init">>,
                emqx_utils_json:encode(emqx_ft:encode_filemeta(#{name => "f2", size => 42})),
                1
            )
        ),
        fun(Trace) ->
            ?assertMatch([], ?of_kind("file_transfer_init", Trace))
        end
    ),
    ok = emqtt:stop(Client),
    % Restore local storage backend with an aggressive 1s GC interval so
    % that garbage collection observably runs within the test timeout.
    Root = iolist_to_binary(emqx_ft_test_helpers:root(Config, node(), [segments])),
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer],
            #{
                <<"enable">> => true,
                <<"storage">> => #{
                    <<"local">> => #{
                        <<"segments">> => #{
                            <<"root">> => Root,
                            <<"gc">> => #{<<"interval">> => <<"1s">>}
                        }
                    }
                }
            },
            #{}
        )
    ),
    % Verify that GC is getting triggered eventually
    ?check_trace(
        ?block_until(#{?snk_kind := garbage_collection}, 5000, 0),
        fun(Trace) ->
            ?assertMatch(
                [
                    #{
                        ?snk_kind := garbage_collection,
                        storage := #{segments := #{root := Root}}
                    }
                ],
                ?of_kind(garbage_collection, Trace)
            )
        end
    ),
    % Verify that transfers work again
    ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<"data">>).

%% Checks switching the storage exporter at runtime:
%% local -> s3 -> (remove) -> local, asserting the effective exporter
%% config after each step, and that transfers still work at the end.
t_switch_exporter(_Config) ->
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer],
            #{<<"enable">> => true},
            #{}
        )
    ),
    ?assertMatch(
        #{local := #{exporter := #{local := _}}},
        emqx_ft_conf:storage()
    ),
    % Verify that switching to a different exporter works
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer, storage, local, exporter],
            #{
                <<"s3">> => #{
                    <<"bucket">> => <<"emqx">>,
                    <<"host">> => <<"https://localhost">>,
                    <<"port">> => 9000,
                    <<"transport_options">> => #{
                        <<"ipv6_probe">> => false
                    }
                }
            },
            #{}
        )
    ),
    ?assertMatch(
        #{local := #{exporter := #{s3 := _}}},
        emqx_ft_conf:storage()
    ),
    % Verify that switching back to local exporter works
    ?assertMatch(
        {ok, _},
        emqx_conf:remove(
            [file_transfer, storage, local, exporter],
            #{}
        )
    ),
    ?assertMatch(
        {ok, _},
        emqx_conf:update(
            [file_transfer, storage, local, exporter],
            #{<<"local">> => #{}},
            #{}
        )
    ),
    ?assertMatch(
        #{local := #{exporter := #{local := #{}}}},
        emqx_ft_conf:storage()
    ),
    % Verify that transfers work
    % NOTE(review): payload reconstructed from garbled `<>`, see above.
    ok = emqx_ft_test_helpers:upload_file(gen_clientid(), <<"f1">>, "f1", <<"data">>).

%% Generates a unique, MQTT-safe client id (base62-encoded GUID).
gen_clientid() ->
    emqx_base62:encode(emqx_guid:gen()).

%% NOTE(review): the lines below preserve the remainder of this collapsed
%% span verbatim (diff header + license preamble of emqx_ft_content_gen.erl).
%% diff --git a/apps/emqx_ft/test/emqx_ft_content_gen.erl b/apps/emqx_ft/test/emqx_ft_content_gen.erl
%% new file mode 100644 index 000000000..286c1a588 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_content_gen.erl @@ -0,0 +1,232 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% Inspired by +%% https://github.com/kafka4beam/kflow/blob/master/src/testbed/payload_gen.erl + +-module(emqx_ft_content_gen). + +-include_lib("eunit/include/eunit.hrl"). + +-dialyzer(no_improper_lists). + +-export([new/2]). +-export([generate/3]). +-export([next/1]). +-export([consume/1]). +-export([consume/2]). +-export([fold/3]). + +-export([hash/2]). +-export([check_file_consistency/3]). + +-export_type([cont/1]). +-export_type([stream/1]). +-export_type([binary_payload/0]). + +-define(hash_size, 16). + +-type payload() :: {Seed :: term(), Size :: integer()}. + +-type binary_payload() :: { + binary(), _ChunkNum :: non_neg_integer(), _Meta :: #{} +}. + +-type cont(Data) :: + fun(() -> stream(Data)) + | stream(Data). + +-type stream(Data) :: + maybe_improper_list(Data, cont(Data)) + | eos. + +-record(chunk_state, { + seed :: term(), + payload_size :: non_neg_integer(), + offset :: non_neg_integer(), + chunk_size :: non_neg_integer() +}). + +-type chunk_state() :: #chunk_state{}. + +%% ----------------------------------------------------------------------------- +%% Generic streams +%% ----------------------------------------------------------------------------- + +%% @doc Consume one element from the stream. +-spec next(cont(A)) -> stream(A). +next(Fun) when is_function(Fun, 0) -> + Fun(); +next(L) -> + L. + +%% @doc Consume all elements of the stream and feed them into a +%% callback (e.g. brod:produce) +-spec consume(cont(A), fun((A) -> Ret)) -> [Ret]. 
consume(eos, _Fun) ->
    [];
consume([Elem | Rest], Fun) ->
    [Fun(Elem) | consume(next(Rest), Fun)];
consume(Gen, Fun) when is_function(Gen, 0) ->
    consume(next(Gen), Fun).

%% @equiv consume(Stream, fun(A) -> A end)
-spec consume(cont(A)) -> [A].
consume(Stream) ->
    consume(Stream, fun(Elem) -> Elem end).

-spec fold(fun((A, Acc) -> Acc), Acc, cont(A)) -> Acc.
fold(_Fun, Acc, eos) ->
    Acc;
fold(Fun, Acc, [Elem | Rest]) ->
    fold(Fun, Fun(Elem, Acc), next(Rest));
fold(Fun, Acc, Gen) when is_function(Gen, 0) ->
    fold(Fun, Acc, next(Gen)).

%% -----------------------------------------------------------------------------
%% Binary streams
%% -----------------------------------------------------------------------------

%% @doc Lazy stream of binary chunks derived from `Seed'.
%% Limitation: `ChunkSize' should be dividable by `?hash_size'
-spec new(payload(), integer()) -> cont(binary_payload()).
new({Seed, Size}, ChunkSize) when ChunkSize rem ?hash_size =:= 0 ->
    InitialState = #chunk_state{
        seed = Seed,
        payload_size = Size,
        chunk_size = ChunkSize,
        offset = 0
    },
    fun() -> generate_next_chunk(InitialState) end.

%% @doc Generate chunks of data and feed each of them into `Callback'.
-spec generate(payload(), integer(), fun((binary_payload()) -> A)) -> [A].
generate(Payload, ChunkSize, Callback) ->
    consume(new(Payload, ChunkSize), Callback).

%% @doc Fold a whole chunk stream into the given crypto hash context and
%% return the final digest.
-spec hash(cont(binary_payload()), crypto:hash_state()) -> binary().
hash(Stream, HashCtxIn) ->
    FinalCtx = fold(
        fun({Chunk, _ChunkNum, _Meta}, HashCtx) ->
            crypto:hash_update(HashCtx, Chunk)
        end,
        HashCtxIn,
        Stream
    ),
    crypto:hash_final(FinalCtx).

-spec check_consistency(
    payload(),
    integer(),
    fun((integer()) -> {ok, binary()} | undefined)
) -> ok.
%% Samples `SampleSize' random byte offsets (plus the first byte, the last
%% byte, and one offset just past the end) and checks each against the
%% deterministic generator output. `Callback' must return `{ok, Byte}' for
%% offsets within the payload and `undefined' past the end.
check_consistency({Seed, Size}, SampleSize, Callback) ->
    SeedHash = seed_hash(Seed),
    Random = [rand:uniform(Size) - 1 || _ <- lists:seq(1, SampleSize)],
    %% Always check first and last bytes, and one that should not exist:
    Samples = [0, Size - 1, Size | Random],
    lists:foreach(
        fun
            (N) when N < Size ->
                Expected = do_get_byte(N, SeedHash),
                % Pairing with N makes assertion failures report the offset.
                ?assertEqual(
                    {N, {ok, Expected}},
                    {N, Callback(N)}
                );
            (N) ->
                % Offsets at/past the payload size must read as missing.
                ?assertMatch(undefined, Callback(N))
        end,
        Samples
    ).

-spec check_file_consistency(
    payload(),
    integer(),
    file:filename()
) -> ok.
%% Like check_consistency/3, but reads the sampled bytes from `FileName'
%% one byte at a time via a raw pread. The descriptor is closed in `after'
%% even if an assertion inside fails.
check_file_consistency(Payload, SampleSize, FileName) ->
    {ok, FD} = file:open(FileName, [read, raw]),
    try
        Fun = fun(N) ->
            % pread in (default) list mode returns the byte wrapped in a
            % one-char list; `eof' maps to `undefined' for the checker.
            case file:pread(FD, [{N, 1}]) of
                {ok, [[X]]} -> {ok, X};
                {ok, [eof]} -> undefined
            end
        end,
        check_consistency(Payload, SampleSize, Fun)
    after
        file:close(FD)
    end.

%% =============================================================================
%% Internal functions
%% =============================================================================

%% @doc Continue generating chunks
-spec generate_next_chunk(chunk_state()) -> stream(binary()).
% Stream terminates once the offset reaches the payload size.
generate_next_chunk(#chunk_state{offset = Offset, payload_size = Size}) when Offset >= Size ->
    eos;
% Otherwise emit the current chunk and a thunk producing the rest:
% the result is an improper list [Payload | fun()] — a lazy stream cell.
generate_next_chunk(State0 = #chunk_state{offset = Offset, chunk_size = ChunkSize}) ->
    State = State0#chunk_state{offset = Offset + ChunkSize},
    Payload = generate_chunk(
        State#chunk_state.seed,
        Offset,
        ChunkSize,
        State#chunk_state.payload_size
    ),
    [Payload | fun() -> generate_next_chunk(State) end].
%% Materializes one chunk of the deterministic payload: concatenates the
%% 16-byte generator blocks covering [Offset, Offset + ChunkSize) and trims
%% the final chunk to the payload size. Returns {Chunk, 1-based ChunkNum,
%% Meta} with chunk size/count metadata.
generate_chunk(Seed, Offset, ChunkSize, Size) ->
    SeedHash = seed_hash(Seed),
    To = min(Offset + ChunkSize, Size) - 1,
    Payload = iolist_to_binary([
        generator_fun(I, SeedHash)
     || I <- lists:seq(Offset div 16, To div 16)
    ]),
    ChunkNum = Offset div ChunkSize + 1,
    Meta = #{
        chunk_size => ChunkSize,
        chunk_count => ceil(Size / ChunkSize)
    },
    Chunk =
        case Offset + ChunkSize of
            % The last chunk may be shorter than ChunkSize: trim it.
            NextOffset when NextOffset > Size ->
                binary:part(Payload, 0, Size rem ChunkSize);
            _ ->
                Payload
        end,
    {Chunk, ChunkNum, Meta}.

%% @doc First argument is a chunk number, the second one is a seed.
%% This implementation is hardly efficient, but it was chosen for
%% clarity reasons
%%
%% NOTE(review): the hash input was garbled to `<>' by extraction;
%% reconstructed as `<<N:32, Seed/binary>>' following the kflow
%% payload_gen module this file says it is inspired by — confirm widths
%% against the original commit.
-spec generator_fun(integer(), binary()) -> binary().
generator_fun(N, Seed) ->
    crypto:hash(md5, <<N:32, Seed/binary>>).

%% @doc Hash any term
-spec seed_hash(term()) -> binary().
seed_hash(Seed) ->
    crypto:hash(md5, term_to_binary(Seed)).

%% @private Get byte at offset `N'
-spec do_get_byte(integer(), binary()) -> byte().
do_get_byte(N, Seed) ->
    Chunk = generator_fun(N div ?hash_size, Seed),
    binary:at(Chunk, N rem ?hash_size).

%% NOTE(review): the lines below preserve the remainder of this collapsed
%% span verbatim (diff header + license preamble of emqx_ft_fs_util_SUITE.erl).
%% diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE.erl b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE.erl
%% new file mode 100644 index 000000000..e4aa70f81 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE.erl @@ -0,0 +1,250 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_fs_util_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("kernel/include/file.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +t_fold_single_level(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {"a", #file_info{type = directory}, ["a"]}, + {"c", #file_info{type = directory}, ["c"]}, + {"d", #file_info{type = directory}, ["d"]} + ], + sort(fold(fun cons/4, [], Root, ['*'])) + ). + +t_fold_multi_level(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {"a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]}, + {"a/b/foo/ŠÆ", #file_info{type = regular}, ["ŠÆ", "foo", "b", "a"]}, + {"d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]} + ], + sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*'])) + ), + ?assertMatch( + [ + {"a/b/foo", #file_info{type = directory}, ["foo", "b", "a"]}, + {"c/bar/äø­ę–‡", #file_info{type = regular}, ["äø­ę–‡", "bar", "c"]}, + {"d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]} + ], + sort(fold(fun cons/4, [], Root, ['*', '*', '*'])) + ). + +t_fold_no_glob(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [{"", #file_info{type = directory}, []}], + sort(fold(fun cons/4, [], Root, [])) + ). + +t_fold_glob_too_deep(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [], + sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*', '*'])) + ). + +t_fold_invalid_root(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [], + sort(fold(fun cons/4, [], filename:join([Root, "a", "link"]), ['*'])) + ), + ?assertMatch( + [], + sort(fold(fun cons/4, [], filename:join([Root, "d", "haystack"]), ['*'])) + ). 
+ +t_fold_filter_unicode(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {"a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]}, + {"d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]} + ], + sort(fold(fun cons/4, [], Root, ['*', '*', '*', fun is_latin1/1])) + ), + ?assertMatch( + [ + {"a/b/foo/ŠÆ", #file_info{type = regular}, ["ŠÆ", "foo", "b", "a"]} + ], + sort(fold(fun cons/4, [], Root, ['*', '*', '*', is_not(fun is_latin1/1)])) + ). + +t_fold_filter_levels(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {"a/b/foo", #file_info{type = directory}, ["foo", "b", "a"]}, + {"d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]} + ], + sort(fold(fun cons/4, [], Root, [fun is_letter/1, fun is_letter/1, '*'])) + ). + +t_fold_errors(Config) -> + Root = ?config(data_dir, Config), + ok = meck:new(emqx_ft_fs_util, [passthrough]), + ok = meck:expect(emqx_ft_fs_util, read_info, fun(AbsFilepath) -> + ct:pal("read_info(~p)", [AbsFilepath]), + Filename = filename:basename(AbsFilepath), + case Filename of + "b" -> {error, eacces}; + "link" -> {error, enotsup}; + "bar" -> {error, enotdir}; + "needle" -> {error, ebusy}; + _ -> meck:passthrough([AbsFilepath]) + end + end), + ?assertMatch( + [ + {"a/b", {error, eacces}, ["b", "a"]}, + {"a/link", {error, enotsup}, ["link", "a"]}, + {"c/link", {error, enotsup}, ["link", "c"]}, + {"d/e/baz/needle", {error, ebusy}, ["needle", "baz", "e", "d"]} + ], + sort(fold(fun cons/4, [], Root, ['*', '*', '*', '*'])) + ). 
+ +t_seek_fold(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {leaf, "a/b/foo/42", #file_info{type = regular}, ["42", "foo", "b", "a"]}, + {leaf, "a/b/foo/ŠÆ", #file_info{type = regular}, ["ŠÆ", "foo", "b", "a"]}, + {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]} + | _Nodes + ], + sort( + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:seek(["a", "a"], Root, ['*', '*', '*', '*']) + ) + ) + ), + ?assertMatch( + [ + {leaf, "a/b/foo/ŠÆ", #file_info{type = regular}, ["ŠÆ", "foo", "b", "a"]}, + {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]} + | _Nodes + ], + sort( + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:seek(["a", "b", "foo", "42"], Root, ['*', '*', '*', '*']) + ) + ) + ), + ?assertMatch( + [ + {leaf, "d/e/baz/needle", #file_info{type = regular}, ["needle", "baz", "e", "d"]} + | _Nodes + ], + sort( + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:seek(["c", "d", "e", "f"], Root, ['*', '*', '*', '*']) + ) + ) + ). + +t_seek_empty(Config) -> + Root = ?config(data_dir, Config), + ?assertEqual( + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:new(Root, ['*', '*', '*', '*']) + ), + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:seek([], Root, ['*', '*', '*', '*']) + ) + ). + +t_seek_past_end(Config) -> + Root = ?config(data_dir, Config), + ?assertEqual( + none, + emqx_ft_fs_iterator:next( + emqx_ft_fs_iterator:seek(["g", "h"], Root, ['*', '*', '*', '*']) + ) + ). + +t_seek_with_filter(Config) -> + Root = ?config(data_dir, Config), + ?assertMatch( + [ + {leaf, "d/e/baz", #file_info{type = directory}, ["baz", "e", "d"]} + | _Nodes + ], + sort( + emqx_ft_fs_iterator:fold( + fun cons/2, + [], + emqx_ft_fs_iterator:seek(["a", "link"], Root, ['*', fun is_letter/1, '*']) + ) + ) + ). + +%% + +fold(FoldFun, Acc, Root, Glob) -> + emqx_ft_fs_util:fold(FoldFun, Acc, Root, Glob). 
+ +is_not(F) -> + fun(X) -> not F(X) end. + +is_latin1(Filename) -> + case unicode:characters_to_binary(Filename, unicode, latin1) of + {error, _, _} -> + false; + _ -> + true + end. + +is_letter(Filename) -> + case Filename of + [_] -> + true; + _ -> + false + end. + +cons(Path, Info, Stack, Acc) -> + [{Path, Info, Stack} | Acc]. + +cons(Entry, Acc) -> + [Entry | Acc]. + +sort(L) when is_list(L) -> + lists:sort(L). diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/b/foo/42 b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/b/foo/42 new file mode 100644 index 000000000..e69de29bb diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/b/foo/ŠÆ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/b/foo/ŠÆ new file mode 100644 index 000000000..ac31ffd53 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/b/foo/ŠÆ @@ -0,0 +1 @@ +Ты diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/link b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/link new file mode 120000 index 000000000..1b271d838 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/a/link @@ -0,0 +1 @@ +../c \ No newline at end of file diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/bar/äø­ę–‡ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/bar/äø­ę–‡ new file mode 100644 index 000000000..2e11eb72f --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/bar/äø­ę–‡ @@ -0,0 +1 @@ +ZhōngwĆ©n diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/link b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/link new file mode 120000 index 000000000..82f488f26 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/c/link @@ -0,0 +1 @@ +../a \ No newline at end of file diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/e/baz/needle b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/e/baz/needle new file mode 100644 index 000000000..d755762d1 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/e/baz/needle @@ -0,0 +1 @@ 
+haystack diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/haystack b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/haystack new file mode 100644 index 000000000..a6b681bf4 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_SUITE_data/d/haystack @@ -0,0 +1 @@ +needle diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl new file mode 100644 index 000000000..1939e74c6 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl @@ -0,0 +1,65 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_fs_util_tests). + +-include_lib("eunit/include/eunit.hrl"). + +filename_safe_test_() -> + [ + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe("im.safe")), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"im.safe">>)), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<".safe.100%">>)), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"safe.as.🦺"/utf8>>)) + ]. 
+ +filename_unsafe_test_() -> + [ + ?_assertEqual({error, empty}, emqx_ft_fs_util:is_filename_safe("")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(".")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe("..")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(<<"..">>)), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe(<<".././..">>)), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("/etc/passwd")), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("../cookie")), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("C:$cookie")), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([1, 2, 3])), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe(<<4, 5, 6>>)), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([$a, 16#7F, $z])) + ]. + +-define(NAMES, [ + {"just.file", <<"just.file">>}, + {".hidden", <<".hidden">>}, + {".~what", <<".~what">>}, + {"100%25.file", <<"100%.file">>}, + {"%2E%2E", <<"..">>}, + {"...", <<"...">>}, + {"%2Fetc%2Fpasswd", <<"/etc/passwd">>}, + {"%01%02%0A ", <<1, 2, 10, 32>>} +]). + +escape_filename_test_() -> + [ + ?_assertEqual(Filename, emqx_ft_fs_util:escape_filename(Input)) + || {Filename, Input} <- ?NAMES + ]. + +unescape_filename_test_() -> + [ + ?_assertEqual(Input, emqx_ft_fs_util:unescape_filename(Filename)) + || {Filename, Input} <- ?NAMES + ]. diff --git a/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl b/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl new file mode 100644 index 000000000..751861206 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl @@ -0,0 +1,84 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_responder_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("stdlib/include/assert.hrl"). + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)), + Config. + +end_per_suite(_Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft]), + ok. + +init_per_testcase(_Case, Config) -> + Config. + +end_per_testcase(_Case, _Config) -> + ok. + +t_start_ack(_Config) -> + Key = <<"test">>, + DefaultAction = fun({ack, Ref}) -> Ref end, + ?assertMatch( + {ok, _Pid}, + emqx_ft_responder:start(Key, DefaultAction, 1000) + ), + ?assertMatch( + {error, {already_started, _Pid}}, + emqx_ft_responder:start(Key, DefaultAction, 1000) + ), + Ref = make_ref(), + ?assertEqual( + Ref, + emqx_ft_responder:ack(Key, Ref) + ), + ?assertExit( + {noproc, _}, + emqx_ft_responder:ack(Key, Ref) + ). + +t_timeout(_Config) -> + Key = <<"test">>, + Self = self(), + DefaultAction = fun(timeout) -> Self ! {timeout, Key} end, + {ok, _Pid} = emqx_ft_responder:start(Key, DefaultAction, 20), + receive + {timeout, Key} -> + ok + after 100 -> + ct:fail("emqx_ft_responder not called") + end, + ?assertExit( + {noproc, _}, + emqx_ft_responder:ack(Key, oops) + ). 
+ +t_unknown_msgs(_Config) -> + {ok, Pid} = emqx_ft_responder:start(make_ref(), fun(_) -> ok end, 100), + Pid ! {unknown_msg, <<"test">>}, + ok = gen_server:cast(Pid, {unknown_msg, <<"test">>}), + ?assertEqual( + {error, unknown_call}, + gen_server:call(Pid, {unknown_call, <<"test">>}) + ). diff --git a/apps/emqx_ft/test/emqx_ft_storage_exporter_s3_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_exporter_s3_SUITE.erl new file mode 100644 index 000000000..e717fe262 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_storage_exporter_s3_SUITE.erl @@ -0,0 +1,199 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_exporter_s3_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +-define(assertS3Data(Data, Url), + case httpc:request(Url) of + {ok, {{_StatusLine, 200, "OK"}, _Headers, Body}} -> + ?assertEqual(Data, list_to_binary(Body), "S3 data mismatch"); + OtherResponse -> + ct:fail("Unexpected response: ~p", [OtherResponse]) + end +). + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + Config. +end_per_suite(_Config) -> + ok. 
+ +set_special_configs(Config) -> + fun + (emqx_ft) -> + Storage = emqx_ft_test_helpers:local_storage(Config, #{ + exporter => s3, bucket_name => ?config(bucket_name, Config) + }), + emqx_ft_test_helpers:load_config(#{<<"enable">> => true, <<"storage">> => Storage}); + (_) -> + ok + end. + +init_per_testcase(Case, Config0) -> + ClientId = atom_to_binary(Case), + BucketName = create_bucket(), + Config1 = [{bucket_name, BucketName}, {clientid, ClientId} | Config0], + ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx_ft], set_special_configs(Config1)), + Config1. +end_per_testcase(_Case, _Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft, emqx_conf]), + ok. + +%%-------------------------------------------------------------------- +%% Test Cases +%%------------------------------------------------------------------- + +t_happy_path(Config) -> + ClientId = ?config(clientid, Config), + + FileId = <<"🌚"/utf8>>, + Name = "cool_name", + Data = <<"data"/utf8>>, + + ?assertEqual( + ok, + emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, Data) + ), + + {ok, #{items := [#{uri := Uri}]}} = emqx_ft_storage:files(), + + ?assertS3Data(Data, Uri), + + Key = binary_to_list(ClientId) ++ "/" ++ binary_to_list(FileId) ++ "/" ++ Name, + Meta = erlcloud_s3:get_object_metadata( + ?config(bucket_name, Config), Key, emqx_ft_test_helpers:aws_config() + ), + + ?assertEqual( + ClientId, + metadata_field("clientid", Meta) + ), + + ?assertEqual( + FileId, + metadata_field("fileid", Meta) + ), + + NameBin = list_to_binary(Name), + ?assertMatch( + #{ + <<"name">> := NameBin, + <<"size">> := 4 + }, + emqx_utils_json:decode(metadata_field("filemeta", Meta), [return_maps]) + ). 
+ +t_upload_error(Config) -> + ClientId = ?config(clientid, Config), + + FileId = <<"🌚"/utf8>>, + Name = "cool_name", + Data = <<"data"/utf8>>, + + {ok, _} = emqx_conf:update( + [file_transfer, storage, local, exporter, s3, bucket], <<"invalid-bucket">>, #{} + ), + + ?assertEqual( + {error, unspecified_error}, + emqx_ft_test_helpers:upload_file(ClientId, FileId, Name, Data) + ). + +t_paging(Config) -> + ClientId = ?config(clientid, Config), + N = 1050, + + FileId = fun integer_to_binary/1, + Name = "cool_name", + Data = fun integer_to_binary/1, + + ok = lists:foreach( + fun(I) -> + ok = emqx_ft_test_helpers:upload_file(ClientId, FileId(I), Name, Data(I)) + end, + lists:seq(1, N) + ), + + {ok, #{items := [#{uri := Uri}]}} = emqx_ft_storage:files(#{transfer => {ClientId, FileId(123)}}), + + ?assertS3Data(Data(123), Uri), + + lists:foreach( + fun(PageSize) -> + Pages = file_pages(#{limit => PageSize}), + ?assertEqual( + expected_page_count(PageSize, N), + length(Pages) + ), + FileIds = [ + FId + || #{transfer := {_, FId}} <- lists:concat(Pages) + ], + ?assertEqual( + lists:sort([FileId(I) || I <- lists:seq(1, N)]), + lists:sort(FileIds) + ) + end, + %% less than S3 limit, greater than S3 limit + [20, 550] + ). + +t_invalid_cursor(_Config) -> + InvalidUtf8 = <<16#80>>, + ?assertError( + {badarg, cursor}, + emqx_ft_storage:files(#{following => InvalidUtf8}) + ). + +%%-------------------------------------------------------------------- +%% Helper Functions +%%-------------------------------------------------------------------- + +expected_page_count(PageSize, Total) -> + case Total rem PageSize of + 0 -> Total div PageSize; + _ -> Total div PageSize + 1 + end. + +file_pages(Query) -> + case emqx_ft_storage:files(Query) of + {ok, #{items := Items, cursor := NewCursor}} -> + [Items] ++ file_pages(Query#{following => NewCursor}); + {ok, #{items := Items}} -> + [Items]; + {error, Error} -> + ct:fail("Failed to download files: ~p", [Error]) + end. 
+ +metadata_field(Field, Meta) -> + Key = "x-amz-meta-" ++ Field, + case lists:keyfind(Key, 1, Meta) of + {Key, Value} -> list_to_binary(Value); + false -> false + end. + +create_bucket() -> + BucketName = emqx_s3_test_helpers:unique_bucket(), + _ = application:ensure_all_started(lhttpc), + ok = erlcloud_s3:create_bucket(BucketName, emqx_ft_test_helpers:aws_config()), + BucketName. diff --git a/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl new file mode 100644 index 000000000..50925cfb9 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl @@ -0,0 +1,93 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +all() -> + [ + {group, cluster} + ]. + +-define(CLUSTER_CASES, [t_multinode_exports]). + +groups() -> + [ + {cluster, [sequence], ?CLUSTER_CASES} + ]. + +init_per_suite(Config) -> + ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)), + Config. +end_per_suite(_Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft]), + ok. 
+ +init_per_testcase(Case, Config) -> + [{tc, Case} | Config]. +end_per_testcase(_Case, _Config) -> + ok. + +init_per_group(cluster, Config) -> + Node = emqx_ft_test_helpers:start_additional_node(Config, emqx_ft_storage_fs1), + [{additional_node, Node} | Config]; +init_per_group(_Group, Config) -> + Config. + +end_per_group(cluster, Config) -> + ok = emqx_ft_test_helpers:stop_additional_node(?config(additional_node, Config)); +end_per_group(_Group, _Config) -> + ok. + +%%-------------------------------------------------------------------- +%% Tests +%%-------------------------------------------------------------------- + +t_multinode_exports(Config) -> + Node1 = ?config(additional_node, Config), + ok = emqx_ft_test_helpers:upload_file(<<"c/1">>, <<"f:1">>, "fn1", <<"data">>, Node1), + + Node2 = node(), + ok = emqx_ft_test_helpers:upload_file(<<"c/2">>, <<"f:2">>, "fn2", <<"data">>, Node2), + + ?assertMatch( + [ + #{transfer := {<<"c/1">>, <<"f:1">>}, name := "fn1"}, + #{transfer := {<<"c/2">>, <<"f:2">>}, name := "fn2"} + ], + lists:sort(list_files(Config)) + ). + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +client_id(Config) -> + atom_to_binary(?config(tc, Config), utf8). + +storage(Config) -> + RawConfig = #{<<"storage">> => emqx_ft_test_helpers:local_storage(Config)}, + #{storage := #{local := Storage}} = emqx_ft_schema:translate(RawConfig), + Storage. + +list_files(Config) -> + {ok, #{items := Files}} = emqx_ft_storage_fs:files(storage(Config), #{}), + Files. diff --git a/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl new file mode 100644 index 000000000..a7ffd5675 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl @@ -0,0 +1,363 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. 
All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_gc_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("emqx_ft/include/emqx_ft_storage_fs.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("snabbkaffe/include/test_macros.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + _ = application:load(emqx_ft), + ok = emqx_common_test_helpers:start_apps([]), + Config. + +end_per_suite(_Config) -> + ok = emqx_common_test_helpers:stop_apps([]), + ok. + +init_per_testcase(TC, Config) -> + SegmentsRoot = emqx_ft_test_helpers:root(Config, node(), [TC, segments]), + ExportsRoot = emqx_ft_test_helpers:root(Config, node(), [TC, exports]), + ok = emqx_common_test_helpers:start_app( + emqx_ft, + fun(emqx_ft) -> + emqx_ft_test_helpers:load_config(#{ + <<"enable">> => true, + <<"storage">> => #{ + <<"local">> => #{ + <<"segments">> => #{<<"root">> => SegmentsRoot}, + <<"exporter">> => #{ + <<"local">> => #{<<"root">> => ExportsRoot} + } + } + } + }) + end + ), + ok = snabbkaffe:start_trace(), + Config. + +end_per_testcase(_TC, _Config) -> + ok = snabbkaffe:stop(), + ok = application:stop(emqx_ft), + ok. + +%% + +-define(NSEGS(Filesize, SegmentSize), (ceil(Filesize / SegmentSize) + 1)). 
+ +t_gc_triggers_periodically(_Config) -> + Interval = 500, + ok = set_gc_config(interval, Interval), + ok = emqx_ft_storage_fs_gc:reset(), + ?check_trace( + timer:sleep(Interval * 3), + fun(Trace) -> + [Event, _ | _] = ?of_kind(garbage_collection, Trace), + ?assertMatch( + #{ + stats := #gcstats{ + files = 0, + directories = 0, + space = 0, + errors = #{} = Errors + } + } when map_size(Errors) == 0, + Event + ) + end + ). + +t_gc_triggers_manually(_Config) -> + ?check_trace( + ?assertMatch( + #gcstats{files = 0, directories = 0, space = 0, errors = #{} = Errors} when + map_size(Errors) == 0, + emqx_ft_storage_fs_gc:collect() + ), + fun(Trace) -> + [Event] = ?of_kind(garbage_collection, Trace), + ?assertMatch( + #{stats := #gcstats{}}, + Event + ) + end + ). + +t_gc_complete_transfers(_Config) -> + {local, Storage} = emqx_ft_storage:backend(), + ok = set_gc_config(minimum_segments_ttl, 0), + ok = set_gc_config(maximum_segments_ttl, 3), + ok = set_gc_config(interval, 500), + ok = emqx_ft_storage_fs_gc:reset(), + Transfers = [ + { + T1 = {<<"client1">>, mk_file_id()}, + #{name => "cat.cur", segments_ttl => 10}, + emqx_ft_content_gen:new({?LINE, S1 = 42}, SS1 = 16) + }, + { + T2 = {<<"client2">>, mk_file_id()}, + #{name => "cat.ico", segments_ttl => 10}, + emqx_ft_content_gen:new({?LINE, S2 = 420}, SS2 = 64) + }, + { + T3 = {<<"client42">>, mk_file_id()}, + #{name => "cat.jpg", segments_ttl => 10}, + emqx_ft_content_gen:new({?LINE, S3 = 42000}, SS3 = 1024) + } + ], + % 1. Start all transfers + TransferSizes = emqx_utils:pmap( + fun(Transfer) -> start_transfer(Storage, Transfer) end, + Transfers + ), + ?assertEqual([S1, S2, S3], TransferSizes), + ?assertMatch( + #gcstats{files = 0, directories = 0, errors = #{} = Es} when map_size(Es) == 0, + emqx_ft_storage_fs_gc:collect() + ), + % 2. 
Complete just the first transfer + {ok, {ok, Event}} = ?wait_async_action( + ?assertEqual(ok, complete_transfer(Storage, T1, S1)), + #{?snk_kind := garbage_collection}, + 1000 + ), + ?assertMatch( + #{ + stats := #gcstats{ + files = Files, + directories = 2, + space = Space, + errors = #{} = Es + } + } when Files == ?NSEGS(S1, SS1) andalso Space > S1 andalso map_size(Es) == 0, + Event + ), + % 3. Complete rest of transfers + {ok, Sub} = snabbkaffe_collector:subscribe( + ?match_event(#{?snk_kind := garbage_collection}), + 2, + 1000, + 0 + ), + ?assertEqual( + [ok, ok], + emqx_utils:pmap( + fun({Transfer, Size}) -> complete_transfer(Storage, Transfer, Size) end, + [{T2, S2}, {T3, S3}] + ) + ), + {ok, Events} = snabbkaffe_collector:receive_events(Sub), + CFiles = lists:sum([Stats#gcstats.files || #{stats := Stats} <- Events]), + CDirectories = lists:sum([Stats#gcstats.directories || #{stats := Stats} <- Events]), + CSpace = lists:sum([Stats#gcstats.space || #{stats := Stats} <- Events]), + CErrors = lists:foldl( + fun maps:merge/2, + #{}, + [Stats#gcstats.errors || #{stats := Stats} <- Events] + ), + ?assertEqual(?NSEGS(S2, SS2) + ?NSEGS(S3, SS3), CFiles), + ?assertEqual(2 + 2, CDirectories), + ?assertMatch(Space when Space > S2 + S3, CSpace), + ?assertMatch(Errors when map_size(Errors) == 0, CErrors), + % 4. Ensure that empty transfer directories will be eventually collected + {ok, _} = ?block_until( + #{ + ?snk_kind := garbage_collection, + stats := #gcstats{ + files = 0, + directories = 6, + space = 0 + } + }, + 5000, + 0 + ). 
+ +t_gc_incomplete_transfers(_Config) -> + ok = set_gc_config(minimum_segments_ttl, 0), + ok = set_gc_config(maximum_segments_ttl, 4), + {local, Storage} = emqx_ft_storage:backend(), + Transfers = [ + { + {<<"client43"/utf8>>, <<"file-šŸ¦•"/utf8>>}, + #{name => "dog.cur", segments_ttl => 1}, + emqx_ft_content_gen:new({?LINE, S1 = 123}, SS1 = 32) + }, + { + {<<"client44">>, <<"file-šŸ¦–"/utf8>>}, + #{name => "dog.ico", segments_ttl => 2}, + emqx_ft_content_gen:new({?LINE, S2 = 456}, SS2 = 64) + }, + { + {<<"client1337">>, <<"file-šŸ¦€"/utf8>>}, + #{name => "dog.jpg", segments_ttl => 3000}, + emqx_ft_content_gen:new({?LINE, S3 = 7890}, SS3 = 128) + }, + { + {<<"client31337">>, <<"file-ā³"/utf8>>}, + #{name => "dog.jpg"}, + emqx_ft_content_gen:new({?LINE, S4 = 1230}, SS4 = 256) + } + ], + % 1. Start transfers, send all the segments but don't trigger completion. + _ = emqx_utils:pmap(fun(Transfer) -> start_transfer(Storage, Transfer) end, Transfers), + % 2. Enable periodic GC every 0.5 seconds. + ok = set_gc_config(interval, 500), + ok = emqx_ft_storage_fs_gc:reset(), + % 3. First we need the first transfer to be collected. + {ok, _} = ?block_until( + #{ + ?snk_kind := garbage_collection, + stats := #gcstats{ + files = Files, + directories = 4, + space = Space + } + } when Files == (?NSEGS(S1, SS1)) andalso Space > S1, + 5000, + 0 + ), + % 4. Then the second one. + {ok, _} = ?block_until( + #{ + ?snk_kind := garbage_collection, + stats := #gcstats{ + files = Files, + directories = 4, + space = Space + } + } when Files == (?NSEGS(S2, SS2)) andalso Space > S2, + 5000, + 0 + ), + % 5. Then transfers 3 and 4 because 3rd has too big TTL and 4th has no specific TTL. + {ok, _} = ?block_until( + #{ + ?snk_kind := garbage_collection, + stats := #gcstats{ + files = Files, + directories = 4 * 2, + space = Space + } + } when Files == (?NSEGS(S3, SS3) + ?NSEGS(S4, SS4)) andalso Space > S3 + S4, + 5000, + 0 + ). 
+ +t_gc_handling_errors(_Config) -> + ok = set_gc_config(minimum_segments_ttl, 0), + ok = set_gc_config(maximum_segments_ttl, 0), + {local, Storage} = emqx_ft_storage:backend(), + Transfer1 = {<<"client1">>, mk_file_id()}, + Transfer2 = {<<"client2">>, mk_file_id()}, + Filemeta = #{name => "oops.pdf"}, + Size = 420, + SegSize = 16, + _ = start_transfer( + Storage, + {Transfer1, Filemeta, emqx_ft_content_gen:new({?LINE, Size}, SegSize)} + ), + _ = start_transfer( + Storage, + {Transfer2, Filemeta, emqx_ft_content_gen:new({?LINE, Size}, SegSize)} + ), + % 1. Throw some chaos in the transfer directory. + DirFragment1 = emqx_ft_storage_fs:get_subdir(Storage, Transfer1, fragment), + DirTemporary1 = emqx_ft_storage_fs:get_subdir(Storage, Transfer1, temporary), + PathShadyLink = filename:join(DirTemporary1, "linked-here"), + ok = file:make_symlink(DirFragment1, PathShadyLink), + DirTransfer2 = emqx_ft_storage_fs:get_subdir(Storage, Transfer2), + PathTripUp = filename:join(DirTransfer2, "trip-up-here"), + ok = file:write_file(PathTripUp, <<"HAHA">>), + ok = timer:sleep(timer:seconds(1)), + % 2. Observe the errors are reported consistently. + ?check_trace( + ?assertMatch( + #gcstats{ + files = Files, + directories = 3, + space = Space, + errors = #{ + % NOTE: dangling symlink looks like `enoent` for some reason + {file, PathShadyLink} := {unexpected, _}, + {directory, DirTransfer2} := eexist + } + } when Files == ?NSEGS(Size, SegSize) * 2 andalso Space > Size * 2, + emqx_ft_storage_fs_gc:collect() + ), + fun(Trace) -> + ?assertMatch( + [ + #{ + errors := #{ + {file, PathShadyLink} := {unexpected, _}, + {directory, DirTransfer2} := eexist + } + } + ], + ?of_kind("garbage_collection_errors", Trace) + ) + end + ). + +%% + +set_gc_config(Name, Value) -> + emqx_config:put([file_transfer, storage, local, segments, gc, Name], Value). 
+ +start_transfer(Storage, {Transfer, Meta, Gen}) -> + ?assertEqual( + ok, + emqx_ft_storage_fs:store_filemeta(Storage, Transfer, Meta) + ), + emqx_ft_content_gen:fold( + fun({Content, SegmentNum, #{chunk_size := SegmentSize}}, _Transferred) -> + Offset = (SegmentNum - 1) * SegmentSize, + ?assertEqual( + ok, + emqx_ft_storage_fs:store_segment(Storage, Transfer, {Offset, Content}) + ), + Offset + byte_size(Content) + end, + 0, + Gen + ). + +complete_transfer(Storage, Transfer, Size) -> + complete_transfer(Storage, Transfer, Size, 100). + +complete_transfer(Storage, Transfer, Size, Timeout) -> + {async, Pid} = emqx_ft_storage_fs:assemble(Storage, Transfer, Size), + MRef = erlang:monitor(process, Pid), + Pid ! kickoff, + receive + {'DOWN', MRef, process, Pid, {shutdown, Result}} -> + Result + after Timeout -> + ct:fail("Assembler did not finish in time") + end. + +mk_file_id() -> + emqx_guid:to_hexstr(emqx_guid:gen()). diff --git a/apps/emqx_ft/test/emqx_ft_storage_fs_reader_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_fs_reader_SUITE.erl new file mode 100644 index 000000000..217205f6f --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_storage_fs_reader_SUITE.erl @@ -0,0 +1,153 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%-------------------------------------------------------------------- + +-module(emqx_ft_storage_fs_reader_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)), + Config. + +end_per_suite(_Config) -> + ok = emqx_common_test_helpers:stop_apps([emqx_ft]), + ok. + +init_per_testcase(_Case, Config) -> + file:make_dir(?config(data_dir, Config)), + Data = <<"hello world">>, + Path = expand_path(Config, "test_file"), + ok = mk_test_file(Path, Data), + [{path, Path} | Config]. + +end_per_testcase(_Case, _Config) -> + ok. + +t_successful_read(Config) -> + Path = ?config(path, Config), + + {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(self(), Path), + ?assertEqual( + {ok, <<"hello ">>}, + emqx_ft_storage_fs_reader:read(ReaderPid, 6) + ), + ?assertEqual( + {ok, <<"world">>}, + emqx_ft_storage_fs_reader:read(ReaderPid, 6) + ), + ?assertEqual( + eof, + emqx_ft_storage_fs_reader:read(ReaderPid, 6) + ), + ?assertNot(is_process_alive(ReaderPid)). + +t_caller_dead(Config) -> + erlang:process_flag(trap_exit, true), + + Path = ?config(path, Config), + + CallerPid = spawn_link( + fun() -> + receive + stop -> ok + end + end + ), + {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(CallerPid, Path), + _ = erlang:monitor(process, ReaderPid), + ?assertEqual( + {ok, <<"hello ">>}, + emqx_ft_storage_fs_reader:read(ReaderPid, 6) + ), + CallerPid ! stop, + receive + {'DOWN', _, process, ReaderPid, _} -> ok + after 1000 -> + ct:fail("Reader process did not die") + end. 
+ +t_tables(Config) -> + Path = ?config(path, Config), + + {ok, ReaderPid0} = emqx_ft_storage_fs_reader:start_link(self(), Path), + + ReaderQH0 = emqx_ft_storage_fs_reader:table(ReaderPid0, 6), + ?assertEqual( + [<<"hello ">>, <<"world">>], + qlc:eval(ReaderQH0) + ), + + {ok, ReaderPid1} = emqx_ft_storage_fs_reader:start_link(self(), Path), + + ReaderQH1 = emqx_ft_storage_fs_reader:table(ReaderPid1), + ?assertEqual( + [<<"hello world">>], + qlc:eval(ReaderQH1) + ). + +t_bad_messages(Config) -> + Path = ?config(path, Config), + + {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_link(self(), Path), + + ReaderPid ! {bad, message}, + gen_server:cast(ReaderPid, {bad, message}), + + ?assertEqual( + {error, {bad_call, {bad, message}}}, + gen_server:call(ReaderPid, {bad, message}) + ). + +t_nonexistent_file(_Config) -> + ?assertEqual( + {error, enoent}, + emqx_ft_storage_fs_reader:start_link(self(), "/a/b/c/bar") + ). + +t_start_supervised(Config) -> + Path = ?config(path, Config), + + {ok, ReaderPid} = emqx_ft_storage_fs_reader:start_supervised(self(), Path), + ?assertEqual( + {ok, <<"hello ">>}, + emqx_ft_storage_fs_reader:read(ReaderPid, 6) + ). + +t_rpc_error(_Config) -> + ReaderQH = emqx_ft_storage_fs_reader:table(fake_remote_pid('dummy@127.0.0.1'), 6), + ?assertEqual( + [], + qlc:eval(ReaderQH) + ). + +mk_test_file(Path, Data) -> + ok = file:write_file(Path, Data). + +expand_path(Config, Filename) -> + filename:join([?config(data_dir, Config), Filename]). + +%% This is a hack to create a pid that is not registered on the local node. +%% https://www.erlang.org/doc/apps/erts/erl_ext_dist.html#new_pid_ext +fake_remote_pid(Node) -> + <<131, NodeAtom/binary>> = term_to_binary(Node), + PidBin = <<131, 88, NodeAtom/binary, 1:32/big, 1:32/big, 1:32/big>>, + binary_to_term(PidBin). 
diff --git a/apps/emqx_ft/test/emqx_ft_test_helpers.erl b/apps/emqx_ft/test/emqx_ft_test_helpers.erl new file mode 100644 index 000000000..2eb6d84db --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_test_helpers.erl @@ -0,0 +1,128 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_test_helpers). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). + +-define(S3_HOST, <<"minio">>). +-define(S3_PORT, 9000). + +start_additional_node(Config, Name) -> + emqx_common_test_helpers:start_slave( + Name, + [ + {apps, [emqx_ft]}, + {join_to, node()}, + {configure_gen_rpc, true}, + {env_handler, env_handler(Config)} + ] + ). + +stop_additional_node(Node) -> + ok = rpc:call(Node, ekka, leave, []), + ok = rpc:call(Node, emqx_common_test_helpers, stop_apps, [[emqx_ft]]), + ok = emqx_common_test_helpers:stop_slave(Node), + ok. + +env_handler(Config) -> + fun + (emqx_ft) -> + load_config(#{<<"enable">> => true, <<"storage">> => local_storage(Config)}); + (_) -> + ok + end. + +local_storage(Config) -> + local_storage(Config, #{exporter => local}). 
%% Raw `local` storage config with per-node segment and exporter roots.
local_storage(Config, Opts) ->
    #{
        <<"local">> => #{
            <<"segments">> => #{<<"root">> => root(Config, node(), [segments])},
            <<"exporter">> => exporter(Config, Opts)
        }
    }.

%% Exporter section of the storage config: either a local directory or an
%% S3 bucket served by the CT MinIO service.
exporter(Config, #{exporter := local}) ->
    #{<<"local">> => #{<<"root">> => root(Config, node(), [exports])}};
exporter(_Config, #{exporter := s3, bucket_name := BucketName}) ->
    BaseConfig = emqx_s3_test_helpers:base_raw_config(tcp),
    #{
        <<"s3">> => BaseConfig#{
            <<"bucket">> => list_to_binary(BucketName),
            <<"host">> => ?S3_HOST,
            <<"port">> => ?S3_PORT
        }
    }.

%% Load a raw `file_transfer` config section through the emqx_ft schema.
load_config(Config) ->
    emqx_common_test_helpers:load_config(emqx_ft_schema, #{<<"file_transfer">> => Config}).

%% MQTT TCP listener port of the given node.
tcp_port(Node) ->
    {_, Port} = rpc:call(Node, emqx_config, get, [[listeners, tcp, default, bind]]),
    Port.

%% Per-node storage root inside the CT priv_dir.
root(Config, Node, Tail) ->
    iolist_to_binary(filename:join([?config(priv_dir, Config), "file_transfer", Node | Tail])).

start_client(ClientId) ->
    start_client(ClientId, node()).

%% Connect an MQTT v5 client to the given node's TCP listener.
start_client(ClientId, Node) ->
    Port = tcp_port(Node),
    {ok, Client} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, Port}]),
    {ok, _} = emqtt:connect(Client),
    Client.

upload_file(ClientId, FileId, Name, Data) ->
    upload_file(ClientId, FileId, Name, Data, node()).
%% Run a complete file-transfer flow over MQTT: publish the file metadata to
%% `$file/FileId/init`, the whole payload as a single segment at offset 0,
%% and the `fin` message carrying the total size. Returns `ok`, or
%% `{error, ReasonCode}` taken from the final publish.
upload_file(ClientId, FileId, Name, Data, Node) ->
    C1 = start_client(ClientId, Node),

    Size = byte_size(Data),
    Meta = #{
        name => Name,
        expire_at => erlang:system_time(_Unit = second) + 3600,
        size => Size
    },
    MetaPayload = emqx_utils_json:encode(emqx_ft:encode_filemeta(Meta)),

    ct:pal("MetaPayload = ~ts", [MetaPayload]),

    MetaTopic = <<"$file/", FileId/binary, "/init">>,
    {ok, #{reason_code_name := success}} = emqtt:publish(C1, MetaTopic, MetaPayload, 1),
    {ok, #{reason_code_name := success}} = emqtt:publish(
        C1, <<"$file/", FileId/binary, "/0">>, Data, 1
    ),

    FinTopic = <<"$file/", FileId/binary, "/fin/", (integer_to_binary(Size))/binary>>,
    FinResult =
        case emqtt:publish(C1, FinTopic, <<>>, 1) of
            {ok, #{reason_code_name := success}} ->
                ok;
            {ok, #{reason_code_name := Error}} ->
                {error, Error}
        end,
    ok = emqtt:stop(C1),
    FinResult.

%% erlcloud AWS config pointing at the CT MinIO service.
aws_config() ->
    emqx_s3_test_helpers:aws_config(tcp, binary_to_list(?S3_HOST), ?S3_PORT).

%% ---- file boundary: apps/emqx_ft/test/props/prop_emqx_ft_assembly.erl ----

%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% Property-based tests for `emqx_ft_assembly` coverage computation.
-module(prop_emqx_ft_assembly).

-include_lib("proper/include/proper.hrl").

-import(emqx_proper_types, [scaled/2]).

%% Hard per-case time budget: coverage computation must stay fast.
-define(COVERAGE_TIMEOUT, 5000).

%% Property: for random segment layouts, `emqx_ft_assembly:update/1` either
%% reports `complete` with a coverage whose segments tile the file
%% contiguously, or points at a concrete missing segment.
prop_coverage() ->
    ?FORALL(
        {Filesize, Segsizes},
        {filesize_t(), segsizes_t()},
        ?FORALL(
            Fragments,
            noshrink(segments_t(Filesize, Segsizes)),
            ?TIMEOUT(
                ?COVERAGE_TIMEOUT,
                begin
                    ASM1 = append_segments(mk_assembly(Filesize), Fragments),
                    {Time, ASM2} = timer:tc(emqx_ft_assembly, update, [ASM1]),
                    measure(
                        #{"Fragments" => length(Fragments), "Time" => Time},
                        case emqx_ft_assembly:status(ASM2) of
                            complete ->
                                Coverage = emqx_ft_assembly:coverage(ASM2),
                                measure(
                                    #{"CoverageLength" => length(Coverage)},
                                    is_coverage_complete(Coverage)
                                );
                            {incomplete, {missing, {segment, _, _}}} ->
                                measure("CoverageLength", 0, true)
                        end
                    )
                end
            )
        )
    ).

%% Property: segment layouts generated with a hole around a random offset
%% should mostly produce incomplete coverage; completeness is still checked
%% when it happens.
prop_coverage_likely_incomplete() ->
    ?FORALL(
        {Filesize, Segsizes, Hole},
        {filesize_t(), segsizes_t(), filesize_t()},
        ?FORALL(
            Fragments,
            noshrink(segments_t(Filesize, Segsizes, Hole)),
            ?TIMEOUT(
                ?COVERAGE_TIMEOUT,
                begin
                    ASM1 = append_segments(mk_assembly(Filesize), Fragments),
                    {Time, ASM2} = timer:tc(emqx_ft_assembly, update, [ASM1]),
                    measure(
                        #{"Fragments" => length(Fragments), "Time" => Time},
                        case emqx_ft_assembly:status(ASM2) of
                            complete ->
                                % NOTE: this is still possible due to the nature of `SUCHTHATMAYBE`
                                IsComplete = emqx_ft_assembly:coverage(ASM2),
                                collect(complete, is_coverage_complete(IsComplete));
                            {incomplete, {missing, {segment, _, _}}} ->
                                collect(incomplete, true)
                        end
                    )
                end
            )
        )
    ).
%% Property: when a remote node contributes a full tiling of the file, the
%% assembly must always resolve to `complete`, regardless of extra random
%% fragments.
prop_coverage_complete() ->
    ?FORALL(
        {Filesize, Segsizes},
        {filesize_t(), ?SUCHTHAT([BaseSegsize | _], segsizes_t(), BaseSegsize > 0)},
        ?FORALL(
            {Fragments, RemoteNode},
            noshrink({segments_t(Filesize, Segsizes), remote_node_t()}),
            begin
                % Ensure that we have complete coverage
                ASM1 = mk_assembly(Filesize),
                ASM2 = append_coverage(ASM1, RemoteNode, Filesize, Segsizes),
                ASM3 = append_segments(ASM2, Fragments),
                {Time, ASM4} = timer:tc(emqx_ft_assembly, update, [ASM3]),
                measure(
                    #{"CoverageMax" => nsegs(Filesize, Segsizes), "Time" => Time},
                    case emqx_ft_assembly:status(ASM4) of
                        complete ->
                            Coverage = emqx_ft_assembly:coverage(ASM4),
                            measure(
                                #{"Coverage" => length(Coverage)},
                                is_coverage_complete(Coverage)
                            );
                        {incomplete, _} ->
                            false
                    end
                )
            end
        )
    ).

%% Fold a map of named samples into nested PROPER measure/3 wrappers.
measure(NamedSamples, Test) ->
    maps:fold(fun(Name, Sample, Acc) -> measure(Name, Sample, Acc) end, Test, NamedSamples).

%% A coverage is complete when consecutive segments are contiguous:
%% each segment ends exactly where the next one starts.
is_coverage_complete([]) ->
    true;
is_coverage_complete(Coverage = [_ | Tail]) ->
    is_coverage_complete(Coverage, Tail).

is_coverage_complete([_], []) ->
    true;
is_coverage_complete(
    [{_Node1, #{fragment := {segment, #{offset := O1, size := S1}}}} | Rest],
    [{_Node2, #{fragment := {segment, #{offset := O2}}}} | Tail]
) ->
    (O1 + S1 == O2) andalso is_coverage_complete(Rest, Tail).

%% Fresh assembly seeded with the filemeta fragment on the local node.
mk_assembly(Filesize) ->
    emqx_ft_assembly:append(emqx_ft_assembly:new(Filesize), node(), mk_filemeta(Filesize)).

%% Append each {Node, {Offset, Size}} fragment to the assembly in order.
append_segments(ASMIn, Fragments) ->
    lists:foldl(
        fun({Node, {Offset, Size}}, ASM) ->
            emqx_ft_assembly:append(ASM, Node, mk_segment(Offset, Size))
        end,
        ASMIn,
        Fragments
    ).

%% Append a contiguous tiling of the whole file (base segment size) on Node.
append_coverage(ASM, Node, Filesize, Segsizes = [BaseSegsize | _]) ->
    append_coverage(ASM, Node, Filesize, BaseSegsize, 0, nsegs(Filesize, Segsizes)).
%% Recursive worker for append_coverage/4: appends segment I of the tiling,
%% truncating the final segment to the remaining file size.
append_coverage(ASM, Node, Filesize, Segsize, I, NSegs) when I < NSegs ->
    Offset = I * Segsize,
    Size = min(Segsize, Filesize - Offset),
    ASMNext = emqx_ft_assembly:append(ASM, Node, mk_segment(Offset, Size)),
    append_coverage(ASMNext, Node, Filesize, Segsize, I + 1, NSegs);
append_coverage(ASM, _Node, _Filesize, _Segsize, _, _NSegs) ->
    ASM.

mk_filemeta(Filesize) ->
    #{
        path => "MANIFEST.json",
        fragment => {filemeta, #{name => ?MODULE_STRING, size => Filesize}}
    }.

mk_segment(Offset, Size) ->
    #{
        path => "SEG" ++ integer_to_list(Offset) ++ integer_to_list(Size),
        fragment => {segment, #{offset => Offset, size => Size}}
    }.

%% Upper bound on the number of base-size segments needed to tile the file.
nsegs(Filesize, [BaseSegsize | _]) ->
    Filesize div max(1, BaseSegsize) + 1.

%% Generator: list of {Node, {Offset, Size}} fragments, sized by nsegs/2.
segments_t(Filesize, Segsizes) ->
    scaled(nsegs(Filesize, Segsizes), list({node_t(), segment_t(Filesize, Segsizes)})).

segments_t(Filesize, Segsizes, Hole) ->
    scaled(nsegs(Filesize, Segsizes), list({node_t(), segment_t(Filesize, Segsizes, Hole)})).

%% Generator: segment that preferably avoids covering the `Hole` offset
%% (best-effort, via SUCHTHATMAYBE).
segment_t(Filesize, Segsizes, Hole) ->
    ?SUCHTHATMAYBE(
        {Offset, Size},
        segment_t(Filesize, Segsizes),
        (Hole rem Filesize) =< Offset orelse (Hole rem Filesize) > (Offset + Size)
    ).

%% Generator: aligned segment at a random index with one of the given sizes,
%% truncated at the end of the file.
segment_t(Filesize, Segsizes) ->
    ?LET(
        Segsize,
        oneof(Segsizes),
        ?LET(
            Index,
            range(0, Filesize div max(1, Segsize)),
            {Index * Segsize, min(Segsize, Filesize - (Index * Segsize))}
        )
    ).

filesize_t() ->
    scaled(4000, non_neg_integer()).

%% Generator: segment size lists whose head is the base size and the rest
%% are multiples of it.
segsizes_t() ->
    ?LET(
        BaseSize,
        segsize_t(),
        oneof([
            [BaseSize, BaseSize * 2],
            [BaseSize, BaseSize * 2, BaseSize * 3],
            [BaseSize, BaseSize * 2, BaseSize * 5]
        ])
    ).

segsize_t() ->
    scaled(50, non_neg_integer()).

remote_node_t() ->
    oneof([
        'emqx42@emqx.local',
        'emqx43@emqx.local',
        'emqx44@emqx.local'
    ]).

node_t() ->
    oneof([
        node(),
        'emqx42@emqx.local',
        'emqx43@emqx.local',
        'emqx44@emqx.local'
    ]).
diff --git a/apps/emqx_machine/src/emqx_machine.app.src b/apps/emqx_machine/src/emqx_machine.app.src index a44d2b36e..7cf0e4b53 100644 --- a/apps/emqx_machine/src/emqx_machine.app.src +++ b/apps/emqx_machine/src/emqx_machine.app.src @@ -3,7 +3,7 @@ {id, "emqx_machine"}, {description, "The EMQX Machine"}, % strict semver, bump manually! - {vsn, "0.2.3"}, + {vsn, "0.2.4"}, {modules, []}, {registered, []}, {applications, [kernel, stdlib, emqx_ctl]}, diff --git a/apps/emqx_machine/src/emqx_machine_boot.erl b/apps/emqx_machine/src/emqx_machine_boot.erl index e3f84079b..f74db45ec 100644 --- a/apps/emqx_machine/src/emqx_machine_boot.erl +++ b/apps/emqx_machine/src/emqx_machine_boot.erl @@ -154,6 +154,8 @@ basic_reboot_apps() -> ee -> CE ++ [ + emqx_s3, + emqx_ft, emqx_eviction_agent, emqx_node_rebalance ] diff --git a/apps/emqx_s3/BSL.txt b/apps/emqx_s3/BSL.txt new file mode 100644 index 000000000..0acc0e696 --- /dev/null +++ b/apps/emqx_s3/BSL.txt @@ -0,0 +1,94 @@ +Business Source License 1.1 + +Licensor: Hangzhou EMQ Technologies Co., Ltd. +Licensed Work: EMQX Enterprise Edition + The Licensed Work is (c) 2023 + Hangzhou EMQ Technologies Co., Ltd. +Additional Use Grant: Students and educators are granted right to copy, + modify, and create derivative work for research + or education. +Change Date: 2027-02-01 +Change License: Apache License, Version 2.0 + +For information about alternative licensing arrangements for the Software, +please contact Licensor: https://www.emqx.com/en/contact + +Notice + +The Business Source License (this document, or the ā€œLicenseā€) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. + +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +ā€œBusiness Source Licenseā€ is a trademark of MariaDB Corporation Ab. 
+ +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. + +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). 
+ +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN ā€œAS ISā€ BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark ā€œBusiness Source Licenseā€, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the ā€œBusiness +Source Licenseā€ name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where ā€œcompatibleā€ means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text ā€œNoneā€. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. diff --git a/apps/emqx_s3/README.md b/apps/emqx_s3/README.md new file mode 100644 index 000000000..4ce1b0c0a --- /dev/null +++ b/apps/emqx_s3/README.md @@ -0,0 +1,135 @@ +# emqx_s3 + +EMQX S3 Application + +## Description + +This application provides functionality for uploading files to S3. + +## Usage + +The steps to integrate this application are: +* Integrate S3 configuration schema where needed. 
+* On _client_ application start: + * Call `emqx_s3:start_profile(ProfileName, ProfileConfig)` with configuration. + * Add `emqx_config_handler` hook to call `emqx_s3:start_profile(ProfileName, ProfileConfig)` when configuration is updated. +* On _client_ application stop, call `emqx_s3:stop_profile(ProfileName)`. + +`ProfileName` is a unique name used to distinguish different sets of S3 settings. Each profile has its own connection pool and configuration. + +To use S3 from a _client_ application: +* Create an uploader process with `{ok, Pid} = emqx_s3:start_uploader(ProfileName, #{key => MyKey})`. +* Write data with `emqx_s3_uploader:write(Pid, <<"data">>)`. +* Finish the uploader with `emqx_s3_uploader:complete(Pid)` or `emqx_s3_uploader:abort(Pid)`. + +### Configuration + +Example of integrating S3 configuration schema into a _client_ application `emqx_someapp`. + +```erlang +-module(emqx_someapp_schema). + +... + +roots() -> [someapp] +... + +fields(someapp) -> + [ + {other_setting, ...}, + {s3_settings, + mk( + hoconsc:ref(emqx_s3_schema, s3), + #{ + desc => ?DESC("s3_settings"), + required => true + } + )} + ]; +... + +``` + +### Application start and config hooks + +```erlang +-module(emqx_someapp_app). + +-behaviour(application). + +-export([start/2, stop/1]). + +-export([ + pre_config_update/3, + post_config_update/5 +]). + +start(_StartType, _StartArgs) -> + ProfileConfig = emqx_config:get([someapp, s3_settings]), + ProfileName = someapp, + ok = emqx_s3:start_profile(ProfileName, ProfileConfig), + ok = emqx_config_handler:add_handler([someapp], ?MODULE). + +stop(_State) -> + ok = emqx_conf:remove_handler([someapp]), + ProfileName = someapp, + ok = emqx_s3:stop_profile(ProfileName). + +pre_config_update(_Path, NewConfig, _OldConfig) -> + {ok, NewConfig}. 
+ +post_config_update(Path, _Req, NewConfig, _OldConfig, _AppEnvs) -> + NewProfileConfig = maps:get(s3_settings, NewConfig), + ProfileName = someapp, + %% more graceful error handling may be needed + ok = emqx_s3:update_profile(ProfileName, NewProfileConfig). + +``` + +### Uploader usage + +```erlang +-module(emqx_someapp_logic). +... + +-spec do_upload_data(Key :: string(), Data :: binary()) -> ok. +do_upload_data(Key, Data) -> + ProfileName = someapp, + {ok, Pid} = emqx_s3:start_uploader(ProfileName, #{key => Key}), + ok = emqx_s3_uploader:write(Pid, Data), + ok = emqx_s3_uploader:complete(Pid). + +``` + +## Design + +![Design](./docs/s3_app.png) + +* Each profile has its own supervisor `emqx_s3_profile_sup`. +* Under each profile supervisor, there is a + * `emqx_s3_profile_uploader_sup` supervisor for uploader processes. + * `emqx_s3_profile_conf` server for managing profile configuration. + +When an uploader process is started, it checkouts the actual S3 configuration for the profile from the `emqx_s3_profile_conf` server. It uses the obtained configuration and connection pool to upload data to S3 till the termination, even if the configuration is updated. + +Other processes (`emqx_XXX`) can also checkout the actual S3 configuration for the profile from the `emqx_s3_profile_conf` server. + +`emqx_s3_profile_conf`: +* Keeps actual S3 configuration for the profile and creates a connection pool for the actual configuration. +* Creates a new connection pool when the configuration is updated. +* Keeps track of uploaders using connection pools. +* Drops connection pools when no uploaders are using it or after a timeout. + +The code is designed to allow a painless transition from `ehttpc` pool to any other HTTP pool/client. + +## Possible performance improvements + +One of the downsides of the current implementation is that there is a lot of message passing between the uploader client and the actual sockets. 
+ +A possible improvement could be: +* Use a process-less HTTP client, like [Mint](https://github.com/elixir-mint/mint). +* Use a resource pool, like [NimblePool](https://github.com/dashbitco/nimble_pool) to manage the HTTP connections. It temporarily grants sockets to its clients. +* Do the buffering logic locally in the uploader client. +* Use `emqx_s3_client` directly from the uploader client. + +In this case, the data will be directly sent to the socket, without being sent to any intermediate processes. diff --git a/apps/emqx_s3/docker-ct b/apps/emqx_s3/docker-ct new file mode 100644 index 000000000..a5a001815 --- /dev/null +++ b/apps/emqx_s3/docker-ct @@ -0,0 +1,2 @@ +minio +toxiproxy diff --git a/apps/emqx_s3/docs/s3_app.png b/apps/emqx_s3/docs/s3_app.png new file mode 100644 index 000000000..cb7758844 Binary files /dev/null and b/apps/emqx_s3/docs/s3_app.png differ diff --git a/apps/emqx_s3/rebar.config b/apps/emqx_s3/rebar.config new file mode 100644 index 000000000..b1483e028 --- /dev/null +++ b/apps/emqx_s3/rebar.config @@ -0,0 +1,6 @@ +{deps, [ + {emqx, {path, "../../apps/emqx"}}, + {erlcloud, {git, "https://github.com/emqx/erlcloud", {tag, "3.6.8-emqx-1"}}} +]}. + +{project_plugins, [erlfmt]}. diff --git a/apps/emqx_s3/src/emqx_s3.app.src b/apps/emqx_s3/src/emqx_s3.app.src new file mode 100644 index 000000000..7864ffb29 --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3.app.src @@ -0,0 +1,14 @@ +{application, emqx_s3, [ + {description, "EMQX S3"}, + {vsn, "5.0.6"}, + {modules, []}, + {registered, [emqx_s3_sup]}, + {applications, [ + kernel, + stdlib, + gproc, + erlcloud, + ehttpc + ]}, + {mod, {emqx_s3_app, []}} +]}. diff --git a/apps/emqx_s3/src/emqx_s3.erl b/apps/emqx_s3/src/emqx_s3.erl new file mode 100644 index 000000000..0c2592736 --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3.erl @@ -0,0 +1,96 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
%%--------------------------------------------------------------------

%% Public API of the emqx_s3 application: lifecycle of named S3 "profiles"
%% (a profile is a set of S3 settings with its own connection pool) and
%% entry points for uploading data through them.
-module(emqx_s3).

-include_lib("emqx/include/types.hrl").

-export([
    start_profile/2,
    stop_profile/1,
    update_profile/2,
    start_uploader/2,
    with_client/2
]).

-export_type([
    profile_id/0,
    profile_config/0,
    acl/0
]).

-type profile_id() :: atom() | binary().

%% Canned S3 ACLs applied to uploaded objects.
-type acl() ::
    private
    | public_read
    | public_read_write
    | authenticated_read
    | bucket_owner_read
    | bucket_owner_full_control.

%% Options forwarded to the underlying HTTP connection pool.
-type transport_options() :: #{
    headers => map(),
    connect_timeout => pos_integer(),
    enable_pipelining => pos_integer(),
    max_retries => pos_integer(),
    pool_size => pos_integer(),
    pool_type => atom(),
    ipv6_probe => boolean(),
    ssl => map()
}.

-type profile_config() :: #{
    bucket := string(),
    access_key_id => string(),
    secret_access_key => string(),
    host := string(),
    port := pos_integer(),
    url_expire_time := pos_integer(),
    acl => acl(),
    min_part_size => pos_integer(),
    transport_options => transport_options()
}.

-define(IS_PROFILE_ID(ProfileId), (is_atom(ProfileId) orelse is_binary(ProfileId))).

%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------

%% Start the supervision subtree (config server, pools) for a profile.
%% The `{ok, Pid}` from the supervisor is collapsed to plain `ok`.
-spec start_profile(profile_id(), profile_config()) -> ok_or_error(term()).
start_profile(ProfileId, ProfileConfig) when ?IS_PROFILE_ID(ProfileId) ->
    case emqx_s3_sup:start_profile(ProfileId, ProfileConfig) of
        {ok, _} ->
            ok;
        {error, _} = Error ->
            Error
    end.

-spec stop_profile(profile_id()) -> ok_or_error(term()).
stop_profile(ProfileId) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_sup:stop_profile(ProfileId).

%% Apply a new configuration to an already-running profile.
-spec update_profile(profile_id(), profile_config()) -> ok_or_error(term()).
update_profile(ProfileId, ProfileConfig) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_profile_conf:update_config(ProfileId, ProfileConfig).
%% Start an uploader process under the profile's uploader supervisor.
-spec start_uploader(profile_id(), emqx_s3_uploader:opts()) ->
    supervisor:start_ret() | {error, profile_not_found}.
start_uploader(ProfileId, Opts) when ?IS_PROFILE_ID(ProfileId) ->
    emqx_s3_profile_uploader_sup:start_uploader(ProfileId, Opts).

%% Run Fun with a client built from the profile's current configuration.
%% The config is checked out for the duration of the call and checked back
%% in afterwards, even if Fun raises.
-spec with_client(profile_id(), fun((emqx_s3_client:client()) -> Result)) ->
    {error, profile_not_found} | Result.
with_client(ProfileId, Fun) when is_function(Fun, 1) andalso ?IS_PROFILE_ID(ProfileId) ->
    case emqx_s3_profile_conf:checkout_config(ProfileId) of
        {ok, ClientConfig, _UploadConfig} ->
            try
                Fun(emqx_s3_client:create(ClientConfig))
            after
                emqx_s3_profile_conf:checkin_config(ProfileId)
            end;
        {error, _} = Error ->
            Error
    end.

%% ---- file boundary: apps/emqx_s3/src/emqx_s3.hrl ----

%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------

%% Name processes through gproc's local name registry.
-define(VIA_GPROC(Id), {via, gproc, {n, l, Id}}).

%% gen_server:call via gproc that maps a missing/dead process to an error
%% tuple instead of exiting the caller.
-define(SAFE_CALL_VIA_GPROC(Id, Message, Timeout, NoProcError),
    try gen_server:call(?VIA_GPROC(Id), Message, Timeout) of
        Result -> Result
    catch
        exit:{noproc, _} -> {error, NoProcError}
    end
).

%% ---- file boundary: apps/emqx_s3/src/emqx_s3_app.erl ----

%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------

%% OTP application callback module: starts the emqx_s3 top-level supervisor.
-module(emqx_s3_app).

-behaviour(application).

-export([start/2, stop/1]).

start(_Type, _Args) ->
    {ok, Sup} = emqx_s3_sup:start_link(),
    {ok, Sup}.

stop(_State) ->
    ok.
diff --git a/apps/emqx_s3/src/emqx_s3_client.erl b/apps/emqx_s3/src/emqx_s3_client.erl new file mode 100644 index 000000000..3bc5861c6 --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_client.erl @@ -0,0 +1,428 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_client). + +-include_lib("emqx/include/types.hrl"). +-include_lib("emqx/include/logger.hrl"). +-include_lib("erlcloud/include/erlcloud_aws.hrl"). + +-export([ + create/1, + + put_object/3, + put_object/4, + + start_multipart/2, + start_multipart/3, + upload_part/5, + complete_multipart/4, + abort_multipart/3, + list/2, + uri/2, + + format/1, + format_request/1 +]). + +-export_type([ + client/0, + headers/0 +]). + +-type headers() :: #{binary() | string() => iodata()}. +-type erlcloud_headers() :: list({string(), iodata()}). + +-type key() :: string(). +-type part_number() :: non_neg_integer(). +-type upload_id() :: string(). +-type etag() :: string(). +-type http_pool() :: ehttpc:pool_name(). +-type pool_type() :: random | hash. +-type upload_options() :: list({acl, emqx_s3:acl()}). + +-opaque client() :: #{ + aws_config := aws_config(), + upload_options := upload_options(), + bucket := string(), + headers := erlcloud_headers(), + url_expire_time := non_neg_integer(), + pool_type := pool_type() +}. + +-type config() :: #{ + scheme := string(), + host := string(), + port := part_number(), + bucket := string(), + headers := headers(), + acl := emqx_s3:acl() | undefined, + url_expire_time := pos_integer(), + access_key_id := string() | undefined, + secret_access_key := string() | undefined, + http_pool := http_pool(), + pool_type := pool_type(), + request_timeout := timeout() | undefined, + max_retries := non_neg_integer() | undefined +}. + +-type s3_options() :: proplists:proplist(). + +-define(DEFAULT_REQUEST_TIMEOUT, 30000). 
-define(DEFAULT_MAX_RETRIES, 2).

%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------

%% Build an immutable client term from a checked-out profile config;
%% every subsequent operation takes this term as its first argument.
-spec create(config()) -> client().
create(Config) ->
    #{
        aws_config => aws_config(Config),
        upload_options => upload_options(Config),
        bucket => maps:get(bucket, Config),
        url_expire_time => maps:get(url_expire_time, Config),
        headers => headers(Config),
        pool_type => maps:get(pool_type, Config)
    }.

-spec put_object(client(), key(), iodata()) -> ok_or_error(term()).
put_object(Client, Key, Value) ->
    put_object(Client, #{}, Key, Value).

%% Single-request upload. erlcloud reports S3 failures by raising
%% `{aws_error, _}`, which is converted into an error tuple here.
-spec put_object(client(), headers(), key(), iodata()) -> ok_or_error(term()).
put_object(
    #{bucket := Bucket, upload_options := Options, headers := Headers, aws_config := AwsConfig},
    SpecialHeaders,
    Key,
    Value
) ->
    AllHeaders = join_headers(Headers, SpecialHeaders),
    try erlcloud_s3:put_object(Bucket, erlcloud_key(Key), Value, Options, AllHeaders, AwsConfig) of
        Props when is_list(Props) ->
            ok
    catch
        error:{aws_error, Reason} ->
            ?SLOG(debug, #{msg => "put_object_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.

-spec start_multipart(client(), key()) -> ok_or_error(upload_id(), term()).
start_multipart(Client, Key) ->
    start_multipart(Client, #{}, Key).

%% Begin a multipart upload; returns the S3 upload id consumed by
%% upload_part/5 and complete/abort_multipart.
-spec start_multipart(client(), headers(), key()) -> ok_or_error(upload_id(), term()).
start_multipart(
    #{bucket := Bucket, upload_options := Options, headers := Headers, aws_config := AwsConfig},
    SpecialHeaders,
    Key
) ->
    AllHeaders = join_headers(Headers, SpecialHeaders),
    case erlcloud_s3:start_multipart(Bucket, erlcloud_key(Key), Options, AllHeaders, AwsConfig) of
        {ok, Props} ->
            {ok, response_property('uploadId', Props)};
        {error, Reason} ->
            ?SLOG(debug, #{msg => "start_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.
%% Upload one part of a multipart upload; returns the part's ETag, which is
%% needed by complete_multipart/4.
-spec upload_part(client(), key(), upload_id(), part_number(), iodata()) ->
    ok_or_error(etag(), term()).
upload_part(
    #{bucket := Bucket, headers := Headers, aws_config := AwsConfig},
    Key,
    UploadId,
    PartNumber,
    Value
) ->
    case
        erlcloud_s3:upload_part(
            Bucket, erlcloud_key(Key), UploadId, PartNumber, Value, Headers, AwsConfig
        )
    of
        {ok, Props} ->
            {ok, response_property(etag, Props)};
        {error, Reason} ->
            ?SLOG(debug, #{msg => "upload_part_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.

%% Finalize a multipart upload from the accumulated part ETags.
-spec complete_multipart(client(), key(), upload_id(), [etag()]) -> ok_or_error(term()).
complete_multipart(
    #{bucket := Bucket, headers := Headers, aws_config := AwsConfig},
    Key,
    UploadId,
    ETags
) ->
    case
        erlcloud_s3:complete_multipart(
            Bucket, erlcloud_key(Key), UploadId, ETags, Headers, AwsConfig
        )
    of
        ok ->
            ok;
        {error, Reason} ->
            ?SLOG(debug, #{msg => "complete_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.

%% Cancel a multipart upload, discarding already-uploaded parts.
-spec abort_multipart(client(), key(), upload_id()) -> ok_or_error(term()).
abort_multipart(#{bucket := Bucket, headers := Headers, aws_config := AwsConfig}, Key, UploadId) ->
    case erlcloud_s3:abort_multipart(Bucket, erlcloud_key(Key), UploadId, [], Headers, AwsConfig) of
        ok ->
            ok;
        {error, Reason} ->
            ?SLOG(debug, #{msg => "abort_multipart_fail", key => Key, reason => Reason}),
            {error, Reason}
    end.

%% List bucket objects; Options are passed straight through to erlcloud.
-spec list(client(), s3_options()) -> ok_or_error(proplists:proplist(), term()).
list(#{bucket := Bucket, aws_config := AwsConfig}, Options) ->
    try erlcloud_s3:list_objects(Bucket, Options, AwsConfig) of
        Result -> {ok, Result}
    catch
        error:{aws_error, Reason} ->
            ?SLOG(debug, #{msg => "list_objects_fail", bucket => Bucket, reason => Reason}),
            {error, Reason}
    end.

-spec uri(client(), key()) -> iodata().
%% Presigned (Signature V4) GET URL for the key, valid for the configured
%% expiry time.
uri(#{bucket := Bucket, aws_config := AwsConfig, url_expire_time := ExpireTime}, Key) ->
    erlcloud_s3:make_presigned_v4_url(ExpireTime, Bucket, get, erlcloud_key(Key), [], AwsConfig).

%% Render a client suitable for logging: the secret key is redacted.
-spec format(client()) -> term().
format(#{aws_config := AwsConfig} = Client) ->
    Client#{aws_config => AwsConfig#aws_config{secret_access_key = "***"}}.

%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------

%% Upload options carry only the ACL, and only when one is configured.
upload_options(#{acl := Acl}) when Acl =/= undefined ->
    [
        {acl, Acl}
    ];
upload_options(#{}) ->
    [].

%% Preconfigured headers converted to erlcloud's list form; absent -> [].
headers(#{headers := Headers}) ->
    headers_user_to_erlcloud_request(Headers);
headers(#{}) ->
    [].

%% erlcloud config record: custom endpoint with path-style bucket
%% addressing, and an injected ehttpc-backed HTTP client.
aws_config(#{
    scheme := Scheme,
    host := Host,
    port := Port,
    access_key_id := AccessKeyId,
    secret_access_key := SecretAccessKey,
    http_pool := HttpPool,
    pool_type := PoolType,
    request_timeout := Timeout,
    max_retries := MaxRetries
}) ->
    #aws_config{
        s3_scheme = Scheme,
        s3_host = Host,
        s3_port = Port,
        s3_bucket_access_method = path,
        s3_bucket_after_host = true,

        access_key_id = AccessKeyId,
        secret_access_key = SecretAccessKey,

        http_client = request_fun(
            HttpPool, PoolType, with_default(MaxRetries, ?DEFAULT_MAX_RETRIES)
        ),

        %% This value will be transparently passed to ehttpc
        timeout = with_default(Timeout, ?DEFAULT_REQUEST_TIMEOUT),
        %% We rely on retry mechanism of ehttpc
        retry_num = 1
    }.

-spec request_fun(http_pool(), pool_type(), non_neg_integer()) -> erlcloud_httpc:request_fun().
%% erlcloud `http_client` callback backed by ehttpc. The scheme/authority is
%% stripped from the URL (ehttpc workers are already bound to a host/port),
%% a worker is picked from the pool, and the request is dispatched to it.
request_fun(HttpPool, PoolType, MaxRetries) ->
    fun(Url, Method, Headers, Body, Timeout, _Config) ->
        with_path_and_query_only(Url, fun(PathQuery) ->
            Request = make_request(
                Method, PathQuery, headers_erlcloud_request_to_ehttpc(Headers), Body
            ),
            case pick_worker_safe(HttpPool, PoolType) of
                {ok, Worker} ->
                    ehttpc_request(Worker, Method, Request, Timeout, MaxRetries);
                {error, Reason} ->
                    ?SLOG(error, #{
                        msg => "s3_request_fun_fail",
                        reason => Reason,
                        http_pool => HttpPool,
                        pool_type => PoolType,
                        method => Method,
                        request => Request,
                        timeout => Timeout,
                        max_retries => MaxRetries
                    }),
                    {error, Reason}
            end
        end)
    end.

%% Execute the request and convert ehttpc's result into the
%% `{ok, {{StatusCode, _}, Headers, Body}}` shape erlcloud expects.
%% `error:badarg` from ehttpc is mapped to `{error, no_ehttpc_pool}`.
%% NOTE(review): successful requests are logged at `info` level including
%% response headers and body — confirm this is not too verbose for
%% production traffic.
ehttpc_request(HttpPool, Method, Request, Timeout, MaxRetries) ->
    try timer:tc(fun() -> ehttpc:request(HttpPool, Method, Request, Timeout, MaxRetries) end) of
        {Time, {ok, StatusCode, RespHeaders}} ->
            ?SLOG(info, #{
                msg => "s3_ehttpc_request_ok",
                status_code => StatusCode,
                headers => RespHeaders,
                time => Time
            }),
            {ok, {
                {StatusCode, undefined}, headers_ehttpc_to_erlcloud_response(RespHeaders), undefined
            }};
        {Time, {ok, StatusCode, RespHeaders, RespBody}} ->
            ?SLOG(info, #{
                msg => "s3_ehttpc_request_ok",
                status_code => StatusCode,
                headers => RespHeaders,
                body => RespBody,
                time => Time
            }),
            {ok, {
                {StatusCode, undefined}, headers_ehttpc_to_erlcloud_response(RespHeaders), RespBody
            }};
        {Time, {error, Reason}} ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => Reason,
                timeout => Timeout,
                pool => HttpPool,
                method => Method,
                time => Time
            }),
            {error, Reason}
    catch
        error:badarg ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => badarg,
                timeout => Timeout,
                pool => HttpPool,
                method => Method
            }),
            {error, no_ehttpc_pool};
        error:Reason ->
            ?SLOG(error, #{
                msg => "s3_ehttpc_request_fail",
                reason => Reason,
                timeout => Timeout,
                pool => HttpPool,
                method => Method
            }),
            {error, Reason}
    end.
+
+pick_worker_safe(HttpPool, PoolType) ->
+    try
+        {ok, pick_worker(HttpPool, PoolType)}
+    catch
+        error:badarg ->
+            {error, no_ehttpc_pool}
+    end.
+
+pick_worker(HttpPool, random) ->
+    ehttpc_pool:pick_worker(HttpPool);
+pick_worker(HttpPool, hash) ->
+    ehttpc_pool:pick_worker(HttpPool, self()).
+
+-define(IS_BODY_EMPTY(Body), (Body =:= undefined orelse Body =:= <<>>)).
+-define(NEEDS_NO_BODY(Method), (Method =:= get orelse Method =:= head orelse Method =:= delete)).
+
+make_request(Method, PathQuery, Headers, Body) when
+    ?IS_BODY_EMPTY(Body) andalso ?NEEDS_NO_BODY(Method)
+->
+    {PathQuery, Headers};
+make_request(_Method, PathQuery, Headers, Body) when ?IS_BODY_EMPTY(Body) ->
+    {PathQuery, [{<<"content-length">>, <<"0">>} | Headers], <<>>};
+make_request(_Method, PathQuery, Headers, Body) ->
+    {PathQuery, Headers, Body}.
+
+format_request({PathQuery, Headers, _Body}) -> {PathQuery, Headers, <<"...">>};
+format_request({PathQuery, Headers}) -> {PathQuery, Headers}.
+
+with_path_and_query_only(Url, Fun) ->
+    case string:split(Url, "//", leading) of
+        [_Scheme, UrlRem] ->
+            case string:split(UrlRem, "/", leading) of
+                [_HostPort, PathQuery] ->
+                    Fun([$/ | PathQuery]);
+                _ ->
+                    {error, {invalid_url, Url}}
+            end;
+        _ ->
+            {error, {invalid_url, Url}}
+    end.
+
+%% We need some header conversions to tie the emqx_s3, erlcloud and ehttpc APIs together.
+
+%% The request header flow is:
+
+%% UserHeaders -> [emqx_s3_client API] -> ErlcloudRequestHeaders0 ->
+%% -> [erlcloud API] -> ErlcloudRequestHeaders1 -> [emqx_s3_client injected request_fun] ->
+%% -> EhttpcRequestHeaders -> [ehttpc API]
+
+%% The response header flow is:
+
+%% [ehttpc API] -> EhttpcResponseHeaders -> [emqx_s3_client injected request_fun] ->
+%% -> ErlcloudResponseHeaders0 -> [erlcloud API] -> [emqx_s3_client API]
+
+%% UserHeaders (emqx_s3 API headers) are maps with string/binary keys.
+%% ErlcloudRequestHeaders are lists of tuples with string keys and iodata values
+%% ErlcloudResponseHeaders are lists of tuples with lower case string keys and iodata values.
+%% EhttpcHeaders are lists of tuples with binary keys and iodata values.
+
+%% Users provide headers as a map, but erlcloud expects a list of tuples with string keys and values.
+headers_user_to_erlcloud_request(UserHeaders) ->
+    [{to_list_string(K), V} || {K, V} <- maps:to_list(UserHeaders)].
+
+%% Ehttpc operates on headers as a list of tuples with binary keys.
+%% Erlcloud expects a list of tuples with string values and lowercase string keys
+%% from the underlying http library.
+headers_ehttpc_to_erlcloud_response(EhttpcHeaders) ->
+    [{string:to_lower(to_list_string(K)), to_list_string(V)} || {K, V} <- EhttpcHeaders].
+
+%% Ehttpc expects a list of tuples with binary keys.
+%% Erlcloud provides a list of tuples with string keys.
+headers_erlcloud_request_to_ehttpc(ErlcloudHeaders) ->
+    [{to_binary(K), V} || {K, V} <- ErlcloudHeaders].
+
+join_headers(ErlcloudHeaders, UserSpecialHeaders) ->
+    ErlcloudHeaders ++ headers_user_to_erlcloud_request(UserSpecialHeaders).
+
+to_binary(Val) when is_list(Val) -> list_to_binary(Val);
+to_binary(Val) when is_binary(Val) -> Val.
+
+to_list_string(Val) when is_binary(Val) ->
+    binary_to_list(Val);
+to_list_string(Val) when is_list(Val) ->
+    Val.
+
+erlcloud_key(Characters) ->
+    binary_to_list(unicode:characters_to_binary(Characters)).
+
+response_property(Name, Props) ->
+    case proplists:get_value(Name, Props) of
+        undefined ->
+            %% This should not happen for valid S3 implementations
+            ?SLOG(error, #{
+                msg => "missing_s3_response_property",
+                name => Name,
+                props => Props
+            }),
+            error({missing_s3_response_property, Name});
+        Value ->
+            Value
+    end.
+
+with_default(undefined, Default) -> Default;
+with_default(Value, _Default) -> Value.
diff --git a/apps/emqx_s3/src/emqx_s3_profile_conf.erl b/apps/emqx_s3/src/emqx_s3_profile_conf.erl new file mode 100644 index 000000000..87f006bcb --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_profile_conf.erl @@ -0,0 +1,388 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_profile_conf). + +-behaviour(gen_server). + +-include_lib("emqx/include/logger.hrl"). +-include_lib("emqx/include/types.hrl"). + +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-include("src/emqx_s3.hrl"). + +-export([ + start_link/2, + child_spec/2 +]). + +-export([ + checkout_config/1, + checkout_config/2, + checkin_config/1, + checkin_config/2, + + update_config/2, + update_config/3 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + +%% For test purposes +-export([ + client_config/2, + start_http_pool/2, + id/1 +]). + +-define(DEFAULT_CALL_TIMEOUT, 5000). + +-define(DEFAULT_HTTP_POOL_TIMEOUT, 60000). +-define(DEAFULT_HTTP_POOL_CLEANUP_INTERVAL, 60000). + +-define(SAFE_CALL_VIA_GPROC(ProfileId, Message, Timeout), + ?SAFE_CALL_VIA_GPROC(id(ProfileId), Message, Timeout, profile_not_found) +). + +-spec child_spec(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:child_spec(). +child_spec(ProfileId, ProfileConfig) -> + #{ + id => ProfileId, + start => {?MODULE, start_link, [ProfileId, ProfileConfig]}, + restart => permanent, + shutdown => 5000, + type => worker, + modules => [?MODULE] + }. + +-spec start_link(emqx_s3:profile_id(), emqx_s3:profile_config()) -> gen_server:start_ret(). +start_link(ProfileId, ProfileConfig) -> + gen_server:start_link(?VIA_GPROC(id(ProfileId)), ?MODULE, [ProfileId, ProfileConfig], []). + +-spec update_config(emqx_s3:profile_id(), emqx_s3:profile_config()) -> ok_or_error(term()). 
+update_config(ProfileId, ProfileConfig) -> + update_config(ProfileId, ProfileConfig, ?DEFAULT_CALL_TIMEOUT). + +-spec update_config(emqx_s3:profile_id(), emqx_s3:profile_config(), timeout()) -> + ok_or_error(term()). +update_config(ProfileId, ProfileConfig, Timeout) -> + ?SAFE_CALL_VIA_GPROC(ProfileId, {update_config, ProfileConfig}, Timeout). + +-spec checkout_config(emqx_s3:profile_id()) -> + {ok, emqx_s3_client:config(), emqx_s3_uploader:config()} | {error, profile_not_found}. +checkout_config(ProfileId) -> + checkout_config(ProfileId, ?DEFAULT_CALL_TIMEOUT). + +-spec checkout_config(emqx_s3:profile_id(), timeout()) -> + {ok, emqx_s3_client:config(), emqx_s3_uploader:config()} | {error, profile_not_found}. +checkout_config(ProfileId, Timeout) -> + ?SAFE_CALL_VIA_GPROC(ProfileId, {checkout_config, self()}, Timeout). + +-spec checkin_config(emqx_s3:profile_id()) -> ok | {error, profile_not_found}. +checkin_config(ProfileId) -> + checkin_config(ProfileId, ?DEFAULT_CALL_TIMEOUT). + +-spec checkin_config(emqx_s3:profile_id(), timeout()) -> ok | {error, profile_not_found}. +checkin_config(ProfileId, Timeout) -> + ?SAFE_CALL_VIA_GPROC(ProfileId, {checkin_config, self()}, Timeout). 
+ +%%-------------------------------------------------------------------- +%% gen_server callbacks +%%-------------------------------------------------------------------- + +init([ProfileId, ProfileConfig]) -> + _ = process_flag(trap_exit, true), + ok = cleanup_profile_pools(ProfileId), + case start_http_pool(ProfileId, ProfileConfig) of + {ok, PoolName} -> + HttpPoolCleanupInterval = http_pool_cleanup_interval(ProfileConfig), + {ok, #{ + profile_id => ProfileId, + profile_config => ProfileConfig, + client_config => client_config(ProfileConfig, PoolName), + uploader_config => uploader_config(ProfileConfig), + pool_name => PoolName, + pool_clients => emqx_s3_profile_http_pool_clients:create_table(), + %% We don't expose these options to users currently, but use in tests + http_pool_timeout => http_pool_timeout(ProfileConfig), + http_pool_cleanup_interval => HttpPoolCleanupInterval, + + outdated_pool_cleanup_tref => erlang:send_after( + HttpPoolCleanupInterval, self(), cleanup_outdated + ) + }}; + {error, Reason} -> + {stop, Reason} + end. 
+
+handle_call(
+    {checkout_config, Pid},
+    _From,
+    #{
+        client_config := ClientConfig,
+        uploader_config := UploaderConfig
+    } = State
+) ->
+    ok = register_client(Pid, State),
+    {reply, {ok, ClientConfig, UploaderConfig}, State};
+handle_call({checkin_config, Pid}, _From, State) ->
+    ok = unregister_client(Pid, State),
+    {reply, ok, State};
+handle_call(
+    {update_config, NewProfileConfig},
+    _From,
+    #{profile_id := ProfileId} = State
+) ->
+    case update_http_pool(ProfileId, NewProfileConfig, State) of
+        {ok, PoolName} ->
+            NewState = State#{
+                profile_config => NewProfileConfig,
+                client_config => client_config(NewProfileConfig, PoolName),
+                uploader_config => uploader_config(NewProfileConfig),
+                http_pool_timeout => http_pool_timeout(NewProfileConfig),
+                http_pool_cleanup_interval => http_pool_cleanup_interval(NewProfileConfig),
+                pool_name => PoolName
+            },
+            {reply, ok, NewState};
+        {error, Reason} ->
+            {reply, {error, Reason}, State}
+    end;
+handle_call(_Request, _From, State) ->
+    {reply, {error, not_implemented}, State}.
+
+handle_cast(_Request, State) ->
+    {noreply, State}.
+
+handle_info({'DOWN', _Ref, process, Pid, _Reason}, State) ->
+    ok = unregister_client(Pid, State),
+    {noreply, State};
+handle_info(cleanup_outdated, #{http_pool_cleanup_interval := HttpPoolCleanupInterval} = State0) ->
+    %% Maybe cleanup asynchronously
+    ok = cleanup_outdated_pools(State0),
+    State1 = State0#{
+        outdated_pool_cleanup_tref => erlang:send_after(
+            HttpPoolCleanupInterval, self(), cleanup_outdated
+        )
+    },
+    {noreply, State1};
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+terminate(_Reason, #{profile_id := ProfileId}) ->
+    cleanup_profile_pools(ProfileId).
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%--------------------------------------------------------------------
+%% Internal functions
+%%--------------------------------------------------------------------
+
+id(ProfileId) ->
+    {?MODULE, ProfileId}.
+ +client_config(ProfileConfig, PoolName) -> + HTTPOpts = maps:get(transport_options, ProfileConfig, #{}), + #{ + scheme => scheme(HTTPOpts), + host => maps:get(host, ProfileConfig), + port => maps:get(port, ProfileConfig), + url_expire_time => maps:get(url_expire_time, ProfileConfig), + headers => maps:get(headers, HTTPOpts, #{}), + acl => maps:get(acl, ProfileConfig, undefined), + bucket => maps:get(bucket, ProfileConfig), + access_key_id => maps:get(access_key_id, ProfileConfig, undefined), + secret_access_key => maps:get(secret_access_key, ProfileConfig, undefined), + request_timeout => maps:get(request_timeout, HTTPOpts, undefined), + max_retries => maps:get(max_retries, HTTPOpts, undefined), + pool_type => maps:get(pool_type, HTTPOpts, random), + http_pool => PoolName + }. + +uploader_config(#{max_part_size := MaxPartSize, min_part_size := MinPartSize} = _ProfileConfig) -> + #{ + min_part_size => MinPartSize, + max_part_size => MaxPartSize + }. + +scheme(#{ssl := #{enable := true}}) -> "https://"; +scheme(_TransportOpts) -> "http://". + +start_http_pool(ProfileId, ProfileConfig) -> + HttpConfig = http_config(ProfileConfig), + PoolName = pool_name(ProfileId), + case do_start_http_pool(PoolName, HttpConfig) of + ok -> + ok = emqx_s3_profile_http_pools:register(ProfileId, PoolName), + ok = ?tp(debug, "s3_start_http_pool", #{pool_name => PoolName, profile_id => ProfileId}), + {ok, PoolName}; + {error, _} = Error -> + Error + end. + +update_http_pool(ProfileId, ProfileConfig, #{pool_name := OldPoolName} = State) -> + HttpConfig = http_config(ProfileConfig), + OldHttpConfig = old_http_config(State), + case OldHttpConfig =:= HttpConfig of + true -> + {ok, OldPoolName}; + false -> + PoolName = pool_name(ProfileId), + case do_start_http_pool(PoolName, HttpConfig) of + ok -> + ok = set_old_pool_outdated(State), + ok = emqx_s3_profile_http_pools:register(ProfileId, PoolName), + {ok, PoolName}; + {error, _} = Error -> + Error + end + end. 
+ +pool_name(ProfileId) -> + iolist_to_binary([ + <<"s3-http-">>, + profile_id_to_bin(ProfileId), + <<"-">>, + integer_to_binary(erlang:system_time(millisecond)), + <<"-">>, + integer_to_binary(erlang:unique_integer([positive])) + ]). +profile_id_to_bin(Atom) when is_atom(Atom) -> atom_to_binary(Atom, utf8); +profile_id_to_bin(Bin) when is_binary(Bin) -> Bin. + +old_http_config(#{profile_config := ProfileConfig}) -> http_config(ProfileConfig). + +set_old_pool_outdated(#{ + profile_id := ProfileId, pool_name := PoolName, http_pool_timeout := HttpPoolTimeout +}) -> + _ = emqx_s3_profile_http_pools:set_outdated(ProfileId, PoolName, HttpPoolTimeout), + ok. + +cleanup_profile_pools(ProfileId) -> + lists:foreach( + fun(PoolName) -> + ok = stop_http_pool(ProfileId, PoolName) + end, + emqx_s3_profile_http_pools:all(ProfileId) + ). + +register_client(Pid, #{profile_id := ProfileId, pool_clients := PoolClients, pool_name := PoolName}) -> + MRef = monitor(process, Pid), + ok = emqx_s3_profile_http_pool_clients:register(PoolClients, Pid, MRef, PoolName), + _ = emqx_s3_profile_http_pools:register_client(ProfileId, PoolName), + ok. + +unregister_client( + Pid, + #{ + profile_id := ProfileId, pool_clients := PoolClients, pool_name := PoolName + } +) -> + case emqx_s3_profile_http_pool_clients:unregister(PoolClients, Pid) of + undefined -> + ok; + {MRef, PoolName} -> + true = erlang:demonitor(MRef, [flush]), + _ = emqx_s3_profile_http_pools:unregister_client(ProfileId, PoolName), + ok; + {MRef, OutdatedPoolName} -> + true = erlang:demonitor(MRef, [flush]), + ClientNum = emqx_s3_profile_http_pools:unregister_client(ProfileId, OutdatedPoolName), + maybe_stop_outdated_pool(ProfileId, OutdatedPoolName, ClientNum) + end. + +maybe_stop_outdated_pool(ProfileId, OutdatedPoolName, 0) -> + ok = stop_http_pool(ProfileId, OutdatedPoolName); +maybe_stop_outdated_pool(_ProfileId, _OutdatedPoolName, _ClientNum) -> + ok. 
+ +cleanup_outdated_pools(#{profile_id := ProfileId}) -> + lists:foreach( + fun(PoolName) -> + ok = stop_http_pool(ProfileId, PoolName) + end, + emqx_s3_profile_http_pools:outdated(ProfileId) + ). + +%%-------------------------------------------------------------------- +%% HTTP Pool implementation dependent functions +%%-------------------------------------------------------------------- + +http_config( + #{ + host := Host, + port := Port, + transport_options := #{ + pool_type := PoolType, + pool_size := PoolSize, + enable_pipelining := EnablePipelining, + connect_timeout := ConnectTimeout + } = HTTPOpts + } +) -> + {Transport, TransportOpts} = + case scheme(HTTPOpts) of + "http://" -> + {tcp, []}; + "https://" -> + SSLOpts = emqx_tls_lib:to_client_opts(maps:get(ssl, HTTPOpts)), + {tls, SSLOpts} + end, + NTransportOpts = maybe_ipv6_probe(TransportOpts, maps:get(ipv6_probe, HTTPOpts, true)), + [ + {host, Host}, + {port, Port}, + {connect_timeout, ConnectTimeout}, + {keepalive, 30000}, + {pool_type, PoolType}, + {pool_size, PoolSize}, + {transport, Transport}, + {transport_opts, NTransportOpts}, + {enable_pipelining, EnablePipelining} + ]. + +maybe_ipv6_probe(TransportOpts, true) -> + emqx_utils:ipv6_probe(TransportOpts); +maybe_ipv6_probe(TransportOpts, false) -> + TransportOpts. + +http_pool_cleanup_interval(ProfileConfig) -> + maps:get( + http_pool_cleanup_interval, ProfileConfig, ?DEAFULT_HTTP_POOL_CLEANUP_INTERVAL + ). + +http_pool_timeout(ProfileConfig) -> + maps:get( + http_pool_timeout, ProfileConfig, ?DEFAULT_HTTP_POOL_TIMEOUT + ). + +stop_http_pool(ProfileId, PoolName) -> + case ehttpc_sup:stop_pool(PoolName) of + ok -> + ok; + {error, Reason} -> + ?SLOG(error, #{msg => "ehttpc_pool_stop_fail", pool_name => PoolName, reason => Reason}), + ok + end, + ok = emqx_s3_profile_http_pools:unregister(ProfileId, PoolName), + ok = ?tp(debug, "s3_stop_http_pool", #{pool_name => PoolName}). 
+ +do_start_http_pool(PoolName, HttpConfig) -> + ?SLOG(warning, #{msg => "s3_start_http_pool", pool_name => PoolName, config => HttpConfig}), + case ehttpc_sup:start_pool(PoolName, HttpConfig) of + {ok, _} -> + ?SLOG(warning, #{msg => "s3_start_http_pool_success", pool_name => PoolName}), + ok; + {error, _} = Error -> + ?SLOG(error, #{msg => "s3_start_http_pool_fail", pool_name => PoolName, error => Error}), + Error + end. diff --git a/apps/emqx_s3/src/emqx_s3_profile_http_pool_clients.erl b/apps/emqx_s3/src/emqx_s3_profile_http_pool_clients.erl new file mode 100644 index 000000000..19e4d2ddb --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_profile_http_pool_clients.erl @@ -0,0 +1,36 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_profile_http_pool_clients). + +-export([ + create_table/0, + + register/4, + unregister/2 +]). + +-define(TAB, ?MODULE). + +-spec create_table() -> ok. +create_table() -> + ets:new(?TAB, [ + private, + set + ]). + +-spec register(ets:tid(), pid(), reference(), emqx_s3_profile_http_pools:pool_name()) -> ok. +register(Tab, Pid, MRef, PoolName) -> + true = ets:insert(Tab, {Pid, {MRef, PoolName}}), + ok. + +-spec unregister(ets:tid(), pid()) -> + {reference(), emqx_s3_profile_http_pools:pool_name()} | undefined. +unregister(Tab, Pid) -> + case ets:take(Tab, Pid) of + [{Pid, {MRef, PoolName}}] -> + {MRef, PoolName}; + [] -> + undefined + end. diff --git a/apps/emqx_s3/src/emqx_s3_profile_http_pools.erl b/apps/emqx_s3/src/emqx_s3_profile_http_pools.erl new file mode 100644 index 000000000..944f2037d --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_profile_http_pools.erl @@ -0,0 +1,124 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%%-------------------------------------------------------------------- + +-module(emqx_s3_profile_http_pools). + +-include_lib("stdlib/include/ms_transform.hrl"). + +-export([ + create_table/0, + + register/2, + unregister/2, + + register_client/2, + unregister_client/2, + + set_outdated/3, + + outdated/1, + all/1 +]). + +-export_type([pool_name/0]). + +-define(TAB, ?MODULE). + +-type pool_name() :: ecpool:pool_name(). + +-type pool_key() :: {emqx_s3:profile_id(), pool_name()}. + +-record(pool, { + key :: pool_key(), + client_count = 0 :: integer(), + deadline = undefined :: undefined | integer(), + extra = #{} :: map() +}). + +-spec create_table() -> ok. +create_table() -> + _ = ets:new(?TAB, [ + named_table, + public, + ordered_set, + {keypos, #pool.key}, + {read_concurrency, true}, + {write_concurrency, true} + ]), + ok. + +-spec register(emqx_s3:profile_id(), pool_name()) -> + ok. +register(ProfileId, PoolName) -> + Key = key(ProfileId, PoolName), + true = ets:insert(?TAB, #pool{ + key = Key, + client_count = 0, + deadline = undefined, + extra = #{} + }), + ok. + +-spec unregister(emqx_s3:profile_id(), pool_name()) -> + ok. +unregister(ProfileId, PoolName) -> + Key = key(ProfileId, PoolName), + true = ets:delete(?TAB, Key), + ok. + +-spec register_client(emqx_s3:profile_id(), pool_name()) -> + integer(). +register_client(ProfileId, PoolName) -> + Key = key(ProfileId, PoolName), + ets:update_counter(?TAB, Key, {#pool.client_count, 1}). + +-spec unregister_client(emqx_s3:profile_id(), pool_name()) -> + integer(). +unregister_client(ProfileId, PoolName) -> + Key = key(ProfileId, PoolName), + try + ets:update_counter(?TAB, Key, {#pool.client_count, -1}) + catch + error:badarg -> + undefined + end. + +-spec set_outdated(emqx_s3:profile_id(), pool_name(), integer()) -> + ok. 
+set_outdated(ProfileId, PoolName, Timeout) -> + Key = key(ProfileId, PoolName), + Now = erlang:monotonic_time(millisecond), + _ = ets:update_element(?TAB, Key, {#pool.deadline, Now + Timeout}), + ok. + +-spec outdated(emqx_s3:profile_id()) -> + [pool_name()]. +outdated(ProfileId) -> + Now = erlang:monotonic_time(millisecond), + MS = ets:fun2ms( + fun(#pool{key = {CurProfileId, CurPoolName}, deadline = CurDeadline}) when + CurProfileId =:= ProfileId andalso + CurDeadline =/= undefined andalso CurDeadline < Now + -> + CurPoolName + end + ), + ets:select(?TAB, MS). + +-spec all(emqx_s3:profile_id()) -> + [pool_name()]. +all(ProfileId) -> + MS = ets:fun2ms( + fun(#pool{key = {CurProfileId, CurPoolName}}) when CurProfileId =:= ProfileId -> + CurPoolName + end + ), + ets:select(?TAB, MS). + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +key(ProfileId, PoolName) -> + {ProfileId, PoolName}. diff --git a/apps/emqx_s3/src/emqx_s3_profile_sup.erl b/apps/emqx_s3/src/emqx_s3_profile_sup.erl new file mode 100644 index 000000000..c39fc9f4b --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_profile_sup.erl @@ -0,0 +1,48 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_profile_sup). + +-behaviour(supervisor). + +-include_lib("emqx/include/types.hrl"). + +-export([ + start_link/2, + child_spec/2 +]). + +-export([init/1]). + +-spec start_link(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:start_ret(). +start_link(ProfileId, ProfileConfig) -> + supervisor:start_link(?MODULE, [ProfileId, ProfileConfig]). + +-spec child_spec(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:child_spec(). 
+child_spec(ProfileId, ProfileConfig) -> + #{ + id => ProfileId, + start => {?MODULE, start_link, [ProfileId, ProfileConfig]}, + restart => permanent, + shutdown => 5000, + type => supervisor, + modules => [?MODULE] + }. + +%%-------------------------------------------------------------------- +%% supervisor callbacks +%%------------------------------------------------------------------- + +init([ProfileId, ProfileConfig]) -> + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 5 + }, + ChildSpecs = [ + %% Order matters + emqx_s3_profile_conf:child_spec(ProfileId, ProfileConfig), + emqx_s3_profile_uploader_sup:child_spec(ProfileId) + ], + {ok, {SupFlags, ChildSpecs}}. diff --git a/apps/emqx_s3/src/emqx_s3_profile_uploader_sup.erl b/apps/emqx_s3/src/emqx_s3_profile_uploader_sup.erl new file mode 100644 index 000000000..fb7b93a15 --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_profile_uploader_sup.erl @@ -0,0 +1,75 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_profile_uploader_sup). + +-behaviour(supervisor). + +-include_lib("emqx/include/types.hrl"). +-include_lib("emqx/include/logger.hrl"). + +-include("src/emqx_s3.hrl"). + +-export([ + start_link/1, + child_spec/1, + id/1, + start_uploader/2 +]). + +-export([init/1]). + +-export_type([id/0]). + +-type id() :: {?MODULE, emqx_s3:profile_id()}. + +-spec start_link(emqx_s3:profile_id()) -> supervisor:start_ret(). +start_link(ProfileId) -> + supervisor:start_link(?VIA_GPROC(id(ProfileId)), ?MODULE, [ProfileId]). + +-spec child_spec(emqx_s3:profile_id()) -> supervisor:child_spec(). +child_spec(ProfileId) -> + #{ + id => id(ProfileId), + start => {?MODULE, start_link, [ProfileId]}, + restart => permanent, + shutdown => 5000, + type => supervisor, + modules => [?MODULE] + }. 
+ +-spec id(emqx_s3:profile_id()) -> id(). +id(ProfileId) -> + {?MODULE, ProfileId}. + +-spec start_uploader(emqx_s3:profile_id(), emqx_s3_uploader:opts()) -> + supervisor:start_ret() | {error, profile_not_found}. +start_uploader(ProfileId, Opts) -> + try supervisor:start_child(?VIA_GPROC(id(ProfileId)), [Opts]) of + Result -> Result + catch + exit:{noproc, _} -> {error, profile_not_found} + end. + +%%-------------------------------------------------------------------- +%% supervisor callbacks +%%------------------------------------------------------------------- + +init([ProfileId]) -> + SupFlags = #{ + strategy => simple_one_for_one, + intensity => 10, + period => 5 + }, + ChildSpecs = [ + #{ + id => emqx_s3_uploader, + start => {emqx_s3_uploader, start_link, [ProfileId]}, + restart => temporary, + shutdown => 5000, + type => worker, + modules => [emqx_s3_uploader] + } + ], + {ok, {SupFlags, ChildSpecs}}. diff --git a/apps/emqx_s3/src/emqx_s3_schema.erl b/apps/emqx_s3/src/emqx_s3_schema.erl new file mode 100644 index 000000000..5866f8c2b --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_schema.erl @@ -0,0 +1,172 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_schema). + +-include_lib("typerefl/include/types.hrl"). +-include_lib("hocon/include/hoconsc.hrl"). + +-import(hoconsc, [mk/2, ref/2]). + +-export([roots/0, fields/1, namespace/0, tags/0, desc/1]). + +-export([translate/1]). +-export([translate/2]). + +roots() -> + [s3]. + +namespace() -> "s3". + +tags() -> + [<<"S3">>]. 
+ +fields(s3) -> + [ + {access_key_id, + mk( + string(), + #{ + desc => ?DESC("access_key_id"), + required => false + } + )}, + {secret_access_key, + mk( + string(), + #{ + desc => ?DESC("secret_access_key"), + required => false, + sensitive => true + } + )}, + {bucket, + mk( + string(), + #{ + desc => ?DESC("bucket"), + required => true + } + )}, + {host, + mk( + string(), + #{ + desc => ?DESC("host"), + required => true + } + )}, + {port, + mk( + pos_integer(), + #{ + desc => ?DESC("port"), + required => true + } + )}, + {url_expire_time, + mk( + emqx_schema:duration_s(), + #{ + default => "1h", + desc => ?DESC("url_expire_time"), + required => false + } + )}, + {min_part_size, + mk( + emqx_schema:bytesize(), + #{ + default => "5mb", + desc => ?DESC("min_part_size"), + required => true, + validator => fun part_size_validator/1 + } + )}, + {max_part_size, + mk( + emqx_schema:bytesize(), + #{ + default => "5gb", + desc => ?DESC("max_part_size"), + required => true, + validator => fun part_size_validator/1 + } + )}, + {acl, + mk( + hoconsc:enum([ + private, + public_read, + public_read_write, + authenticated_read, + bucket_owner_read, + bucket_owner_full_control + ]), + #{ + desc => ?DESC("acl"), + required => false + } + )}, + {transport_options, + mk( + ref(?MODULE, transport_options), + #{ + desc => ?DESC("transport_options"), + required => false + } + )} + ]; +fields(transport_options) -> + [ + {ipv6_probe, + mk( + boolean(), + #{ + default => true, + desc => ?DESC("ipv6_probe"), + required => false + } + )} + ] ++ + props_without( + [base_url, max_retries, retry_interval, request], emqx_connector_http:fields(config) + ) ++ + props_with( + [headers, max_retries, request_timeout], emqx_connector_http:fields("request") + ). + +desc(s3) -> + "S3 connection options"; +desc(transport_options) -> + "Options for the HTTP transport layer used by the S3 client". + +translate(Conf) -> + translate(Conf, #{}). 
+ +translate(Conf, OptionsIn) -> + Options = maps:merge(#{atom_key => true}, OptionsIn), + #{s3 := TranslatedConf} = hocon_tconf:check_plain( + emqx_s3_schema, #{<<"s3">> => Conf}, Options, [s3] + ), + TranslatedConf. + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +props_with(Keys, Proplist) -> + lists:filter(fun({K, _}) -> lists:member(K, Keys) end, Proplist). + +props_without(Keys, Proplist) -> + lists:filter(fun({K, _}) -> not lists:member(K, Keys) end, Proplist). + +part_size_validator(PartSizeLimit) -> + case + PartSizeLimit >= 5 * 1024 * 1024 andalso + PartSizeLimit =< 5 * 1024 * 1024 * 1024 + of + true -> ok; + false -> {error, "must be at least 5mb and less than 5gb"} + end. diff --git a/apps/emqx_s3/src/emqx_s3_sup.erl b/apps/emqx_s3/src/emqx_s3_sup.erl new file mode 100644 index 000000000..0f6b0160b --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_sup.erl @@ -0,0 +1,47 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_sup). + +-behaviour(supervisor). + +-include_lib("emqx/include/types.hrl"). + +-export([ + start_link/0, + start_profile/2, + stop_profile/1 +]). + +-export([init/1]). + +-spec start_link() -> supervisor:start_ret(). +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +-spec start_profile(emqx_s3:profile_id(), emqx_s3:profile_config()) -> supervisor:startchild_ret(). +start_profile(ProfileId, ProfileConfig) -> + supervisor:start_child(?MODULE, emqx_s3_profile_sup:child_spec(ProfileId, ProfileConfig)). + +-spec stop_profile(emqx_s3:profile_id()) -> ok_or_error(term()). 
+stop_profile(ProfileId) -> + case supervisor:terminate_child(?MODULE, ProfileId) of + ok -> + supervisor:delete_child(?MODULE, ProfileId); + {error, Reason} -> + {error, Reason} + end. + +%%-------------------------------------------------------------------- +%% supervisor callbacks +%%------------------------------------------------------------------- + +init([]) -> + ok = emqx_s3_profile_http_pools:create_table(), + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 5 + }, + {ok, {SupFlags, []}}. diff --git a/apps/emqx_s3/src/emqx_s3_uploader.erl b/apps/emqx_s3/src/emqx_s3_uploader.erl new file mode 100644 index 000000000..595612f62 --- /dev/null +++ b/apps/emqx_s3/src/emqx_s3_uploader.erl @@ -0,0 +1,329 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_uploader). + +-include_lib("emqx/include/types.hrl"). + +-behaviour(gen_statem). + +-export([ + start_link/2, + + write/2, + write/3, + + complete/1, + complete/2, + + abort/1, + abort/2 +]). + +-export([ + init/1, + callback_mode/0, + handle_event/4, + terminate/3, + code_change/4, + format_status/1, + format_status/2 +]). + +-export_type([opts/0]). + +-type opts() :: #{ + key := string(), + headers => emqx_s3_client:headers() +}. + +-type data() :: #{ + profile_id := emqx_s3:profile_id(), + client := emqx_s3_client:client(), + key := emqx_s3_client:key(), + buffer := iodata(), + buffer_size := non_neg_integer(), + min_part_size := pos_integer(), + max_part_size := pos_integer(), + upload_id := undefined | emqx_s3_client:upload_id(), + etags := [emqx_s3_client:etag()], + part_number := emqx_s3_client:part_number(), + headers := emqx_s3_client:headers() +}. + +%% 5MB +-define(DEFAULT_MIN_PART_SIZE, 5242880). +%% 5GB +-define(DEFAULT_MAX_PART_SIZE, 5368709120). + +-define(DEFAULT_TIMEOUT, 30000). 
+ +-spec start_link(emqx_s3:profile_id(), opts()) -> gen_statem:start_ret(). +start_link(ProfileId, #{key := Key} = Opts) when is_list(Key) -> + gen_statem:start_link(?MODULE, [ProfileId, Opts], []). + +-spec write(pid(), iodata()) -> ok_or_error(term()). +write(Pid, WriteData) -> + write(Pid, WriteData, ?DEFAULT_TIMEOUT). + +-spec write(pid(), iodata(), timeout()) -> ok_or_error(term()). +write(Pid, WriteData, Timeout) -> + gen_statem:call(Pid, {write, wrap(WriteData)}, Timeout). + +-spec complete(pid()) -> ok_or_error(term()). +complete(Pid) -> + complete(Pid, ?DEFAULT_TIMEOUT). + +-spec complete(pid(), timeout()) -> ok_or_error(term()). +complete(Pid, Timeout) -> + gen_statem:call(Pid, complete, Timeout). + +-spec abort(pid()) -> ok_or_error(term()). +abort(Pid) -> + abort(Pid, ?DEFAULT_TIMEOUT). + +-spec abort(pid(), timeout()) -> ok_or_error(term()). +abort(Pid, Timeout) -> + gen_statem:call(Pid, abort, Timeout). + +%%-------------------------------------------------------------------- +%% gen_statem callbacks +%%-------------------------------------------------------------------- + +callback_mode() -> handle_event_function. + +init([ProfileId, #{key := Key} = Opts]) -> + process_flag(trap_exit, true), + {ok, ClientConfig, UploaderConfig} = emqx_s3_profile_conf:checkout_config(ProfileId), + Client = client(ClientConfig), + {ok, upload_not_started, #{ + profile_id => ProfileId, + client => Client, + headers => maps:get(headers, Opts, #{}), + key => Key, + buffer => [], + buffer_size => 0, + min_part_size => maps:get(min_part_size, UploaderConfig, ?DEFAULT_MIN_PART_SIZE), + max_part_size => maps:get(max_part_size, UploaderConfig, ?DEFAULT_MAX_PART_SIZE), + upload_id => undefined, + etags => [], + part_number => 1 + }}. 
%% Writes are accepted in any state, as long as the resulting buffered
%% size would still fit into one S3 part (`max_part_size').
handle_event({call, From}, {write, WriteDataWrapped}, State, Data0) ->
    WriteData = unwrap(WriteDataWrapped),
    case is_valid_part(WriteData, Data0) of
        true ->
            handle_write(State, From, WriteData, Data0);
        false ->
            {keep_state_and_data, {reply, From, {error, {too_large, iolist_size(WriteData)}}}}
    end;
%% `complete' before any part was flushed: upload everything with a
%% single PutObject request.
handle_event({call, From}, complete, upload_not_started, Data0) ->
    case put_object(Data0) of
        ok ->
            {stop_and_reply, normal, {reply, From, ok}};
        {error, _} = Error ->
            %% Stop with the error as reason; terminate/3 has no multipart
            %% upload to clean up in this state (upload_id is undefined).
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end;
%% `complete' with a multipart upload in progress: flush the remaining
%% buffer as the last part and complete the multipart upload.
handle_event({call, From}, complete, upload_started, Data0) ->
    case complete_upload(Data0) of
        {ok, Data1} ->
            {stop_and_reply, normal, {reply, From, ok}, Data1};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end;
%% Nothing was sent yet — nothing to clean up on the S3 side.
handle_event({call, From}, abort, upload_not_started, _Data) ->
    {stop_and_reply, normal, {reply, From, ok}};
handle_event({call, From}, abort, upload_started, Data0) ->
    case abort_upload(Data0) of
        ok ->
            {stop_and_reply, normal, {reply, From, ok}};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data0}
    end.

%% Buffer the chunk; switch to multipart mode once the buffer reaches
%% `min_part_size' (S3 requires all but the last part to be >= 5MB,
%% cf. ?DEFAULT_MIN_PART_SIZE).
handle_write(upload_not_started, From, WriteData, Data0) ->
    Data1 = append_buffer(Data0, WriteData),
    case maybe_start_upload(Data1) of
        not_started ->
            {keep_state, Data1, {reply, From, ok}};
        {started, Data2} ->
            %% The buffer that triggered the transition is flushed
            %% immediately as the first part.
            case upload_part(Data2) of
                {ok, Data3} ->
                    {next_state, upload_started, Data3, {reply, From, ok}};
                {error, _} = Error ->
                    {stop_and_reply, Error, {reply, From, Error}, Data2}
            end;
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data1}
    end;
handle_write(upload_started, From, WriteData, Data0) ->
    Data1 = append_buffer(Data0, WriteData),
    case maybe_upload_part(Data1) of
        {ok, Data2} ->
            {keep_state, Data2, {reply, From, ok}};
        {error, _} = Error ->
            {stop_and_reply, Error, {reply, From, Error}, Data1}
    end.
%% Abnormal termination with a multipart upload in flight: tell S3 to
%% abort it so already-uploaded parts do not linger (and incur storage
%% costs).  A `normal' stop means complete/abort already settled the
%% remote side, so there is nothing to do.
terminate(Reason, _State, #{client := Client, upload_id := UploadId, key := Key}) when
    (UploadId =/= undefined) andalso (Reason =/= normal)
->
    emqx_s3_client:abort_multipart(Client, Key, UploadId);
terminate(_Reason, _State, _Data) ->
    ok.

code_change(_OldVsn, StateName, State, _Extra) ->
    {ok, StateName, State}.

%% Replace the (potentially huge) buffer with a placeholder and render the
%% client via emqx_s3_client:format/1 in sys/crash reports.
%% Map-based variant (OTP 25+).
format_status(#{data := #{client := Client} = Data} = Status) ->
    Status#{
        data => Data#{
            client => emqx_s3_client:format(Client),
            buffer => [<<"...">>]
        }
    }.

%% Legacy two-argument variant of the above (pre-OTP 25 callers).
format_status(_Opt, [PDict, State, #{client := Client} = Data]) ->
    #{
        data => Data#{
            client => emqx_s3_client:format(Client),
            buffer => [<<"...">>]
        },
        state => State,
        pdict => PDict
    }.

%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------

%% Start a multipart upload once enough data is buffered for a first part.
-spec maybe_start_upload(data()) -> not_started | {started, data()} | {error, term()}.
maybe_start_upload(#{buffer_size := BufferSize, min_part_size := MinPartSize} = Data) ->
    case BufferSize >= MinPartSize of
        true ->
            start_upload(Data);
        false ->
            not_started
    end.

%% Initiate the S3 multipart upload and remember its UploadId.
-spec start_upload(data()) -> {started, data()} | {error, term()}.
start_upload(#{client := Client, key := Key, headers := Headers} = Data) ->
    case emqx_s3_client:start_multipart(Client, Headers, Key) of
        {ok, UploadId} ->
            NewData = Data#{upload_id => UploadId},
            {started, NewData};
        {error, _} = Error ->
            Error
    end.

%% Flush the buffer as a part when it has grown to `min_part_size'.
-spec maybe_upload_part(data()) -> ok_or_error(data(), term()).
maybe_upload_part(#{buffer_size := BufferSize, min_part_size := MinPartSize} = Data) ->
    case BufferSize >= MinPartSize of
        true ->
            upload_part(Data);
        false ->
            {ok, Data}
    end.

-spec upload_part(data()) -> ok_or_error(data(), term()).
%% Flush the accumulated buffer as the next part of the multipart upload.
%% A no-op when the buffer is empty (e.g. `complete' right after a flush).
upload_part(#{buffer_size := 0} = Data) ->
    {ok, Data};
upload_part(
    #{
        client := Client,
        key := Key,
        upload_id := UploadId,
        buffer := Buffer,
        part_number := PartNumber,
        etags := ETags
    } = Data
) ->
    case emqx_s3_client:upload_part(Client, Key, UploadId, PartNumber, Buffer) of
        {ok, ETag} ->
            %% Parts are numbered from 1; {PartNumber, ETag} pairs are
            %% collected in reverse and reversed once in complete_upload/1.
            NewData = Data#{
                buffer => [],
                buffer_size => 0,
                part_number => PartNumber + 1,
                etags => [{PartNumber, ETag} | ETags]
            },
            {ok, NewData};
        {error, _} = Error ->
            Error
    end.

%% Flush any remaining buffered data as the final part, then complete the
%% multipart upload with the accumulated part/ETag list (in upload order).
-spec complete_upload(data()) -> ok_or_error(data(), term()).
complete_upload(
    #{
        client := Client,
        key := Key,
        upload_id := UploadId
    } = Data0
) ->
    case upload_part(Data0) of
        {ok, #{etags := ETags} = Data1} ->
            case
                emqx_s3_client:complete_multipart(
                    Client, Key, UploadId, lists:reverse(ETags)
                )
            of
                ok ->
                    {ok, Data1};
                {error, _} = Error ->
                    Error
            end;
        {error, _} = Error ->
            Error
    end.

%% Abort the in-progress multipart upload; S3 discards uploaded parts.
%% The client call already returns `ok | {error, _}', so it is returned
%% directly — the original wrapped it in a `case' that mapped
%% `ok -> ok; {error, _} = E -> E', an identity transform.
-spec abort_upload(data()) -> ok_or_error(term()).
abort_upload(
    #{
        client := Client,
        key := Key,
        upload_id := UploadId
    }
) ->
    emqx_s3_client:abort_multipart(Client, Key, UploadId).

%% Upload the whole buffer as one object (the non-multipart path, used
%% when the total size stayed below `min_part_size').  Same simplification
%% as abort_upload/1: the client result is returned as-is.
-spec put_object(data()) -> ok_or_error(term()).
put_object(
    #{
        client := Client,
        key := Key,
        buffer := Buffer,
        headers := Headers
    }
) ->
    emqx_s3_client:put_object(Client, Headers, Key, Buffer).

%% O(1) append: nests the new chunk into the iodata buffer rather than
%% concatenating lists; size is tracked incrementally.
-spec append_buffer(data(), iodata()) -> data().
append_buffer(#{buffer := Buffer, buffer_size := BufferSize} = Data, WriteData) ->
    Data#{
        buffer => [Buffer, WriteData],
        buffer_size => BufferSize + iolist_size(WriteData)
    }.

-compile({inline, [wrap/1, unwrap/1]}).
%% Write payloads travel through gen_statem calls wrapped in a closure —
%% presumably to keep large binaries out of sys traces and crash-report
%% dumps of the event queue; confirm intent with the original authors.
wrap(Data) ->
    fun() -> Data end.

unwrap(WrappedData) ->
    WrappedData().

%% A write is accepted only while buffered size plus the new chunk still
%% fits into a single S3 part (`max_part_size', 5GB by default).
is_valid_part(WriteData, #{max_part_size := MaxPartSize, buffer_size := BufferSize}) ->
    BufferSize + iolist_size(WriteData) =< MaxPartSize.
+ +client(Config) -> + emqx_s3_client:create(Config). diff --git a/apps/emqx_s3/test/certs/ca.crt b/apps/emqx_s3/test/certs/ca.crt new file mode 100644 index 000000000..8a9dafccd --- /dev/null +++ b/apps/emqx_s3/test/certs/ca.crt @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIE5DCCAswCCQCF3o0gIdaNDjANBgkqhkiG9w0BAQsFADA0MRIwEAYDVQQKDAlF +TVFYIFRlc3QxHjAcBgNVBAMMFUNlcnRpZmljYXRlIEF1dGhvcml0eTAeFw0yMTEy +MzAwODQxMTFaFw00OTA1MTcwODQxMTFaMDQxEjAQBgNVBAoMCUVNUVggVGVzdDEe +MBwGA1UEAwwVQ2VydGlmaWNhdGUgQXV0aG9yaXR5MIICIjANBgkqhkiG9w0BAQEF +AAOCAg8AMIICCgKCAgEAqmqSrxyH16j63QhqGLT1UO8I+m6BM3HfnJQM8laQdtJ0 +WgHqCh0/OphH3S7v4SfF4fNJDEJWMWuuzJzU9cTqHPLzhvo3+ZHcMIENgtY2p2Cf +7AQjEqFViEDyv2ZWNEe76BJeShntdY5NZr4gIPar99YGG/Ln8YekspleV+DU38rE +EX9WzhgBr02NN9z4NzIxeB+jdvPnxcXs3WpUxzfnUjOQf/T1tManvSdRbFmKMbxl +A8NLYK3oAYm8EbljWUINUNN6loqYhbigKv8bvo5S4xvRqmX86XB7sc0SApngtNcg +O0EKn8z/KVPDskE+8lMfGMiU2e2Tzw6Rph57mQPOPtIp5hPiKRik7ST9n0p6piXW +zRLplJEzSjf40I1u+VHmpXlWI/Fs8b1UkDSMiMVJf0LyWb4ziBSZOY2LtZzWHbWj +LbNgxQcwSS29tKgUwfEFmFcm+iOM59cPfkl2IgqVLh5h4zmKJJbfQKSaYb5fcKRf +50b1qsN40VbR3Pk/0lJ0/WqgF6kZCExmT1qzD5HJES/5grjjKA4zIxmHOVU86xOF +ouWvtilVR4PGkzmkFvwK5yRhBUoGH/A9BurhqOc0QCGay1kqHQFA6se4JJS+9KOS +x8Rn1Nm6Pi7sd6Le3cKmHTlyl5a/ofKqTCX2Qh+v/7y62V1V1wnoh3ipRjdPTnMC +AwEAATANBgkqhkiG9w0BAQsFAAOCAgEARCqaocvlMFUQjtFtepO2vyG1krn11xJ0 +e7md26i+g8SxCCYqQ9IqGmQBg0Im8fyNDKRN/LZoj5+A4U4XkG1yya91ZIrPpWyF +KUiRAItchNj3g1kHmI2ckl1N//6Kpx3DPaS7qXZaN3LTExf6Ph+StE1FnS0wVF+s +tsNIf6EaQ+ZewW3pjdlLeAws3jvWKUkROc408Ngvx74zbbKo/zAC4tz8oH9ZcpsT +WD8enVVEeUQKI6ItcpZ9HgTI9TFWgfZ1vYwvkoRwNIeabYI62JKmLEo2vGfGwWKr +c+GjnJ/tlVI2DpPljfWOnQ037/7yyJI/zo65+HPRmGRD6MuW/BdPDYOvOZUTcQKh +kANi5THSbJJgZcG3jb1NLebaUQ1H0zgVjn0g3KhUV+NJQYk8RQ7rHtB+MySqTKlM +kRkRjfTfR0Ykxpks7Mjvsb6NcZENf08ZFPd45+e/ptsxpiKu4e4W4bV7NZDvNKf9 +0/aD3oGYNMiP7s+KJ1lRSAjnBuG21Yk8FpzG+yr8wvJhV8aFgNQ5wIH86SuUTmN0 +5bVzFEIcUejIwvGoQEctNHBlOwHrb7zmB6OwyZeMapdXBQ+9UDhYg8ehDqdDOdfn +wsBcnjD2MwNhlE1hjL+tZWLNwSHiD6xx3LvNoXZu2HK8Cp3SOrkE69cFghYMIZZb 
+T+fp6tNL6LE= +-----END CERTIFICATE----- diff --git a/apps/emqx_s3/test/emqx_s3_SUITE.erl b/apps/emqx_s3/test/emqx_s3_SUITE.erl new file mode 100644 index 000000000..287dcb597 --- /dev/null +++ b/apps/emqx_s3/test/emqx_s3_SUITE.erl @@ -0,0 +1,66 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_SUITE). + +-compile(nowarn_export_all). +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + {ok, _} = application:ensure_all_started(emqx_s3), + Config. + +end_per_suite(_Config) -> + ok = application:stop(emqx_s3). + +%%-------------------------------------------------------------------- +%% Test cases +%%-------------------------------------------------------------------- + +t_start_stop_update(_Config) -> + ProfileId = <<"test">>, + ProfileConfig = profile_config(), + + ?assertMatch( + ok, + emqx_s3:start_profile(ProfileId, ProfileConfig) + ), + + ?assertMatch( + {error, _}, + emqx_s3:start_profile(ProfileId, ProfileConfig) + ), + + ?assertEqual( + ok, + emqx_s3:update_profile(ProfileId, ProfileConfig) + ), + + ?assertMatch( + {error, _}, + emqx_s3:update_profile(<<"unknown">>, ProfileConfig) + ), + + ?assertEqual( + ok, + emqx_s3:stop_profile(ProfileId) + ), + + ?assertMatch( + {error, _}, + emqx_s3:stop_profile(ProfileId) + ). + +%%-------------------------------------------------------------------- +%% Helpers +%%-------------------------------------------------------------------- + +profile_config() -> + emqx_s3_test_helpers:base_config(tcp). 
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------

-module(emqx_s3_client_SUITE).

-compile(nowarn_export_all).
-compile(export_all).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").

-define(PROFILE_ID, atom_to_binary(?MODULE)).

%% Every test case runs over the full matrix of
%% connection type (tcp | tls) x ehttpc pool type (random | hash).
all() ->
    [
        {group, tcp},
        {group, tls}
    ].

groups() ->
    AllCases = emqx_common_test_helpers:all(?MODULE),
    PoolGroups = [
        {group, pool_random},
        {group, pool_hash}
    ],
    [
        {tcp, [], PoolGroups},
        {tls, [], PoolGroups},
        {pool_random, [], AllCases},
        {pool_hash, [], AllCases}
    ].

init_per_suite(Config) ->
    {ok, _} = application:ensure_all_started(emqx_s3),
    Config.

end_per_suite(_Config) ->
    ok = application:stop(emqx_s3).

%% Stash the group parameters into Config so test cases and helpers can
%% read them back with ?config/2.
init_per_group(ConnTypeGroup, Config) when ConnTypeGroup =:= tcp; ConnTypeGroup =:= tls ->
    [{conn_type, ConnTypeGroup} | Config];
init_per_group(PoolTypeGroup, Config) when
    PoolTypeGroup =:= pool_random; PoolTypeGroup =:= pool_hash
->
    PoolType =
        case PoolTypeGroup of
            pool_random -> random;
            pool_hash -> hash
        end,
    [{pool_type, PoolType} | Config].
end_per_group(_ConnType, _Config) ->
    ok.
%% Each test case gets a fresh, uniquely named bucket and a dedicated
%% ehttpc pool built from the group's profile configuration.
init_per_testcase(_TestCase, Config0) ->
    ConnType = ?config(conn_type, Config0),

    Bucket = emqx_s3_test_helpers:unique_bucket(),
    TestAwsConfig = emqx_s3_test_helpers:aws_config(ConnType),
    ok = erlcloud_s3:create_bucket(Bucket, TestAwsConfig),
    Config1 = [
        {key, emqx_s3_test_helpers:unique_key()},
        {bucket, Bucket},
        {aws_config, TestAwsConfig}
        | Config0
    ],
    {ok, PoolName} = emqx_s3_profile_conf:start_http_pool(?PROFILE_ID, profile_config(Config1)),
    [{ehttpc_pool_name, PoolName} | Config1].

end_per_testcase(_TestCase, Config) ->
    ok = ehttpc_sup:stop_pool(?config(ehttpc_pool_name, Config)).

%%--------------------------------------------------------------------
%% Test cases
%%--------------------------------------------------------------------

%% Two 6MB parts (above the 5MB S3 minimum part size), completed in order.
%% NOTE(review): start_multipart/2 is used here, while emqx_s3_uploader
%% calls start_multipart/3 (with headers) — presumably both arities are
%% exported; confirm.
t_multipart_upload(Config) ->
    Key = ?config(key, Config),

    Client = client(Config),

    {ok, UploadId} = emqx_s3_client:start_multipart(Client, Key),

    Data = data(6_000_000),

    {ok, Etag1} = emqx_s3_client:upload_part(Client, Key, UploadId, 1, Data),
    {ok, Etag2} = emqx_s3_client:upload_part(Client, Key, UploadId, 2, Data),

    ok = emqx_s3_client:complete_multipart(
        Client, Key, UploadId, [{1, Etag1}, {2, Etag2}]
    ).

%% A single PutObject of a 6MB payload.
t_simple_put(Config) ->
    Key = ?config(key, Config),

    Client = client(Config),

    Data = data(6_000_000),

    ok = emqx_s3_client:put_object(Client, Key, Data).

%% Listing the bucket returns the stored object with its size and metadata.
t_list(Config) ->
    Key = ?config(key, Config),

    Client = client(Config),

    ok = emqx_s3_client:put_object(Client, Key, <<"data">>),

    {ok, List} = emqx_s3_client:list(Client, Key),

    [KeyInfo] = proplists:get_value(contents, List),
    ?assertMatch(
        #{
            key := Key,
            size := 4,
            etag := _,
            last_modified := _
        },
        maps:from_list(KeyInfo)
    ).
%% The object URL produced by the client is fetchable over plain HTTP.
t_url(Config) ->
    Key = ?config(key, Config),

    Client = client(Config),
    ok = emqx_s3_client:put_object(Client, Key, <<"data">>),

    Url = emqx_s3_client:uri(Client, Key),

    ?assertMatch(
        {ok, {{_StatusLine, 200, "OK"}, _Headers, "data"}},
        httpc:request(Url)
    ).

%% PutObject must also work when no ACL is configured in the client.
t_no_acl(Config) ->
    Key = ?config(key, Config),

    ClientConfig = emqx_s3_profile_conf:client_config(
        profile_config(Config), ?config(ehttpc_pool_name, Config)
    ),
    Client = emqx_s3_client:create(maps:without([acl], ClientConfig)),

    ok = emqx_s3_client:put_object(Client, Key, <<"data">>).

%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------

%% Build a client bound to the per-testcase ehttpc pool.
client(Config) ->
    ClientConfig = emqx_s3_profile_conf:client_config(
        profile_config(Config), ?config(ehttpc_pool_name, Config)
    ),
    emqx_s3_client:create(ClientConfig).

%% Base profile config for the group's conn type, with the per-testcase
%% bucket and the group's pool type applied.
profile_config(Config) ->
    ProfileConfig0 = emqx_s3_test_helpers:base_config(?config(conn_type, Config)),
    ProfileConfig1 = maps:put(
        bucket,
        ?config(bucket, Config),
        ProfileConfig0
    ),
    %% NOTE: emqx_utils_maps:deep_put/3 takes (Path, Map, Value) —
    %% consistent with its other call sites in these suites.
    ProfileConfig2 = emqx_utils_maps:deep_put(
        [transport_options, pool_type],
        ProfileConfig1,
        ?config(pool_type, Config)
    ),
    ProfileConfig2.

%% Size bytes of $a.  O(Size) list build; acceptable at test scale.
data(Size) ->
    iolist_to_binary([$a || _ <- lists:seq(1, Size)]).

%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------

-module(emqx_s3_profile_conf_SUITE).

-compile(nowarn_export_all).
-compile(export_all).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("emqx/include/asserts.hrl").

all() -> emqx_common_test_helpers:all(?MODULE).

suite() -> [{timetrap, {minutes, 1}}].

init_per_suite(Config) ->
    {ok, _} = application:ensure_all_started(emqx_s3),
    Config.

end_per_suite(_Config) ->
    ok = application:stop(emqx_s3).

%% Fresh bucket and a freshly started profile per test case; snabbkaffe
%% tracing is enabled so pool start/stop events can be awaited.
init_per_testcase(_TestCase, Config) ->
    ok = snabbkaffe:start_trace(),
    TestAwsConfig = emqx_s3_test_helpers:aws_config(tcp),

    Bucket = emqx_s3_test_helpers:unique_bucket(),
    ok = erlcloud_s3:create_bucket(Bucket, TestAwsConfig),

    ProfileBaseConfig = emqx_s3_test_helpers:base_config(tcp),
    ProfileConfig = ProfileBaseConfig#{bucket => Bucket},
    ok = emqx_s3:start_profile(profile_id(), ProfileConfig),

    [{profile_config, ProfileConfig} | Config].

end_per_testcase(_TestCase, _Config) ->
    ok = snabbkaffe:stop(),
    _ = emqx_s3:stop_profile(profile_id()).

%%--------------------------------------------------------------------
%% Test cases
%%--------------------------------------------------------------------

%% An outdated pool (superseded by a config update) is stopped as soon
%% as its last uploader releases it.
t_regular_outdated_pool_cleanup(Config) ->
    _ = process_flag(trap_exit, true),
    Key = emqx_s3_test_helpers:unique_key(),
    {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}),

    [OldPool] = emqx_s3_profile_http_pools:all(profile_id()),

    ProfileBaseConfig = ?config(profile_config, Config),
    ProfileConfig = emqx_utils_maps:deep_put(
        [transport_options, pool_size], ProfileBaseConfig, 16
    ),
    ok = emqx_s3:update_profile(profile_id(), ProfileConfig),

    %% Old pool is still referenced by the uploader, so both coexist.
    ?assertEqual(
        2,
        length(emqx_s3_profile_http_pools:all(profile_id()))
    ),

    ?assertWaitEvent(
        ok = emqx_s3_uploader:abort(Pid),
        #{?snk_kind := "s3_stop_http_pool", pool_name := OldPool},
        1000
    ),

    [NewPool] = emqx_s3_profile_http_pools:all(profile_id()),

    ?assertWaitEvent(
        ok = emqx_s3:stop_profile(profile_id()),
        #{?snk_kind := "s3_stop_http_pool", pool_name := NewPool},
        1000
    ),

    ?assertEqual(
        0,
        length(emqx_s3_profile_http_pools:all(profile_id()))
    ).

%% An outdated pool still held by an uploader is reclaimed by the
%% periodic cleanup once `http_pool_timeout' expires; the uploader's
%% subsequent requests then fail.
t_timeout_pool_cleanup(Config) ->
    _ = process_flag(trap_exit, true),

    %% We restart the profile to set `http_pool_timeout` value suitable for test
    ok = emqx_s3:stop_profile(profile_id()),
    ProfileBaseConfig = ?config(profile_config, Config),
    ProfileConfig = ProfileBaseConfig#{
        http_pool_timeout => 500,
        http_pool_cleanup_interval => 100
    },
    ok = emqx_s3:start_profile(profile_id(), ProfileConfig),

    %% Start uploader
    Key = emqx_s3_test_helpers:unique_key(),
    {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}),
    ok = emqx_s3_uploader:write(Pid, <<"data">>),

    [OldPool] = emqx_s3_profile_http_pools:all(profile_id()),

    NewProfileConfig = emqx_utils_maps:deep_put(
        [transport_options, pool_size], ProfileConfig, 16
    ),

    %% We update profile to create new pool and wait for the old one to be stopped by timeout
    ?assertWaitEvent(
        ok = emqx_s3:update_profile(profile_id(), NewProfileConfig),
        #{?snk_kind := "s3_stop_http_pool", pool_name := OldPool},
        1000
    ),

    %% The uploader now has no valid pool and should fail
    ?assertMatch(
        {error, _},
        emqx_s3_uploader:complete(Pid)
    ).

t_checkout_no_profile(_Config) ->
    ?assertEqual(
        {error, profile_not_found},
        emqx_s3_profile_conf:checkout_config(<<"no_such_profile">>)
    ).

t_httpc_pool_start_error(Config) ->
    %% `ehttpc_pool`s are lazy so it is difficult to trigger an error
    %% passing some bad connection options.
    %% So we emulate some unknown crash with `meck`.
    %% NOTE(review): meck:new/2 is not paired with meck:unload/1 here —
    %% presumably relies on suite-level cleanup; confirm the mock cannot
    %% leak into subsequent test cases.
    meck:new(ehttpc_pool, [passthrough]),
    meck:expect(ehttpc_pool, init, fun(_) -> meck:raise(error, badarg) end),

    ?assertMatch(
        {error, _},
        emqx_s3:start_profile(<<"profile">>, ?config(profile_config, Config))
    ).

t_httpc_pool_update_error(Config) ->
    %% `ehttpc_pool`s are lazy so it is difficult to trigger an error
    %% passing some bad connection options.
    %% So we emulate some unknown crash with `meck`.
    meck:new(ehttpc_pool, [passthrough]),
    meck:expect(ehttpc_pool, init, fun(_) -> meck:raise(error, badarg) end),

    ProfileBaseConfig = ?config(profile_config, Config),
    NewProfileConfig = emqx_utils_maps:deep_put(
        [transport_options, pool_size], ProfileBaseConfig, 16
    ),

    ?assertMatch(
        {error, _},
        emqx_s3:start_profile(<<"profile">>, NewProfileConfig)
    ).

%% A restarted conf server cleans up pools left behind by its previous
%% incarnation, leaving exactly one live pool.
t_orphaned_pools_cleanup(_Config) ->
    ProfileId = profile_id(),
    Pid = gproc:where({n, l, emqx_s3_profile_conf:id(ProfileId)}),

    %% We kill conf and wait for it to restart
    %% and create a new pool
    ?assertWaitEvent(
        exit(Pid, kill),
        #{?snk_kind := "s3_start_http_pool", profile_id := ProfileId},
        1000
    ),

    %% We should still have only one pool
    ?assertEqual(
        1,
        length(emqx_s3_profile_http_pools:all(ProfileId))
    ).

%% Same as above, but the orphaned pool is already stopped: the conf
%% server must not crash when stopping it a second time.
t_orphaned_pools_cleanup_non_graceful(_Config) ->
    ProfileId = profile_id(),
    Pid = gproc:where({n, l, emqx_s3_profile_conf:id(ProfileId)}),

    %% We stop pool, conf server should not fail when attempting to stop it once more
    [PoolName] = emqx_s3_profile_http_pools:all(ProfileId),
    ok = ehttpc_pool:stop_pool(PoolName),

    %% We kill conf and wait for it to restart
    %% and create a new pool
    ?assertWaitEvent(
        exit(Pid, kill),
        #{?snk_kind := "s3_start_http_pool", profile_id := ProfileId},
        1000
    ),

    %% We should still have only one pool
    ?assertEqual(
        1,
        length(emqx_s3_profile_http_pools:all(ProfileId))
    ).

%% A client checked out via with_client/2 pins its pool: a config update
%% creates a second pool, and the old one survives until the client is
%% released.
t_checkout_client(Config) ->
    ProfileId = profile_id(),
    Key = emqx_s3_test_helpers:unique_key(),
    Caller = self(),
    Pid = spawn_link(fun() ->
        emqx_s3:with_client(
            ProfileId,
            fun(Client) ->
                receive
                    put_object ->
                        Caller ! {put_object, emqx_s3_client:put_object(Client, Key, <<"data">>)}
                end,
                receive
                    list_objects ->
                        Caller ! {list_objects, emqx_s3_client:list(Client, [])}
                end
            end
        ),
        Caller ! client_released,
        receive
            stop -> ok
        end
    end),

    %% Ask spawned process to put object
    Pid ! put_object,
    receive
        {put_object, ok} -> ok
    after 1000 ->
        ct:fail("put_object fail")
    end,

    %% Now change config for the profile
    ProfileBaseConfig = ?config(profile_config, Config),
    NewProfileConfig0 = ProfileBaseConfig#{bucket => <<"new_bucket">>},
    NewProfileConfig1 = emqx_utils_maps:deep_put(
        [transport_options, pool_size], NewProfileConfig0, 16
    ),
    ok = emqx_s3:update_profile(profile_id(), NewProfileConfig1),

    %% We should have two pools now, because the old one is still in use
    %% by the spawned process
    ?assertEqual(
        2,
        length(emqx_s3_profile_http_pools:all(ProfileId))
    ),

    %% Ask spawned process to list objects
    Pid ! list_objects,
    receive
        {list_objects, Result} ->
            {ok, OkResult} = Result,
            Contents = proplists:get_value(contents, OkResult),
            ?assertEqual(1, length(Contents)),
            ?assertEqual(Key, proplists:get_value(key, hd(Contents)))
    after 1000 ->
        ct:fail("list_objects fail")
    end,

    %% Wait till spawned process releases client
    receive
        client_released -> ok
    after 1000 ->
        ct:fail("client not released")
    end,

    %% We should have only one pool now, because the old one is released
    ?assertEqual(
        1,
        length(emqx_s3_profile_http_pools:all(ProfileId))
    ).

%% The conf server must tolerate stray messages and reject unknown calls.
t_unknown_messages(_Config) ->
    Pid = gproc:where({n, l, emqx_s3_profile_conf:id(profile_id())}),

    Pid ! unknown,
    ok = gen_server:cast(Pid, unknown),

    ?assertEqual(
        {error, not_implemented},
        gen_server:call(Pid, unknown)
    ).

%%--------------------------------------------------------------------
%% Test helpers
%%--------------------------------------------------------------------

profile_id() ->
    <<"test">>.
diff --git a/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl new file mode 100644 index 000000000..63f659da0 --- /dev/null +++ b/apps/emqx_s3/test/emqx_s3_schema_SUITE.erl @@ -0,0 +1,172 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_schema_SUITE). + +-compile(nowarn_export_all). +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +all() -> + emqx_common_test_helpers:all(?MODULE). + +%%-------------------------------------------------------------------- +%% Test cases +%%-------------------------------------------------------------------- + +t_minimal_config(_Config) -> + ?assertMatch( + #{ + bucket := "bucket", + host := "s3.us-east-1.endpoint.com", + port := 443, + min_part_size := 5242880, + transport_options := + #{ + connect_timeout := 15000, + enable_pipelining := 100, + pool_size := 8, + pool_type := random, + ssl := #{enable := false} + } + }, + emqx_s3_schema:translate(#{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443 + }) + ). 
+ +t_full_config(_Config) -> + ?assertMatch( + #{ + access_key_id := "access_key_id", + acl := public_read, + bucket := "bucket", + host := "s3.us-east-1.endpoint.com", + min_part_size := 10485760, + port := 443, + secret_access_key := "secret_access_key", + transport_options := + #{ + connect_timeout := 30000, + enable_pipelining := 200, + headers := #{<<"x-amz-acl">> := <<"public-read">>}, + max_retries := 3, + pool_size := 10, + pool_type := random, + request_timeout := 10000, + ssl := + #{ + cacertfile := <<"cacertfile.crt">>, + certfile := <<"server.crt">>, + ciphers := ["ECDHE-RSA-AES256-GCM-SHA384"], + depth := 10, + enable := true, + keyfile := <<"server.key">>, + reuse_sessions := true, + secure_renegotiate := true, + server_name_indication := "some-host", + verify := verify_peer, + versions := ['tlsv1.2'] + } + } + }, + emqx_s3_schema:translate(#{ + <<"access_key_id">> => <<"access_key_id">>, + <<"secret_access_key">> => <<"secret_access_key">>, + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"min_part_size">> => <<"10mb">>, + <<"acl">> => <<"public_read">>, + <<"transport_options">> => #{ + <<"connect_timeout">> => 30000, + <<"enable_pipelining">> => 200, + <<"pool_size">> => 10, + <<"pool_type">> => <<"random">>, + <<"ssl">> => #{ + <<"enable">> => true, + <<"keyfile">> => <<"server.key">>, + <<"certfile">> => <<"server.crt">>, + <<"cacertfile">> => <<"cacertfile.crt">>, + <<"server_name_indication">> => <<"some-host">>, + <<"verify">> => <<"verify_peer">>, + <<"versions">> => [<<"tlsv1.2">>], + <<"ciphers">> => [<<"ECDHE-RSA-AES256-GCM-SHA384">>] + }, + <<"request_timeout">> => <<"10s">>, + <<"max_retries">> => 3, + <<"headers">> => #{ + <<"x-amz-acl">> => <<"public-read">> + } + } + }) + ). 
+ +t_sensitive_config_hidden(_Config) -> + ?assertMatch( + #{ + access_key_id := "access_key_id", + secret_access_key := <<"******">> + }, + emqx_s3_schema:translate( + #{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"access_key_id">> => <<"access_key_id">>, + <<"secret_access_key">> => <<"secret_access_key">> + }, + % NOTE: this is what Config API handler is doing + #{obfuscate_sensitive_values => true} + ) + ). + +t_invalid_limits(_Config) -> + ?assertException( + throw, + {emqx_s3_schema, [#{kind := validation_error, path := "s3.min_part_size"}]}, + emqx_s3_schema:translate(#{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"min_part_size">> => <<"1mb">> + }) + ), + + ?assertException( + throw, + {emqx_s3_schema, [#{kind := validation_error, path := "s3.min_part_size"}]}, + emqx_s3_schema:translate(#{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"min_part_size">> => <<"100000gb">> + }) + ), + + ?assertException( + throw, + {emqx_s3_schema, [#{kind := validation_error, path := "s3.max_part_size"}]}, + emqx_s3_schema:translate(#{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"max_part_size">> => <<"1mb">> + }) + ), + + ?assertException( + throw, + {emqx_s3_schema, [#{kind := validation_error, path := "s3.max_part_size"}]}, + emqx_s3_schema:translate(#{ + <<"bucket">> => <<"bucket">>, + <<"host">> => <<"s3.us-east-1.endpoint.com">>, + <<"port">> => 443, + <<"max_part_size">> => <<"100000gb">> + }) + ). 
diff --git a/apps/emqx_s3/test/emqx_s3_test_helpers.erl b/apps/emqx_s3/test/emqx_s3_test_helpers.erl new file mode 100644 index 000000000..a73f618af --- /dev/null +++ b/apps/emqx_s3/test/emqx_s3_test_helpers.erl @@ -0,0 +1,140 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_test_helpers). + +-compile(nowarn_export_all). +-compile(export_all). + +-define(ACCESS_KEY_ID, "minioadmin"). +-define(SECRET_ACCESS_KEY, "minioadmin"). + +-define(TOXIPROXY_HOST, "toxiproxy"). +-define(TOXIPROXY_PORT, 8474). + +-define(TCP_HOST, ?TOXIPROXY_HOST). +-define(TCP_PORT, 19000). +-define(TLS_HOST, ?TOXIPROXY_HOST). +-define(TLS_PORT, 19100). + +-include_lib("erlcloud/include/erlcloud_aws.hrl"). + +-export([ + aws_config/1, + base_raw_config/1, + base_config/1, + + unique_key/0, + unique_bucket/0, + + with_failure/3 +]). + +%%-------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------- + +aws_config(tcp) -> + aws_config(tcp, ?TCP_HOST, ?TCP_PORT); +aws_config(tls) -> + aws_config(tls, ?TLS_HOST, ?TLS_PORT). + +aws_config(tcp, Host, Port) -> + erlcloud_s3_new( + ?ACCESS_KEY_ID, + ?SECRET_ACCESS_KEY, + Host, + Port, + "http://" + ); +aws_config(tls, Host, Port) -> + erlcloud_s3_new( + ?ACCESS_KEY_ID, + ?SECRET_ACCESS_KEY, + Host, + Port, + "https://" + ). 
+ +base_raw_config(tcp) -> + #{ + <<"bucket">> => <<"bucket">>, + <<"access_key_id">> => bin(?ACCESS_KEY_ID), + <<"secret_access_key">> => bin(?SECRET_ACCESS_KEY), + <<"host">> => ?TCP_HOST, + <<"port">> => ?TCP_PORT, + <<"max_part_size">> => 10 * 1024 * 1024, + <<"transport_options">> => + #{ + <<"request_timeout">> => 2000 + } + }; +base_raw_config(tls) -> + #{ + <<"bucket">> => <<"bucket">>, + <<"access_key_id">> => bin(?ACCESS_KEY_ID), + <<"secret_access_key">> => bin(?SECRET_ACCESS_KEY), + <<"host">> => ?TLS_HOST, + <<"port">> => ?TLS_PORT, + <<"max_part_size">> => 10 * 1024 * 1024, + <<"transport_options">> => + #{ + <<"request_timeout">> => 2000, + <<"ssl">> => #{ + <<"enable">> => true, + <<"cacertfile">> => bin(cert_path("ca.crt")), + <<"server_name_indication">> => <<"authn-server">>, + <<"verify">> => <<"verify_peer">> + } + } + }. + +base_config(ConnType) -> + emqx_s3_schema:translate(base_raw_config(ConnType)). + +unique_key() -> + "key-" ++ integer_to_list(erlang:system_time(millisecond)) ++ "-" ++ + integer_to_list(erlang:unique_integer([positive])). + +unique_bucket() -> + "bucket-" ++ integer_to_list(erlang:system_time(millisecond)) ++ "-" ++ + integer_to_list(erlang:unique_integer([positive])). + +with_failure(_ConnType, ehttpc_500, Fun) -> + try + meck:new(ehttpc, [passthrough, no_history]), + meck:expect(ehttpc, request, fun(_, _, _, _, _) -> {ok, 500, []} end), + Fun() + after + meck:unload(ehttpc) + end; +with_failure(ConnType, FailureType, Fun) -> + emqx_common_test_helpers:with_failure( + FailureType, + toxproxy_name(ConnType), + ?TOXIPROXY_HOST, + ?TOXIPROXY_PORT, + Fun + ). + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +toxproxy_name(tcp) -> "minio_tcp"; +toxproxy_name(tls) -> "minio_tls". + +cert_path(FileName) -> + Dir = code:lib_dir(emqx_s3, test), + filename:join([Dir, <<"certs">>, FileName]). 
+ +bin(String) when is_list(String) -> list_to_binary(String); +bin(Binary) when is_binary(Binary) -> Binary. + +erlcloud_s3_new(AccessKeyId, SecretAccessKey, Host, Port, Scheme) -> + AwsConfig = erlcloud_s3:new(AccessKeyId, SecretAccessKey, Host, Port), + AwsConfig#aws_config{ + s3_scheme = Scheme, + s3_bucket_access_method = path + }. diff --git a/apps/emqx_s3/test/emqx_s3_uploader_SUITE.erl b/apps/emqx_s3/test/emqx_s3_uploader_SUITE.erl new file mode 100644 index 000000000..6ba0e3ed9 --- /dev/null +++ b/apps/emqx_s3/test/emqx_s3_uploader_SUITE.erl @@ -0,0 +1,591 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_s3_uploader_SUITE). + +-compile(nowarn_export_all). +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +-define(assertProcessExited(Reason, Pid), + receive + {'DOWN', _, _, Pid, Reason} -> + ok + after 3000 -> + ct:fail("uploader process did not exit") + end +). + +-define(assertObjectEqual(Value, AwsConfig, Bucket, Key), + ?assertEqual( + Value, + proplists:get_value( + content, + erlcloud_s3:get_object( + Bucket, + Key, + AwsConfig + ) + ) + ) +). + +all() -> + [ + {group, tcp}, + {group, tls} + ]. 
+ +groups() -> + [ + {tcp, [ + {group, common_cases}, + {group, tcp_cases} + ]}, + {tls, [ + {group, common_cases}, + {group, tls_cases} + ]}, + {common_cases, [], [ + t_happy_path_simple_put, + t_happy_path_multi, + t_abort_multi, + t_abort_simple_put, + t_signed_url_download, + t_signed_nonascii_url_download, + + {group, noconn_errors}, + {group, timeout_errors}, + {group, http_errors} + ]}, + + {tcp_cases, [ + t_config_switch, + t_config_switch_http_settings, + t_too_large, + t_no_profile + ]}, + + {tls_cases, [ + t_tls_error + ]}, + + {noconn_errors, [{group, transport_errors}]}, + {timeout_errors, [{group, transport_errors}]}, + {http_errors, [{group, transport_errors}]}, + + {transport_errors, [ + t_start_multipart_error, + t_upload_part_error, + t_complete_multipart_error, + t_abort_multipart_error, + t_put_object_error + ]} + ]. + +suite() -> [{timetrap, {minutes, 1}}]. + +init_per_suite(Config) -> + {ok, _} = application:ensure_all_started(emqx_s3), + Config. + +end_per_suite(_Config) -> + ok = application:stop(emqx_s3). + +init_per_group(Group, Config) when Group =:= tcp orelse Group =:= tls -> + [{conn_type, Group} | Config]; +init_per_group(noconn_errors, Config) -> + [{failure, down} | Config]; +init_per_group(timeout_errors, Config) -> + [{failure, timeout} | Config]; +init_per_group(http_errors, Config) -> + [{failure, ehttpc_500} | Config]; +init_per_group(_ConnType, Config) -> + Config. + +end_per_group(_ConnType, _Config) -> + ok. 
+ +init_per_testcase(_TestCase, Config) -> + ok = snabbkaffe:start_trace(), + ConnType = ?config(conn_type, Config), + TestAwsConfig = emqx_s3_test_helpers:aws_config(ConnType), + + Bucket = emqx_s3_test_helpers:unique_bucket(), + ok = erlcloud_s3:create_bucket(Bucket, TestAwsConfig), + + ProfileBaseConfig = emqx_s3_test_helpers:base_config(ConnType), + ProfileConfig = ProfileBaseConfig#{bucket => Bucket}, + ok = emqx_s3:start_profile(profile_id(), ProfileConfig), + + [{bucket, Bucket}, {test_aws_config, TestAwsConfig}, {profile_config, ProfileConfig} | Config]. + +end_per_testcase(_TestCase, _Config) -> + ok = snabbkaffe:stop(), + _ = emqx_s3:stop_profile(profile_id()). + +%%-------------------------------------------------------------------- +%% Test cases +%%-------------------------------------------------------------------- + +t_happy_path_simple_put(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + Data = data($a, 1024, 10), + + lists:foreach( + fun(Chunk) -> + ?assertEqual( + ok, + emqx_s3_uploader:write(Pid, Chunk) + ) + end, + Data + ), + + ok = emqx_s3_uploader:complete(Pid), + + ?assertProcessExited( + normal, + Pid + ), + + ?assertObjectEqual( + iolist_to_binary(Data), + ?config(test_aws_config, Config), + ?config(bucket, Config), + Key + ). + +t_happy_path_multi(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + Data = data($a, 1024 * 1024, 10), + + lists:foreach( + fun(Chunk) -> + ?assertEqual( + ok, + emqx_s3_uploader:write(Pid, Chunk) + ) + end, + Data + ), + + ok = emqx_s3_uploader:complete(Pid), + + ?assertProcessExited( + normal, + Pid + ), + + ?assertObjectEqual( + iolist_to_binary(Data), + ?config(test_aws_config, Config), + ?config(bucket, Config), + Key + ). 
+ +t_signed_url_download(_Config) -> + Prefix = emqx_s3_test_helpers:unique_key(), + Key = Prefix ++ "/ascii.txt", + + {ok, Data} = upload(Key, 1024, 5), + + SignedUrl = emqx_s3:with_client(profile_id(), fun(Client) -> + emqx_s3_client:uri(Client, Key) + end), + + {ok, {_, _, Body}} = httpc:request(get, {SignedUrl, []}, [], []), + + ?assertEqual( + iolist_to_binary(Data), + iolist_to_binary(Body) + ). + +t_signed_nonascii_url_download(_Config) -> + Prefix = emqx_s3_test_helpers:unique_key(), + Key = Prefix ++ "/unicode-🫠.txt", + + {ok, Data} = upload(Key, 1024 * 1024, 8), + + SignedUrl = emqx_s3:with_client(profile_id(), fun(Client) -> + emqx_s3_client:uri(Client, Key) + end), + + {ok, {_, _, Body}} = httpc:request(get, {SignedUrl, []}, [], []), + + ?assertEqual( + iolist_to_binary(Data), + iolist_to_binary(Body) + ). + +t_abort_multi(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 6 * 1024 * 1024, 1), + + ok = emqx_s3_uploader:write(Pid, Data), + + ?assertMatch( + [], + list_objects(Config) + ), + + ok = emqx_s3_uploader:abort(Pid), + + ?assertMatch( + [], + list_objects(Config) + ), + + ?assertProcessExited( + normal, + Pid + ). + +t_abort_simple_put(_Config) -> + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 10 * 1024, 1), + + ok = emqx_s3_uploader:write(Pid, Data), + + ok = emqx_s3_uploader:abort(Pid), + + ?assertProcessExited( + normal, + Pid + ). 
+ +t_config_switch(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + OldBucket = ?config(bucket, Config), + {ok, Pid0} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + [Data0, Data1] = data($a, 6 * 1024 * 1024, 2), + + ok = emqx_s3_uploader:write(Pid0, Data0), + + %% Switch to the new config, but without changing HTTP settings + ProfileConfig = ?config(profile_config, Config), + NewBucket = emqx_s3_test_helpers:unique_bucket(), + ok = erlcloud_s3:create_bucket(NewBucket, ?config(test_aws_config, Config)), + NewProfileConfig = ProfileConfig#{bucket => NewBucket}, + + ok = emqx_s3:update_profile(profile_id(), NewProfileConfig), + + %% Already started uploader should be OK and use previous config + ok = emqx_s3_uploader:write(Pid0, Data1), + ok = emqx_s3_uploader:complete(Pid0), + + ?assertObjectEqual( + iolist_to_binary([Data0, Data1]), + ?config(test_aws_config, Config), + OldBucket, + Key + ), + + %% Now check that new uploader uses new config + {ok, Pid1} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + ok = emqx_s3_uploader:write(Pid1, Data0), + ok = emqx_s3_uploader:complete(Pid1), + + ?assertObjectEqual( + iolist_to_binary(Data0), + ?config(test_aws_config, Config), + NewBucket, + Key + ). 
+ +t_config_switch_http_settings(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + OldBucket = ?config(bucket, Config), + {ok, Pid0} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + [Data0, Data1] = data($a, 6 * 1024 * 1024, 2), + + ok = emqx_s3_uploader:write(Pid0, Data0), + + %% Switch to the new config, completely changing HTTP settings (tcp -> tls) + NewBucket = emqx_s3_test_helpers:unique_bucket(), + NewTestAwsConfig = emqx_s3_test_helpers:aws_config(tls), + ok = erlcloud_s3:create_bucket(NewBucket, NewTestAwsConfig), + NewProfileConfig0 = emqx_s3_test_helpers:base_config(tls), + NewProfileConfig1 = NewProfileConfig0#{bucket => NewBucket}, + + ok = emqx_s3:update_profile(profile_id(), NewProfileConfig1), + + %% Already started uploader should be OK and use previous config + ok = emqx_s3_uploader:write(Pid0, Data1), + ok = emqx_s3_uploader:complete(Pid0), + + ?assertObjectEqual( + iolist_to_binary([Data0, Data1]), + ?config(test_aws_config, Config), + OldBucket, + Key + ), + + %% Now check that new uploader uses new config + {ok, Pid1} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + ok = emqx_s3_uploader:write(Pid1, Data0), + ok = emqx_s3_uploader:complete(Pid1), + + ?assertObjectEqual( + iolist_to_binary(Data0), + NewTestAwsConfig, + NewBucket, + Key + ). + +t_start_multipart_error(Config) -> + _ = process_flag(trap_exit, true), + + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 6 * 1024 * 1024, 1), + + emqx_s3_test_helpers:with_failure( + ?config(conn_type, Config), + ?config(failure, Config), + fun() -> + ?assertMatch( + {error, _}, + emqx_s3_uploader:write(Pid, Data) + ) + end + ), + + ?assertProcessExited( + {error, _}, + Pid + ). 
+ +t_upload_part_error(Config) -> + _ = process_flag(trap_exit, true), + + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data0, Data1] = data($a, 6 * 1024 * 1024, 2), + + ok = emqx_s3_uploader:write(Pid, Data0), + + emqx_s3_test_helpers:with_failure( + ?config(conn_type, Config), + ?config(failure, Config), + fun() -> + ?assertMatch( + {error, _}, + emqx_s3_uploader:write(Pid, Data1) + ) + end + ), + + ?assertProcessExited( + {error, _}, + Pid + ). + +t_abort_multipart_error(Config) -> + _ = process_flag(trap_exit, true), + + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 6 * 1024 * 1024, 1), + + ok = emqx_s3_uploader:write(Pid, Data), + + emqx_s3_test_helpers:with_failure( + ?config(conn_type, Config), + ?config(failure, Config), + fun() -> + ?assertMatch( + {error, _}, + emqx_s3_uploader:abort(Pid) + ) + end + ), + + ?assertProcessExited( + {error, _}, + Pid + ). + +t_complete_multipart_error(Config) -> + _ = process_flag(trap_exit, true), + + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 6 * 1024 * 1024, 1), + + ok = emqx_s3_uploader:write(Pid, Data), + + emqx_s3_test_helpers:with_failure( + ?config(conn_type, Config), + ?config(failure, Config), + fun() -> + ?assertMatch( + {error, _}, + emqx_s3_uploader:complete(Pid) + ) + end + ), + + ?assertProcessExited( + {error, _}, + Pid + ). 
+ +t_put_object_error(Config) -> + _ = process_flag(trap_exit, true), + + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + %% Little data to avoid multipart upload + [Data] = data($a, 1024, 1), + + emqx_s3_test_helpers:with_failure( + ?config(conn_type, Config), + ?config(failure, Config), + fun() -> + ok = emqx_s3_uploader:write(Pid, Data), + ?assertMatch( + {error, _}, + emqx_s3_uploader:complete(Pid) + ) + end + ), + + ?assertProcessExited( + {error, _}, + Pid + ). + +t_too_large(Config) -> + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 1024, 1), + + [DataLarge] = data($a, 20 * 1024 * 1024, 1), + + ?assertMatch( + {error, {too_large, _}}, + emqx_s3_uploader:write(Pid, DataLarge) + ), + + ok = emqx_s3_uploader:write(Pid, Data), + ok = emqx_s3_uploader:complete(Pid), + + ?assertProcessExited( + normal, + Pid + ), + + ?assertObjectEqual( + iolist_to_binary(Data), + ?config(test_aws_config, Config), + ?config(bucket, Config), + Key + ). + +t_tls_error(Config) -> + _ = process_flag(trap_exit, true), + + ProfileBaseConfig = ?config(profile_config, Config), + ProfileConfig = emqx_utils_maps:deep_put( + [transport_options, ssl, server_name_indication], ProfileBaseConfig, "invalid-hostname" + ), + ok = emqx_s3:update_profile(profile_id(), ProfileConfig), + Key = emqx_s3_test_helpers:unique_key(), + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + [Data] = data($a, 6 * 1024 * 1024, 1), + + ?assertMatch( + {error, _}, + emqx_s3_uploader:write(Pid, Data) + ), + + ?assertProcessExited( + {error, _}, + Pid + ). + +t_no_profile(_Config) -> + Key = emqx_s3_test_helpers:unique_key(), + ?assertMatch( + {error, profile_not_found}, + emqx_s3:start_uploader(<<"no-profile">>, #{key => Key}) + ). 
+ +%%-------------------------------------------------------------------- +%% Test helpers +%%-------------------------------------------------------------------- + +profile_id() -> + <<"test">>. + +data(Byte, ChunkSize, ChunkCount) -> + Chunk = iolist_to_binary([Byte || _ <- lists:seq(1, ChunkSize)]), + [Chunk || _ <- lists:seq(1, ChunkCount)]. + +list_objects(Config) -> + Props = erlcloud_s3:list_objects(?config(bucket, Config), [], ?config(test_aws_config, Config)), + proplists:get_value(contents, Props). + +upload(Key, ChunkSize, ChunkCount) -> + {ok, Pid} = emqx_s3:start_uploader(profile_id(), #{key => Key}), + + _ = erlang:monitor(process, Pid), + + Data = data($a, ChunkSize, ChunkCount), + + ok = lists:foreach( + fun(Chunk) -> ?assertEqual(ok, emqx_s3_uploader:write(Pid, Chunk)) end, + Data + ), + + ok = emqx_s3_uploader:complete(Pid), + + ok = ?assertProcessExited( + normal, + Pid + ), + + {ok, Data}. diff --git a/changes/ee/feat-9927.en.md b/changes/ee/feat-9927.en.md new file mode 100644 index 000000000..c20a0c51e --- /dev/null +++ b/changes/ee/feat-9927.en.md @@ -0,0 +1 @@ +Introduce support for the File Transfer over MQTT feature as described in [EIP-0021](https://github.com/emqx/eip), with support to publish transferred files either to the node-local file system or to the S3 API compatible remote object storage. 
diff --git a/lib-ee/emqx_ee_conf/src/emqx_ee_conf.app.src b/lib-ee/emqx_ee_conf/src/emqx_ee_conf.app.src index 3df18ce7a..599b0798c 100644 --- a/lib-ee/emqx_ee_conf/src/emqx_ee_conf.app.src +++ b/lib-ee/emqx_ee_conf/src/emqx_ee_conf.app.src @@ -1,6 +1,6 @@ {application, emqx_ee_conf, [ {description, "EMQX Enterprise Edition configuration schema"}, - {vsn, "0.1.2"}, + {vsn, "0.1.3"}, {registered, []}, {applications, [ kernel, diff --git a/lib-ee/emqx_ee_conf/src/emqx_ee_conf_schema.erl b/lib-ee/emqx_ee_conf/src/emqx_ee_conf_schema.erl index c1b1a002c..f4a0b3a28 100644 --- a/lib-ee/emqx_ee_conf/src/emqx_ee_conf_schema.erl +++ b/lib-ee/emqx_ee_conf/src/emqx_ee_conf_schema.erl @@ -6,24 +6,22 @@ -behaviour(hocon_schema). --export([namespace/0, roots/0, fields/1, translations/0, translation/1, validations/0]). +-export([namespace/0, roots/0, fields/1, translations/0, translation/1, desc/1, validations/0]). --define(EE_SCHEMA_MODULES, [emqx_license_schema, emqx_ee_schema_registry_schema]). +-define(EE_SCHEMA_MODULES, [ + emqx_license_schema, + emqx_ee_schema_registry_schema, + emqx_ft_schema +]). namespace() -> emqx_conf_schema:namespace(). roots() -> - lists:foldl( - fun(Module, Roots) -> - Roots ++ apply(Module, roots, []) - end, - emqx_conf_schema:roots(), - ?EE_SCHEMA_MODULES - ). + emqx_conf_schema:roots() ++ ee_roots(). fields(Name) -> - emqx_conf_schema:fields(Name). + ee_delegate(fields, ?EE_SCHEMA_MODULES, Name). translations() -> emqx_conf_schema:translations(). @@ -31,5 +29,30 @@ translations() -> translation(Name) -> emqx_conf_schema:translation(Name). +desc(Name) -> + ee_delegate(desc, ?EE_SCHEMA_MODULES, Name). + validations() -> emqx_conf_schema:validations(). + +%%------------------------------------------------------------------------------ +%% helpers +%%------------------------------------------------------------------------------ + +ee_roots() -> + lists:flatmap( + fun(Module) -> + apply(Module, roots, []) + end, + ?EE_SCHEMA_MODULES + ). 
+ +ee_delegate(Method, [EEMod | EEMods], Name) -> + case lists:member(Name, apply(EEMod, roots, [])) of + true -> + apply(EEMod, Method, [Name]); + false -> + ee_delegate(Method, EEMods, Name) + end; +ee_delegate(Method, [], Name) -> + apply(emqx_conf_schema, Method, [Name]). diff --git a/mix.exs b/mix.exs index 3e6098258..1c50970ea 100644 --- a/mix.exs +++ b/mix.exs @@ -58,7 +58,7 @@ defmodule EMQXUmbrella.MixProject do {:ekka, github: "emqx/ekka", tag: "0.15.1", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.7", override: true}, - {:minirest, github: "emqx/minirest", tag: "1.3.8", override: true}, + {:minirest, github: "emqx/minirest", tag: "1.3.9", override: true}, {:ecpool, github: "emqx/ecpool", tag: "0.5.3", override: true}, {:replayq, github: "emqx/replayq", tag: "0.3.7", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: true}, @@ -175,7 +175,9 @@ defmodule EMQXUmbrella.MixProject do :emqx_bridge_pulsar, :emqx_oracle, :emqx_bridge_oracle, - :emqx_bridge_rabbitmq + :emqx_bridge_rabbitmq, + :emqx_ft, + :emqx_s3 ]) end @@ -190,13 +192,6 @@ defmodule EMQXUmbrella.MixProject do {:snappyer, "1.2.8", override: true}, {:crc32cer, "0.1.8", override: true}, {:supervisor3, "1.1.12", override: true}, - {:erlcloud, github: "emqx/erlcloud", tag: "3.5.16-emqx-1", override: true}, - # erlcloud's rebar.config requires rebar3 and does not support Mix, - # so it tries to fetch deps from git. We need to override this. - {:lhttpc, tag: "1.6.2", override: true}, - {:eini, "1.2.9", override: true}, - {:base16, "1.0.0", override: true}, - # end of erlcloud's deps {:opentsdb, github: "emqx/opentsdb-client-erl", tag: "v0.5.1", override: true}, # The following two are dependencies of rabbit_common. 
They are needed here to # make mix not complain about conflicting versions @@ -212,7 +207,14 @@ defmodule EMQXUmbrella.MixProject do github: "emqx/rabbitmq-server", tag: "v3.11.13-emqx", sparse: "deps/amqp_client", - override: true} + override: true}, + {:erlcloud, github: "emqx/erlcloud", tag: "3.6.8-emqx-1", override: true}, + # erlcloud's rebar.config requires rebar3 and does not support Mix, + # so it tries to fetch deps from git. We need to override this. + {:lhttpc, github: "erlcloud/lhttpc", tag: "1.6.2", override: true}, + {:eini, "1.2.9", override: true}, + {:base16, "1.0.0", override: true} + # end of erlcloud's deps ] end @@ -411,7 +413,8 @@ defmodule EMQXUmbrella.MixProject do emqx_bridge_rabbitmq: :permanent, emqx_ee_schema_registry: :permanent, emqx_eviction_agent: :permanent, - emqx_node_rebalance: :permanent + emqx_node_rebalance: :permanent, + emqx_ft: :permanent ], else: [] ) diff --git a/rebar.config b/rebar.config index b6f5a479f..bb6bb87fe 100644 --- a/rebar.config +++ b/rebar.config @@ -65,7 +65,7 @@ , {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.15.1"}}} , {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}} , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.7"}}} - , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.8"}}} + , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.9"}}} , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.3"}}} , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.7"}}} , {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}} diff --git a/rebar.config.erl b/rebar.config.erl index d556b41aa..3a8ff6d84 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -99,6 +99,8 @@ is_community_umbrella_app("apps/emqx_bridge_oracle") -> false; is_community_umbrella_app("apps/emqx_bridge_sqlserver") -> false; is_community_umbrella_app("apps/emqx_oracle") -> false; is_community_umbrella_app("apps/emqx_bridge_rabbitmq") -> 
false; +is_community_umbrella_app("apps/emqx_ft") -> false; +is_community_umbrella_app("apps/emqx_s3") -> false; is_community_umbrella_app(_) -> true. is_jq_supported() -> @@ -480,7 +482,8 @@ relx_apps_per_edition(ee) -> emqx_bridge_rabbitmq, emqx_ee_schema_registry, emqx_eviction_agent, - emqx_node_rebalance + emqx_node_rebalance, + emqx_ft ]; relx_apps_per_edition(ce) -> []. diff --git a/rel/i18n/emqx_ft_api.hocon b/rel/i18n/emqx_ft_api.hocon new file mode 100644 index 000000000..bf6c22411 --- /dev/null +++ b/rel/i18n/emqx_ft_api.hocon @@ -0,0 +1,16 @@ +emqx_ft_api { + +file_list.desc: +"""List all uploaded files.""" + +file_list_transfer.desc: +"""List a file uploaded during specified transfer, identified by client id and file id.""" + +} + +emqx_ft_storage_exporter_fs_api { + +file_get.desc: +"""Get a file by its id.""" + +} diff --git a/rel/i18n/emqx_ft_schema.hocon b/rel/i18n/emqx_ft_schema.hocon new file mode 100644 index 000000000..bafda331a --- /dev/null +++ b/rel/i18n/emqx_ft_schema.hocon @@ -0,0 +1,62 @@ +emqx_ft_schema { + +enable.desc: +"""Enable the File Transfer feature.
+Enabling File Transfer implies reserving special MQTT topics in order to serve the protocol.
+This toggle also affects the availability of the File Transfer REST API and +storage-dependent background activities (e.g. garbage collection).""" + +init_timeout.desc: +"""Timeout for initializing the file transfer.
+After reaching the timeout, `init` message will be acked with an error""" + +assemble_timeout.desc: +"""Timeout for assembling and exporting file segments into a final file.
+After reaching the timeout, `fin` message will be acked with an error""" + +store_segment_timeout.desc: +"""Timeout for storing a file segment.
+After reaching the timeout, message with the segment will be acked with an error""" + +storage_backend.desc: +"""Storage settings for file transfer.""" + +local_storage.desc: +"""Local file system backend to store uploaded fragments and temporary data.""" + +local_storage_segments.desc: +"""Settings for local segments storage, which include uploaded transfer fragments and temporary data.""" + +local_storage_segments_root.desc: +"""File system path to keep uploaded fragments and temporary data.""" + +local_storage_exporter_backend.desc: +"""Exporter for the local file system storage backend.
+Exporter defines where and how fully transferred and assembled files are stored.""" + +local_storage_exporter.desc: +"""Exporter to the local file system.""" + +s3_exporter.desc: +"""Exporter to the S3 API compatible object storage.""" + +local_storage_exporter_root.desc: +"""Directory where the uploaded files are kept.""" + +local_storage_segments_gc.desc: +"""Garbage collection settings for the intermediate and temporary files in the local file system.""" + +storage_gc_interval.desc: +"""Interval of periodic garbage collection.""" + +storage_gc_max_segments_ttl.desc: +"""Maximum TTL of a segment kept in the local file system.
+This is a hard limit: no segment will outlive this TTL, even if some file transfer specifies a +TTL more than that.""" + +storage_gc_min_segments_ttl.desc: +"""Minimum TTL of a segment kept in the local file system.
+This is a hard limit: no segment will be garbage collected before reaching this TTL, +even if some file transfer specifies a TTL less than that.""" + +} diff --git a/rel/i18n/emqx_s3_schema.hocon b/rel/i18n/emqx_s3_schema.hocon new file mode 100644 index 000000000..df4b973fa --- /dev/null +++ b/rel/i18n/emqx_s3_schema.hocon @@ -0,0 +1,38 @@ +emqx_s3_schema { + +access_key_id.desc: +"""The access key ID of the S3 bucket.""" + +secret_access_key.desc: +"""The secret access key of the S3 bucket.""" + +bucket.desc: +"""The name of the S3 bucket.""" + +host.desc: +"""The host of the S3 endpoint.""" + +port.desc: +"""The port of the S3 endpoint.""" + +url_expire_time.desc: +"""The time in seconds for which the signed URLs to the S3 objects are valid.""" + +min_part_size.desc: +"""The minimum part size for multipart uploads.
+Uploaded data will be accumulated in memory until this size is reached.""" + +max_part_size.desc: +"""The maximum part size for multipart uploads.
+S3 uploader won't try to upload parts larger than this size.""" + +acl.desc: +"""The ACL to use for the uploaded objects.""" + +transport_options.desc: +"""Options for the HTTP transport layer used by the S3 client.""" + +ipv6_probe.desc: +"""Whether to probe for IPv6 support.""" + +} diff --git a/scripts/ct/run.sh b/scripts/ct/run.sh index 4824fbdf3..b24c760f0 100755 --- a/scripts/ct/run.sh +++ b/scripts/ct/run.sh @@ -92,6 +92,12 @@ if [ "${WHICH_APP}" = 'novalue' ]; then exit 1 fi +if [ ! -d "${WHICH_APP}" ]; then + echo "must provide an existing path for --app arg" + help + exit 1 +fi + if [[ "${WHICH_APP}" == lib-ee* && (-z "${PROFILE+x}" || "${PROFILE}" != emqx-enterprise) ]]; then echo 'You are trying to run an enterprise test case without the emqx-enterprise profile.' echo 'This will most likely not work.' @@ -203,6 +209,10 @@ for dep in ${CT_DEPS}; do rabbitmq) FILES+=( '.ci/docker-compose-file/docker-compose-rabbitmq.yaml' ) ;; + minio) + FILES+=( '.ci/docker-compose-file/docker-compose-minio-tcp.yaml' + '.ci/docker-compose-file/docker-compose-minio-tls.yaml' ) + ;; *) echo "unknown_ct_dependency $dep" exit 1