feat(ft): introduce simple filesystem storage backend + assembler

Andrew Mayorov 2023-01-20 18:55:45 +03:00 committed by Ilya Averyanov
parent aaaef30be6
commit 81e04ce93a
6 changed files with 1009 additions and 1 deletion


@@ -5,7 +5,8 @@
{mod, {emqx_ft_app, []}},
{applications, [
kernel,
-stdlib
+stdlib,
+gproc
]},
{env, []},
{modules, []}


@@ -30,6 +30,20 @@
on_channel_takeovered/3
]).
-export_type([clientid/0]).
-export_type([transfer/0]).
-export_type([offset/0]).
%% Number of bytes
-type bytes() :: non_neg_integer().
%% MQTT Client ID
-type clientid() :: emqx_types:clientid().
-type fileid() :: binary().
-type transfer() :: {clientid(), fileid()}.
-type offset() :: bytes().
-type ft_data() :: #{
nodes := list(node())
}.


@@ -0,0 +1,160 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_assembler).
-export([start_link/2]).
-behaviour(gen_statem).
-export([callback_mode/0]).
-export([init/1]).
% -export([list_local_fragments/3]).
% -export([list_remote_fragments/3]).
% -export([start_assembling/3]).
-export([handle_event/4]).
% -export([handle_continue/2]).
% -export([handle_call/3]).
% -export([handle_cast/2]).
-record(st, {
storage :: _Storage,
transfer :: emqx_ft:transfer(),
assembly :: _TODO,
file :: io:device(),
hash
}).
-define(RPC_LIST_TIMEOUT, 1000).
-define(RPC_READSEG_TIMEOUT, 5000).
%%
start_link(Storage, Transfer) ->
gen_statem:start_link(?MODULE, {Storage, Transfer}, []).
%%
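%% Convenience action: enqueue an internal event so the state machine drives
%% itself from one step to the next without waiting for any external input.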
-define(internal(C), {next_event, internal, C}).
callback_mode() ->
handle_event_function.
init({Storage, Transfer}) ->
St = #st{
storage = Storage,
transfer = Transfer,
assembly = emqx_ft_assembly:new(),
hash = crypto:hash_init(sha256)
},
{ok, list_local_fragments, St, ?internal([])}.
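%% State flow, driven entirely by internal events:
%%   list_local_fragments
%%     -> start_assembling, if local fragments alone cover the whole file;
%%     -> {list_remote_fragments, Nodes} otherwise, then start_assembling once
%%        coverage is complete (or stop with an error if it is not);
%%   start_assembling -> {assemble, Coverage} -> complete -> stop.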
handle_event(internal, _, list_local_fragments, St = #st{assembly = Asm}) ->
% TODO: what do we do with non-transient errors here (e.g. `eacces`)?
{ok, Fragments} = emqx_ft_storage_fs:list(St#st.storage, St#st.transfer),
NAsm = emqx_ft_assembly:update(emqx_ft_assembly:append(Asm, node(), Fragments)),
NSt = St#st{assembly = NAsm},
case emqx_ft_assembly:status(NAsm) of
complete ->
{next_state, start_assembling, NSt, ?internal([])};
{incomplete, _} ->
Nodes = ekka:nodelist() -- [node()],
{next_state, {list_remote_fragments, Nodes}, NSt, ?internal([])}
% TODO: recovery?
% {error, _} = Reason ->
% {stop, Reason}
end;
handle_event(internal, _, {list_remote_fragments, Nodes}, St) ->
% TODO: portable "storage" ref
Args = [St#st.storage, St#st.transfer],
% TODO
% Async would be better because we would not need to wait for some lagging
% nodes if the coverage is already complete.
% TODO: BP API?
Results = erpc:multicall(Nodes, emqx_ft_storage_fs, list, Args, ?RPC_LIST_TIMEOUT),
NodeResults = lists:zip(Nodes, Results),
NAsm = emqx_ft_assembly:update(
lists:foldl(
fun
({Node, {ok, {ok, Fragments}}}, Asm) ->
emqx_ft_assembly:append(Asm, Node, Fragments);
({_Node, _Result}, Asm) ->
% TODO: log?
Asm
end,
St#st.assembly,
NodeResults
)
),
NSt = St#st{assembly = NAsm},
case emqx_ft_assembly:status(NAsm) of
complete ->
{next_state, start_assembling, NSt, ?internal([])};
% TODO: retries / recovery?
{incomplete, _} = Status ->
{stop, {error, Status}}
end;
handle_event(internal, _, start_assembling, St = #st{assembly = Asm}) ->
Filemeta = emqx_ft_assembly:filemeta(Asm),
Coverage = emqx_ft_assembly:coverage(Asm),
% TODO: errors
{ok, Handle} = emqx_ft_storage_fs:open_file(St#st.storage, St#st.transfer, Filemeta),
{next_state, {assemble, Coverage}, St#st{file = Handle}, ?internal([])};
handle_event(internal, _, {assemble, [{Node, Segment} | Rest]}, St = #st{}) ->
% TODO
% Currently, a race is possible between getting the segment info from the remote
% node and this node garbage collecting the segment itself.
Args = [St#st.storage, St#st.transfer, Segment, 0, segsize(Segment)],
% TODO: pipelining
case erpc:call(Node, emqx_ft_storage_fs, read_segment, Args, ?RPC_READSEG_TIMEOUT) of
{ok, Content} ->
{ok, NHandle} = emqx_ft_storage_fs:write(St#st.file, Content),
{next_state, {assemble, Rest}, St#st{file = NHandle}, ?internal([])}
% {error, _} ->
% ...
end;
handle_event(internal, _, {assemble, []}, St = #st{}) ->
{next_state, complete, St, ?internal([])};
handle_event(internal, _, complete, St = #st{assembly = Asm, file = Handle}) ->
Filemeta = emqx_ft_assembly:filemeta(Asm),
ok = emqx_ft_storage_fs:complete(St#st.storage, St#st.transfer, Filemeta, Handle),
{stop, shutdown}.
% handle_continue(list_local, St = #st{storage = Storage, transfer = Transfer, assembly = Asm}) ->
% % TODO: what we do with non-transients errors here (e.g. `eacces`)?
% {ok, Fragments} = emqx_ft_storage_fs:list(Storage, Transfer),
% NAsm = emqx_ft_assembly:update(emqx_ft_assembly:append(Asm, node(), Fragments)),
% NSt = St#st{assembly = NAsm},
% case emqx_ft_assembly:status(NAsm) of
% complete ->
% {noreply, NSt, {continue}};
% {more, _} ->
% error(noimpl);
% {error, _} ->
% error(noimpl)
% end,
% {noreply, St}.
% handle_call(_Call, _From, St) ->
% {reply, {error, badcall}, St}.
% handle_cast(_Cast, St) ->
% {noreply, St}.
%%
segsize(#{fragment := {segmentinfo, Info}}) ->
maps:get(size, Info).


@@ -0,0 +1,44 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_assembler_sup).
-export([start_link/1]).
-export([start_child/3]).
-behaviour(supervisor).
-export([init/1]).
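%% Assembler supervisors are registered through gproc, so a per-storage
%% supervisor can be addressed by an arbitrary term ID rather than an atom.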
-define(REF(ID), {via, gproc, {n, l, {?MODULE, ID}}}).
start_link(ID) ->
supervisor:start_link(?REF(ID), ?MODULE, []).
start_child(ID, Storage, Transfer) ->
Childspec = #{
id => {Storage, Transfer},
start => {emqx_ft_assembler, start_link, [Storage, Transfer]},
restart => transient
},
supervisor:start_child(?REF(ID), Childspec).
init(_) ->
SupFlags = #{
strategy => one_for_one,
intensity => 100,
period => 1000
},
{ok, {SupFlags, []}}.


@@ -0,0 +1,364 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_assembly).
-export([new/0]).
-export([append/3]).
-export([update/1]).
-export([status/1]).
-export([filemeta/1]).
-export([coverage/1]).
-export([properties/1]).
-record(asm, {
status :: _TODO,
coverage :: _TODO,
properties :: _TODO,
meta :: _TODO,
% orddict:orddict(K, V)
segs :: _TODO,
size
}).
new() ->
#asm{
status = {incomplete, {missing, filemeta}},
meta = orddict:new(),
segs = orddict:new(),
size = 0
}.
append(Asm, Node, Fragments) when is_list(Fragments) ->
lists:foldl(fun(F, AsmIn) -> append(AsmIn, Node, F) end, Asm, Fragments);
append(Asm, Node, Fragment = #{fragment := {filemeta, _}}) ->
append_filemeta(Asm, Node, Fragment);
append(Asm, Node, Segment = #{fragment := {segmentinfo, _}}) ->
append_segmentinfo(Asm, Node, Segment).
update(Asm) ->
case status(meta, Asm) of
{complete, _Meta} ->
case status(coverage, Asm) of
{complete, Coverage, Props} ->
Asm#asm{
status = complete,
coverage = Coverage,
properties = Props
};
Status ->
Asm#asm{status = Status}
end;
Status ->
Asm#asm{status = Status}
end.
status(#asm{status = Status}) ->
Status.
filemeta(Asm) ->
case status(meta, Asm) of
{complete, Meta} -> Meta;
_Other -> undefined
end.
coverage(#asm{coverage = Coverage}) ->
Coverage.
properties(#asm{properties = Properties}) ->
Properties.
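%% Metadata is kept in an orddict keyed by the filemeta itself: a single entry
%% means every node reported the same filemeta, more than one entry means the
%% nodes disagree, and an empty dict means the filemeta is still missing.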
status(meta, #asm{meta = Meta}) ->
status(meta, orddict:to_list(Meta));
status(meta, [{Meta, {_Node, _Frag}}]) ->
{complete, Meta};
status(meta, []) ->
{incomplete, {missing, filemeta}};
status(meta, [_M1, _M2 | _] = Metas) ->
{error, {inconsistent, [Frag#{node => Node} || {_, {Node, Frag}} <- Metas]}};
status(coverage, #asm{segs = Segments, size = Size}) ->
case coverage(orddict:to_list(Segments), 0, Size) of
Coverage when is_list(Coverage) ->
{complete, Coverage, #{
dominant => dominant(Coverage)
}};
Missing = {missing, _} ->
{incomplete, Missing}
end.
append_filemeta(Asm, Node, Fragment = #{fragment := {filemeta, Meta}}) ->
Asm#asm{
meta = orddict:store(Meta, {Node, Fragment}, Asm#asm.meta)
}.
append_segmentinfo(Asm, Node, Fragment = #{fragment := {segmentinfo, Info}}) ->
Offset = maps:get(offset, Info),
Size = maps:get(size, Info),
End = Offset + Size,
Asm#asm{
% TODO
% In theory it's possible to have two segments with same offset + size on
% different nodes but with differing content. We'd need a checksum to
% be able to disambiguate them though.
segs = orddict:store({Offset, locality(Node), -End, Node}, Fragment, Asm#asm.segs),
size = max(End, Asm#asm.size)
}.
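%% Segments are kept in an orddict sorted by {Offset, Locality, -End, Node}:
%% ascending offset first, then local segments before remote ones, then larger
%% segments (more negative -End) before smaller ones at the same offset.
%% coverage/3 walks this list greedily, picking the first segment that starts
%% exactly at the cursor and backtracking to an alternative candidate if the
%% greedy choice leaves a gap further down the list.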
coverage([{{Offset, _, _, _}, _Segment} | Rest], Cursor, Sz) when Offset < Cursor ->
coverage(Rest, Cursor, Sz);
coverage([{{Cursor, _Locality, MEnd, Node}, Segment} | Rest], Cursor, Sz) ->
% NOTE
% We consider only whole fragments here, so for example from the point of view of
% this algo `[{Offset1 = 0, Size1 = 15}, {Offset2 = 10, Size2 = 10}]` has no
% coverage.
case coverage(Rest, -MEnd, Sz) of
Coverage when is_list(Coverage) ->
[{Node, Segment} | Coverage];
Missing = {missing, _} ->
case coverage(Rest, Cursor, Sz) of
CoverageAlt when is_list(CoverageAlt) ->
CoverageAlt;
{missing, _} ->
Missing
end
end;
coverage([{{Offset, _Locality, _MEnd, _Node}, _Segment} | _], Cursor, _Sz) when Offset > Cursor ->
{missing, {segment, Cursor, Offset}};
coverage([], Cursor, Sz) when Cursor < Sz ->
{missing, {segment, Cursor, Sz}};
coverage([], Cursor, Cursor) ->
[].
dominant(Coverage) ->
% TODO: needs improvement, better defined _dominance_, maybe some score
Freqs = frequencies(fun({Node, Segment}) -> {Node, segsize(Segment)} end, Coverage),
maxfreq(Freqs, node()).
frequencies(Fun, List) ->
lists:foldl(
fun(E, Acc) ->
{K, N} = Fun(E),
maps:update_with(K, fun(M) -> M + N end, N, Acc)
end,
#{},
List
).
maxfreq(Freqs, Init) ->
{_, Max} = maps:fold(
fun
(F, N, {M, _MF}) when N > M -> {N, F};
(_F, _N, {M, MF}) -> {M, MF}
end,
{0, Init},
Freqs
),
Max.
locality(Node) when Node =:= node() ->
% NOTE
% This should prioritize locally available segments over those on remote nodes.
0;
locality(_RemoteNode) ->
1.
segsize(#{fragment := {segmentinfo, Info}}) ->
maps:get(size, Info).
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
incomplete_new_test() ->
?assertEqual(
{incomplete, {missing, filemeta}},
status(update(new()))
).
incomplete_test() ->
?assertEqual(
{incomplete, {missing, filemeta}},
status(
update(
append(new(), node(), [
segmentinfo(p1, 0, 42),
segmentinfo(p1, 42, 100)
])
)
)
).
consistent_test() ->
Asm1 = append(new(), n1, [filemeta(m1, "blarg")]),
Asm2 = append(Asm1, n2, [segmentinfo(s2, 0, 42)]),
Asm3 = append(Asm2, n3, [filemeta(m3, "blarg")]),
?assertMatch({complete, _}, status(meta, Asm3)).
inconsistent_test() ->
Asm1 = append(new(), node(), [segmentinfo(s1, 0, 42)]),
Asm2 = append(Asm1, n1, [filemeta(m1, "blarg")]),
Asm3 = append(Asm2, n2, [segmentinfo(s2, 0, 42), filemeta(m1, "blorg")]),
Asm4 = append(Asm3, n3, [filemeta(m3, "blarg")]),
?assertMatch(
{error,
{inconsistent, [
% blarg < blorg
#{node := n3, path := m3, fragment := {filemeta, #{name := "blarg"}}},
#{node := n2, path := m1, fragment := {filemeta, #{name := "blorg"}}}
]}},
status(meta, Asm4)
).
simple_coverage_test() ->
Node = node(),
Segs = [
{node42, segmentinfo(n1, 20, 30)},
{Node, segmentinfo(n2, 0, 10)},
{Node, segmentinfo(n3, 50, 50)},
{Node, segmentinfo(n4, 10, 10)}
],
Asm = append_many(new(), Segs),
?assertMatch(
{complete,
[
{Node, #{path := n2}},
{Node, #{path := n4}},
{node42, #{path := n1}},
{Node, #{path := n3}}
],
#{dominant := Node}},
status(coverage, Asm)
).
redundant_coverage_test() ->
Node = node(),
Segs = [
{Node, segmentinfo(n1, 0, 20)},
{node1, segmentinfo(n2, 0, 10)},
{Node, segmentinfo(n3, 20, 40)},
{node2, segmentinfo(n4, 10, 10)},
{node2, segmentinfo(n5, 50, 20)},
{node3, segmentinfo(n6, 20, 20)},
{Node, segmentinfo(n7, 50, 10)},
{node1, segmentinfo(n8, 40, 10)}
],
Asm = append_many(new(), Segs),
?assertMatch(
{complete,
[
{Node, #{path := n1}},
{node3, #{path := n6}},
{node1, #{path := n8}},
{node2, #{path := n5}}
],
#{dominant := _}},
status(coverage, Asm)
).
redundant_coverage_prefer_local_test() ->
Node = node(),
Segs = [
{node1, segmentinfo(n1, 0, 20)},
{Node, segmentinfo(n2, 0, 10)},
{Node, segmentinfo(n3, 10, 10)},
{node2, segmentinfo(n4, 20, 20)},
{Node, segmentinfo(n5, 30, 10)},
{Node, segmentinfo(n6, 20, 10)}
],
Asm = append_many(new(), Segs),
?assertMatch(
{complete,
[
{Node, #{path := n2}},
{Node, #{path := n3}},
{Node, #{path := n6}},
{Node, #{path := n5}}
],
#{dominant := Node}},
status(coverage, Asm)
).
missing_coverage_test() ->
Node = node(),
Segs = [
{Node, segmentinfo(n1, 0, 10)},
{node1, segmentinfo(n3, 10, 20)},
{Node, segmentinfo(n2, 0, 20)},
{node2, segmentinfo(n4, 50, 50)},
{Node, segmentinfo(n5, 40, 60)}
],
Asm = append_many(new(), Segs),
?assertEqual(
% {incomplete, {missing, {segment, 30, 40}}}, ???
{incomplete, {missing, {segment, 20, 40}}},
status(coverage, Asm)
).
missing_end_coverage_test() ->
Node = node(),
Segs = [
{Node, segmentinfo(n1, 0, 15)},
{node1, segmentinfo(n3, 10, 10)}
],
Asm = append_many(new(), Segs),
?assertEqual(
{incomplete, {missing, {segment, 15, 20}}},
status(coverage, Asm)
).
missing_coverage_with_redundancy_test() ->
Segs = [
{node(), segmentinfo(n1, 0, 10)},
{node(), segmentinfo(n2, 0, 20)},
{node42, segmentinfo(n3, 10, 20)},
{node43, segmentinfo(n4, 10, 50)},
{node(), segmentinfo(n5, 40, 60)}
],
Asm = append_many(new(), Segs),
?assertEqual(
% {incomplete, {missing, {segment, 50, 60}}}, ???
{incomplete, {missing, {segment, 20, 40}}},
status(coverage, Asm)
).
append_many(Asm, List) ->
lists:foldl(
fun({Node, Frag}, Acc) -> append(Acc, Node, Frag) end,
Asm,
List
).
filemeta(Path, Name) ->
#{
path => Path,
fragment =>
{filemeta, #{
name => Name
}}
}.
segmentinfo(Path, Offset, Size) ->
#{
path => Path,
fragment =>
{segmentinfo, #{
offset => Offset,
size => Size
}}
}.
-endif.


@@ -0,0 +1,425 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ft_storage_fs).
-include_lib("typerefl/include/types.hrl").
-include_lib("hocon/include/hoconsc.hrl").
% -compile(export_all).
-export([store_filemeta/3]).
-export([store_segment/3]).
-export([list/2]).
-export([assemble/2]).
-export([open_file/3]).
-export([complete/4]).
-export([write/2]).
-export([discard/1]).
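%% Expected call sequence for a single transfer (a sketch based on the specs
%% below; `Root`, `Filemeta` and the segment content are placeholders, and
%% assemble/2 additionally assumes the assembler supervisor for this storage
%% is already running):
%%
%%   Transfer = {ClientId, FileId},
%%   ok = emqx_ft_storage_fs:store_filemeta(Root, Transfer, Filemeta),
%%   ok = emqx_ft_storage_fs:store_segment(Root, Transfer, {Offset, Content}),
%%   {ok, Fragments} = emqx_ft_storage_fs:list(Root, Transfer),
%%   {ok, _Assembler} = emqx_ft_storage_fs:assemble(Root, Transfer).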
% -behaviour(gen_server).
% -export([init/1]).
% -export([handle_call/3]).
% -export([handle_cast/2]).
-type json_value() ::
null
| boolean()
| binary()
| number()
| [json_value()]
| #{binary() => json_value()}.
-reflect_type([json_value/0]).
-type transfer() :: emqx_ft:transfer().
-type offset() :: emqx_ft:offset().
%% TODO: move to `emqx_ft` interface module
% -type sha256_hex() :: <<_:512>>.
-type filemeta() :: #{
%% Display name
name := string(),
%% Size in bytes, as advertised by the client.
%% The client is free to specify whatever it wants here, which means we can
%% end up with a file of a different size after assembly. It's not clear from
%% the specification what that means (e.g. what clients' expectations are), so
%% we currently do not consider it an error (or, specifically, a signal that
%% the resulting file was corrupted during transmission).
size => _Bytes :: non_neg_integer(),
checksum => {sha256, <<_:256>>},
expire_at := emqx_datetime:epoch_second(),
%% TTL of individual segments
%% Somewhat confusing that we won't know it on the nodes where the filemeta
%% is missing.
segments_ttl => _Seconds :: pos_integer(),
user_data => json_value()
}.
-type segment() :: {offset(), _Content :: binary()}.
-type segmentinfo() :: #{
offset := offset(),
size := _Bytes :: non_neg_integer()
}.
-type filefrag(T) :: #{
path := file:name(),
timestamp := emqx_datetime:epoch_second(),
fragment := T
}.
-type filefrag() :: filefrag({filemeta, filemeta()} | {segment, segmentinfo()}).
-define(MANIFEST, "MANIFEST.json").
-define(SEGMENT, "SEG").
-define(TEMP, "TMP").
-type root() :: file:name().
% -record(st, {
% root :: file:name()
% }).
%% TODO
-type storage() :: root().
%%
-define(PROCREF(Root), {via, gproc, {n, l, {?MODULE, Root}}}).
-spec start_link(root()) ->
{ok, pid()} | {error, already_started}.
start_link(Root) ->
gen_server:start_link(?PROCREF(Root), ?MODULE, [], []).
%% Store manifest in the backing filesystem.
%% Atomic operation.
-spec store_filemeta(storage(), transfer(), filemeta()) ->
% Quota? Some lower level errors?
ok | {error, conflict} | {error, _TODO}.
store_filemeta(Storage, Transfer, Meta) ->
Filepath = mk_filepath(Storage, Transfer, ?MANIFEST),
case read_file(Filepath, fun decode_filemeta/1) of
{ok, Meta} ->
_ = touch_file(Filepath),
ok;
{ok, _Conflict} ->
% TODO
% We won't see conflicts in case of concurrent `store_filemeta`
% requests. It's a rather odd scenario, so it's fine not to worry
% about it too much for now.
{error, conflict};
{error, Reason} when Reason =:= notfound; Reason =:= corrupted ->
write_file_atomic(Filepath, encode_filemeta(Meta))
end.
%% Store a segment in the backing filesystem.
%% Atomic operation.
-spec store_segment(storage(), transfer(), segment()) ->
% Where does the checksum get verified? At the upper level, probably.
% Quota? Some lower level errors?
ok | {error, _TODO}.
store_segment(Storage, Transfer, Segment = {_Offset, Content}) ->
Filepath = mk_filepath(Storage, Transfer, mk_segment_filename(Segment)),
write_file_atomic(Filepath, Content).
-spec list(storage(), transfer()) ->
% Some lower level errors? {error, notfound}?
% Result will contain zero or only one filemeta.
{ok, list(filefrag())} | {error, _TODO}.
list(Storage, Transfer) ->
Dirname = mk_filedir(Storage, Transfer),
case file:list_dir(Dirname) of
{ok, Filenames} ->
{ok, filtermap_files(fun mk_filefrag/2, Dirname, Filenames)};
{error, enoent} ->
{ok, []};
{error, _} = Error ->
Error
end.
-spec assemble(storage(), transfer()) ->
% {ok, _Assembler :: pid()} | {error, incomplete} | {error, badrpc} | {error, _TODO}.
{ok, _Assembler :: pid()} | {error, _TODO}.
assemble(Storage, Transfer) ->
emqx_ft_assembler_sup:start_child(Storage, Storage, Transfer).
%%
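%% Lifecycle of an assembled file: open_file/3 creates a temporary file and
%% initializes a running checksum, write/2 appends content and returns an
%% updated handle with the checksum advanced, complete/4 verifies the checksum
%% and renames the temporary file into place, and discard/1 closes and deletes
%% the temporary file.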
-opaque handle() :: {file:name(), io:device(), crypto:hash_state()}.
-spec open_file(storage(), transfer(), filemeta()) ->
{ok, handle()} | {error, _TODO}.
open_file(Storage, Transfer, Filemeta) ->
Filename = maps:get(name, Filemeta),
Filepath = mk_filepath(Storage, Transfer, Filename),
{_, TempFilepath} = mk_temp_filepath(Filepath),
case file:open(TempFilepath, [write, raw]) of
{ok, Handle} ->
_ = file:truncate(Handle),
{ok, {TempFilepath, Handle, init_checksum(Filemeta)}};
{error, _} = Error ->
Error
end.
-spec write(handle(), iodata()) ->
{ok, handle()} | {error, _TODO}.
write({Filepath, IoDevice, Ctx}, IoData) ->
case file:write(IoDevice, IoData) of
ok ->
{ok, {Filepath, IoDevice, update_checksum(Ctx, IoData)}};
{error, _} = Error ->
Error
end.
-spec complete(storage(), transfer(), filemeta(), handle()) ->
ok | {error, {checksum, _Algo, _Computed}} | {error, _TODO}.
complete(Storage, Transfer, Filemeta, Handle = {Filepath, IoDevice, Ctx}) ->
TargetFilepath = mk_filepath(Storage, Transfer, maps:get(name, Filemeta)),
case verify_checksum(Ctx, Filemeta) of
ok ->
ok = file:close(IoDevice),
file:rename(Filepath, TargetFilepath);
{error, _} = Error ->
_ = discard(Handle),
Error
end.
-spec discard(handle()) ->
ok.
discard({Filepath, IoDevice, _Ctx}) ->
ok = file:close(IoDevice),
file:delete(Filepath).
init_checksum(#{checksum := {Algo, _}}) ->
crypto:hash_init(Algo);
init_checksum(#{}) ->
undefined.
update_checksum(Ctx, IoData) when Ctx /= undefined ->
crypto:hash_update(Ctx, IoData);
update_checksum(undefined, _IoData) ->
undefined.
verify_checksum(Ctx, #{checksum := {Algo, Digest}}) when Ctx /= undefined ->
case crypto:hash_final(Ctx) of
Digest ->
ok;
Mismatch ->
{error, {checksum, Algo, binary:encode_hex(Mismatch)}}
end;
verify_checksum(undefined, _) ->
ok.
%%
-spec init(root()) -> {ok, storage()}.
init(Root) ->
% TODO: garbage_collect(...)
{ok, Root}.
%%
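%% The manifest is stored as JSON wrapped in a versioned prelude. For
%% illustration only (field values here are hypothetical), an encoded manifest
%% looks roughly like:
%%   ["filemeta", 1, {"name": "cat.jpg", "size": 12345,
%%                    "expire_at": 1700000000, "checksum": "A1B2...",
%%                    "segments_ttl": 600}]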
-define(PRELUDE(Vsn, Meta), [<<"filemeta">>, Vsn, Meta]).
schema() ->
#{
roots => [
{name, hoconsc:mk(string(), #{required => true})},
{size, hoconsc:mk(non_neg_integer())},
{expire_at, hoconsc:mk(non_neg_integer(), #{required => true})},
{checksum, hoconsc:mk({atom(), binary()}, #{converter => converter(checksum)})},
{segments_ttl, hoconsc:mk(pos_integer())},
{user_data, hoconsc:mk(json_value())}
]
}.
% encode_filemeta(Meta) ->
% emqx_json:encode(
% ?PRELUDE(
% _Vsn = 1,
% maps:map(
% fun
% (name, Name) ->
% {<<"name">>, Name};
% (size, Size) ->
% {<<"size">>, Size};
% (checksum, {sha256, Hash}) ->
% {<<"checksum">>, <<"sha256:", (binary:encode_hex(Hash))/binary>>};
% (expire_at, ExpiresAt) ->
% {<<"expire_at">>, ExpiresAt};
% (segments_ttl, TTL) ->
% {<<"segments_ttl">>, TTL};
% (user_data, UserData) ->
% {<<"user_data">>, UserData}
% end,
% Meta
% )
% )
% ).
encode_filemeta(Meta) ->
% TODO: Looks like this should be hocon's responsibility.
Term = hocon_tconf:make_serializable(schema(), emqx_map_lib:binary_key_map(Meta), #{}),
emqx_json:encode(?PRELUDE(_Vsn = 1, Term)).
decode_filemeta(Binary) ->
?PRELUDE(_Vsn = 1, Term) = emqx_json:decode(Binary, [return_maps]),
hocon_tconf:check_plain(schema(), Term, #{atom_key => true, required => false}).
converter(checksum) ->
fun
(undefined, #{}) ->
undefined;
({sha256, Bin}, #{make_serializable := true}) ->
_ = is_binary(Bin) orelse throw({expected_type, string}),
_ = byte_size(Bin) =:= 32 orelse throw({expected_length, 32}),
binary:encode_hex(Bin);
(Hex, #{}) ->
_ = is_binary(Hex) orelse throw({expected_type, string}),
_ = byte_size(Hex) =:= 64 orelse throw({expected_length, 64}),
{sha256, binary:decode_hex(Hex)}
end.
% map_into(Fun, Into, Ks, Map) ->
% map_foldr(map_into_fn(Fun, Into), Into, Ks, Map).
% map_into_fn(Fun, L) when is_list(L) ->
% fun(K, V, Acc) -> [{K, Fun(K, V)} || Acc] end.
% map_foldr(_Fun, Acc, [], _) ->
% Acc;
% map_foldr(Fun, Acc, [K | Ks], Map) when is_map_key(K, Map) ->
% Fun(K, maps:get(K, Map), map_foldr(Fun, Acc, Ks, Map));
% map_foldr(Fun, Acc, [_ | Ks], Map) ->
% map_foldr(Fun, Acc, Ks, Map).
%%
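%% Segment files are named after their position and length, e.g. a segment at
%% offset 1024 with 256 bytes of content is stored as "SEG.1024.256";
%% break_segment_filename/1 parses such a name back into offset and size.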
mk_segment_filename({Offset, Content}) ->
lists:concat([?SEGMENT, ".", Offset, ".", byte_size(Content)]).
break_segment_filename(Filename) ->
Regex = "^" ?SEGMENT "[.]([0-9]+)[.]([0-9]+)$",
Result = re:run(Filename, Regex, [{capture, all_but_first, list}]),
case Result of
{match, [Offset, Size]} ->
{ok, #{offset => list_to_integer(Offset), size => list_to_integer(Size)}};
nomatch ->
{error, invalid}
end.
mk_filedir(Storage, {ClientId, FileId}) ->
filename:join([get_storage_root(Storage), ClientId, FileId]).
mk_filepath(Storage, Transfer, Filename) ->
filename:join(mk_filedir(Storage, Transfer), Filename).
get_storage_root(Storage) ->
Storage.
%%
-include_lib("kernel/include/file.hrl").
read_file(Filepath) ->
file:read_file(Filepath).
read_file(Filepath, DecodeFun) ->
case read_file(Filepath) of
{ok, Content} ->
safe_decode(Content, DecodeFun);
{error, _} = Error ->
Error
end.
safe_decode(Content, DecodeFun) ->
try
{ok, DecodeFun(Content)}
catch
_C:_R:_Stacktrace ->
% TODO: Log?
{error, corrupted}
end.
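%% Writes are made atomic by writing the content to a uniquely named temporary
%% file in the target directory and then renaming it over the destination;
%% rename within a single filesystem is atomic, so readers never observe a
%% partially written file.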
write_file_atomic(Filepath, Content) when is_binary(Content) ->
Result = emqx_misc:pipeline(
[
fun filelib:ensure_dir/1,
fun mk_temp_filepath/1,
fun write_contents/2,
fun mv_temp_file/1
],
Filepath,
Content
),
case Result of
{ok, {Filepath, TempFilepath}, _} ->
_ = file:delete(TempFilepath),
ok;
{error, Reason, _} ->
{error, Reason}
end.
mk_temp_filepath(Filepath) ->
Dirname = filename:dirname(Filepath),
Filename = filename:basename(Filepath),
Unique = erlang:unique_integer([positive]),
TempFilepath = filename:join(Dirname, ?TEMP ++ integer_to_list(Unique) ++ "." ++ Filename),
{Filepath, TempFilepath}.
write_contents({_Filepath, TempFilepath}, Content) ->
file:write_file(TempFilepath, Content).
mv_temp_file({Filepath, TempFilepath}) ->
file:rename(TempFilepath, Filepath).
touch_file(Filepath) ->
Now = erlang:localtime(),
file:change_time(Filepath, _Mtime = Now, _Atime = Now).
filtermap_files(Fun, Dirname, Filenames) ->
lists:filtermap(fun(Filename) -> Fun(Dirname, Filename) end, Filenames).
mk_filefrag(Dirname, Filename = ?MANIFEST) ->
mk_filefrag(Dirname, Filename, fun read_filemeta/2);
mk_filefrag(Dirname, Filename = ?SEGMENT ++ _) ->
mk_filefrag(Dirname, Filename, fun read_segmentinfo/2);
mk_filefrag(_Dirname, _) ->
false.
mk_filefrag(Dirname, Filename, Fun) ->
Filepath = filename:join(Dirname, Filename),
{ok, Fileinfo} = file:read_file_info(Filepath),
case Fun(Filename, Filepath) of
{ok, Frag} ->
{true, #{
path => Filepath,
timestamp => Fileinfo#file_info.mtime,
fragment => Frag
}};
{error, _Reason} ->
false
end.
read_filemeta(_Filename, Filepath) ->
read_file(Filepath, fun decode_filemeta/1).
read_segmentinfo(Filename, _Filepath) ->
break_segment_filename(Filename).