diff --git a/apps/emqx_ft/src/emqx_ft_fs_util.erl b/apps/emqx_ft/src/emqx_ft_fs_util.erl index 198f4ccc5..75fb47c27 100644 --- a/apps/emqx_ft/src/emqx_ft_fs_util.erl +++ b/apps/emqx_ft/src/emqx_ft_fs_util.erl @@ -18,6 +18,10 @@ -include_lib("snabbkaffe/include/trace.hrl"). +-export([is_filename_safe/1]). +-export([escape_filename/1]). +-export([unescape_filename/1]). + -export([read_decode_file/2]). -export([fold/4]). @@ -35,6 +39,86 @@ ) -> Acc ). +-define(IS_UNSAFE(C), + ((C) =:= $% orelse + (C) =:= $: orelse + (C) =:= $\\ orelse + (C) =:= $/) +). + +-define(IS_PRINTABLE(C), + % NOTE: See `io_lib:printable_unicode_list/1` + (((C) >= 32 andalso (C) =< 126) orelse + ((C) >= 16#A0 andalso (C) < 16#D800) orelse + ((C) > 16#DFFF andalso (C) < 16#FFFE) orelse + ((C) > 16#FFFF andalso (C) =< 16#10FFFF)) +). + +%% + +-spec is_filename_safe(file:filename_all()) -> ok | {error, atom()}. +is_filename_safe(FN) when is_binary(FN) -> + is_filename_safe(unicode:characters_to_list(FN)); +is_filename_safe("") -> + {error, empty}; +is_filename_safe(FN) when FN == "." orelse FN == ".." -> + {error, special}; +is_filename_safe(FN) -> + verify_filename_safe(FN). + +verify_filename_safe([$% | Rest]) -> + verify_filename_safe(Rest); +verify_filename_safe([C | _]) when ?IS_UNSAFE(C) -> + {error, unsafe}; +verify_filename_safe([C | _]) when not ?IS_PRINTABLE(C) -> + {error, nonprintable}; +verify_filename_safe([_ | Rest]) -> + verify_filename_safe(Rest); +verify_filename_safe([]) -> + ok. + +-spec escape_filename(binary()) -> file:name(). +escape_filename(Name) when Name == <<".">> orelse Name == <<"..">> -> + lists:reverse(percent_encode(Name, "")); +escape_filename(Name) -> + escape(Name, ""). + +escape(<>, Acc) when ?IS_UNSAFE(C) -> + escape(Rest, percent_encode(<>, Acc)); +escape(<>, Acc) when not ?IS_PRINTABLE(C) -> + escape(Rest, percent_encode(<>, Acc)); +escape(<>, Acc) -> + escape(Rest, [C | Acc]); +escape(<<>>, Acc) -> + lists:reverse(Acc). + +-spec unescape_filename(file:name()) -> binary(). +unescape_filename(Name) -> + unescape(Name, <<>>). + +unescape([$%, A, B | Rest], Acc) -> + unescape(Rest, percent_decode(A, B, Acc)); +unescape([C | Rest], Acc) -> + unescape(Rest, <>); +unescape([], Acc) -> + Acc. + +percent_encode(<>, Acc) -> + percent_encode(Rest, [dec2hex(B), dec2hex(A), $% | Acc]); +percent_encode(<<>>, Acc) -> + Acc. + +percent_decode(A, B, Acc) -> + <>. + +dec2hex(X) when (X >= 0) andalso (X =< 9) -> X + $0; +dec2hex(X) when (X >= 10) andalso (X =< 15) -> X + $A - 10. + +hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0; +hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10; +hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10; +hex2dec(_) -> error(badarg). + %% -spec read_decode_file(file:name(), fun((binary()) -> Value)) -> diff --git a/apps/emqx_ft/src/emqx_ft_schema.erl b/apps/emqx_ft/src/emqx_ft_schema.erl index 37e2adafc..5c0a764a2 100644 --- a/apps/emqx_ft/src/emqx_ft_schema.erl +++ b/apps/emqx_ft/src/emqx_ft_schema.erl @@ -130,8 +130,11 @@ desc(local_storage_gc) -> schema(filemeta) -> #{ roots => [ - % TODO nonempty - {name, hoconsc:mk(string(), #{required => true})}, + {name, + hoconsc:mk(string(), #{ + required => true, + validator => validator(filename) + })}, {size, hoconsc:mk(non_neg_integer())}, {expire_at, hoconsc:mk(non_neg_integer())}, {checksum, hoconsc:mk({atom(), binary()}, #{converter => converter(checksum)})}, @@ -140,6 +143,9 @@ schema(filemeta) -> ] }. +validator(filename) -> + fun emqx_ft_fs_util:is_filename_safe/1. + converter(checksum) -> fun (undefined, #{}) -> diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl index e0bffd444..6456e2ba5 100644 --- a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl @@ -343,7 +343,16 @@ mk_result_reldir(Transfer = {ClientId, FileId}) -> Bucket2:?BUCKET2_LEN/binary, BucketRest/binary >> = binary:encode_hex(Hash), - [Bucket1, Bucket2, BucketRest, ClientId, FileId]. + [ + Bucket1, + Bucket2, + BucketRest, + emqx_ft_fs_util:escape_filename(ClientId), + emqx_ft_fs_util:escape_filename(FileId) + ]. + +dirnames_to_transfer(ClientId, FileId) -> + {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}. mk_transfer_hash(Transfer) -> crypto:hash(?BUCKET_HASH, term_to_binary(Transfer)). diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_fs.erl index 4a613dbcf..6c5b6962c 100644 --- a/apps/emqx_ft/src/emqx_ft_storage_fs.erl +++ b/apps/emqx_ft/src/emqx_ft_storage_fs.erl @@ -250,12 +250,12 @@ transfers(Storage) -> )}. transfers(Storage, ClientId, AccIn) -> - Dirname = mk_client_filedir(Storage, ClientId), + Dirname = filename:join(get_storage_root(Storage), ClientId), case file:list_dir(Dirname) of {ok, FileIds} -> lists:foldl( fun(FileId, Acc) -> - Transfer = {filename_to_binary(ClientId), filename_to_binary(FileId)}, + Transfer = dirnames_to_transfer(ClientId, FileId), read_transferinfo(Storage, Transfer, Acc) end, AccIn, @@ -327,10 +327,15 @@ break_segment_filename(Filename) -> end. mk_filedir(Storage, {ClientId, FileId}, SubDirs) -> - filename:join([get_storage_root(Storage), ClientId, FileId | SubDirs]). + filename:join([ + get_storage_root(Storage), + emqx_ft_fs_util:escape_filename(ClientId), + emqx_ft_fs_util:escape_filename(FileId) + | SubDirs + ]). -mk_client_filedir(Storage, ClientId) -> - filename:join([get_storage_root(Storage), ClientId]). +dirnames_to_transfer(ClientId, FileId) -> + {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}. mk_filepath(Storage, Transfer, SubDirs, Filename) -> filename:join(mk_filedir(Storage, Transfer, SubDirs), Filename). @@ -432,6 +437,3 @@ read_frag_filemeta(_Filename, Filepath) -> read_frag_segmentinfo(Filename, _Filepath) -> break_segment_filename(Filename). - -filename_to_binary(S) when is_list(S) -> unicode:characters_to_binary(S); -filename_to_binary(B) when is_binary(B) -> B. diff --git a/apps/emqx_ft/test/emqx_ft_SUITE.erl b/apps/emqx_ft/test/emqx_ft_SUITE.erl index 5fb384c16..1a886c00d 100644 --- a/apps/emqx_ft/test/emqx_ft_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_SUITE.erl @@ -165,6 +165,29 @@ t_invalid_fileid(Config) -> emqtt:publish(C, <<"$file//init">>, <<>>, 1) ). +t_invalid_filename(Config) -> + C = ?config(client, Config), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f1">>), emqx_json:encode(meta(".", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f2">>), emqx_json:encode(meta("..", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f2">>), emqx_json:encode(meta("../nice", <<>>)), 1) + ), + ?assertRCName( + unspecified_error, + emqtt:publish(C, mk_init_topic(<<"f3">>), emqx_json:encode(meta("/etc/passwd", <<>>)), 1) + ), + ?assertRCName( + success, + emqtt:publish(C, mk_init_topic(<<"f4">>), emqx_json:encode(meta("146%", <<>>)), 1) + ). + t_simple_transfer(Config) -> C = ?config(client, Config), @@ -202,6 +225,24 @@ t_simple_transfer(Config) -> read_export(Export) ). +t_nasty_clientids_fileids(_Config) -> + Transfers = [ + {<<".">>, <<".">>}, + {<<"🌚"/utf8>>, <<"🌝"/utf8>>}, + {<<"../..">>, <<"😤"/utf8>>}, + {<<"/etc/passwd">>, <<"whitehat">>}, + {<<"; rm -rf / ;">>, <<"whitehat">>} + ], + + ok = lists:foreach( + fun({ClientId, FileId}) -> + ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "justfile", ClientId), + [Export] = list_exports(ClientId), + ?assertEqual({ok, ClientId}, read_export(Export)) + end, + Transfers + ). + t_meta_conflict(Config) -> C = ?config(client, Config), diff --git a/apps/emqx_ft/test/emqx_ft_api_SUITE.erl b/apps/emqx_ft/test/emqx_ft_api_SUITE.erl index aff4d864c..56622f3cd 100644 --- a/apps/emqx_ft/test/emqx_ft_api_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_api_SUITE.erl @@ -60,7 +60,7 @@ end_per_testcase(_Case, _Config) -> t_list_ready_transfers(Config) -> ClientId = client_id(Config), - ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, <<"data">>, node()), + ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, "f1", <<"data">>), {ok, 200, #{<<"files">> := Files}} = request(get, uri(["file_transfer", "files"]), fun json/1), @@ -73,7 +73,7 @@ t_list_ready_transfers(Config) -> t_download_transfer(Config) -> ClientId = client_id(Config), - ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, <<"data">>, node()), + ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, "f1", <<"data">>), ?assertMatch( {ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}}, diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl new file mode 100644 index 000000000..1939e74c6 --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl @@ -0,0 +1,65 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_fs_util_tests). + +-include_lib("eunit/include/eunit.hrl"). + +filename_safe_test_() -> + [ + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe("im.safe")), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"im.safe">>)), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<".safe.100%">>)), + ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"safe.as.🦺"/utf8>>)) + ]. + +filename_unsafe_test_() -> + [ + ?_assertEqual({error, empty}, emqx_ft_fs_util:is_filename_safe("")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(".")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe("..")), + ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(<<"..">>)), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe(<<".././..">>)), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("/etc/passwd")), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("../cookie")), + ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("C:$cookie")), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([1, 2, 3])), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe(<<4, 5, 6>>)), + ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([$a, 16#7F, $z])) + ]. + +-define(NAMES, [ + {"just.file", <<"just.file">>}, + {".hidden", <<".hidden">>}, + {".~what", <<".~what">>}, + {"100%25.file", <<"100%.file">>}, + {"%2E%2E", <<"..">>}, + {"...", <<"...">>}, + {"%2Fetc%2Fpasswd", <<"/etc/passwd">>}, + {"%01%02%0A ", <<1, 2, 10, 32>>} +]). + +escape_filename_test_() -> + [ + ?_assertEqual(Filename, emqx_ft_fs_util:escape_filename(Input)) + || {Filename, Input} <- ?NAMES + ]. + +unescape_filename_test_() -> + [ + ?_assertEqual(Input, emqx_ft_fs_util:unescape_filename(Filename)) + || {Filename, Input} <- ?NAMES + ]. diff --git a/apps/emqx_ft/test/emqx_ft_test_helpers.erl b/apps/emqx_ft/test/emqx_ft_test_helpers.erl index e62c74d81..a38242629 100644 --- a/apps/emqx_ft/test/emqx_ft_test_helpers.erl +++ b/apps/emqx_ft/test/emqx_ft_test_helpers.erl @@ -60,14 +60,17 @@ tcp_port(Node) -> root(Config, Node, Tail) -> filename:join([?config(priv_dir, Config), "file_transfer", Node | Tail]). -upload_file(ClientId, FileId, Data, Node) -> +upload_file(ClientId, FileId, Name, Data) -> + upload_file(ClientId, FileId, Name, Data, node()). + +upload_file(ClientId, FileId, Name, Data, Node) -> Port = tcp_port(Node), Size = byte_size(Data), {ok, C1} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, Port}]), {ok, _} = emqtt:connect(C1), Meta = #{ - name => FileId, + name => unicode:characters_to_binary(Name), expire_at => erlang:system_time(_Unit = second) + 3600, size => Size },