From 69b98c183078d7a0f49c6d61d42873dc86394ecd Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 5 Jun 2023 23:28:48 +0300 Subject: [PATCH 1/2] fix(ft): ensure temp filenames are under 255 bytes long --- apps/emqx_ft/src/emqx_ft_fs_util.erl | 23 +++++++++++++++++++ apps/emqx_ft/src/emqx_ft_schema.erl | 4 ++-- .../src/emqx_ft_storage_exporter_fs.erl | 3 +-- apps/emqx_ft/src/emqx_ft_storage_fs.erl | 12 ++-------- apps/emqx_ft/test/emqx_ft_SUITE.erl | 3 ++- apps/emqx_ft/test/emqx_ft_conf_SUITE.erl | 6 ++--- apps/emqx_ft/test/emqx_ft_fs_util_tests.erl | 21 +++++++++++++++++ 7 files changed, 54 insertions(+), 18 deletions(-) diff --git a/apps/emqx_ft/src/emqx_ft_fs_util.erl b/apps/emqx_ft/src/emqx_ft_fs_util.erl index 9028722aa..544585110 100644 --- a/apps/emqx_ft/src/emqx_ft_fs_util.erl +++ b/apps/emqx_ft/src/emqx_ft_fs_util.erl @@ -29,6 +29,8 @@ -export([fold/4]). +-export([mk_temp_filename/1]). + -type foldfun(Acc) :: fun( ( @@ -178,3 +180,24 @@ fold(FoldFun, Acc, It) -> none -> Acc end. + +-spec mk_temp_filename(file:filename()) -> + file:filename(). +mk_temp_filename(Filename) -> + % NOTE + % Using only the first 200 characters of the filename to avoid making filenames + % exceeding 255 bytes in UTF-8. It's actually too conservative, `Suffix` can be + % at most 16 bytes. + Unique = erlang:unique_integer([positive]), + Suffix = binary:encode_hex(<>), + mk_filename([string:slice(Filename, 0, 200), ".", Suffix]). + +mk_filename(Comps) -> + lists:append(lists:map(fun mk_filename_component/1, Comps)). + +mk_filename_component(A) when is_atom(A) -> + atom_to_list(A); +mk_filename_component(B) when is_binary(B) -> + unicode:characters_to_list(B); +mk_filename_component(S) when is_list(S) -> + S. diff --git a/apps/emqx_ft/src/emqx_ft_schema.erl b/apps/emqx_ft/src/emqx_ft_schema.erl index 37508fe3e..2b98562b4 100644 --- a/apps/emqx_ft/src/emqx_ft_schema.erl +++ b/apps/emqx_ft/src/emqx_ft_schema.erl @@ -145,7 +145,7 @@ fields(local_storage_segments) -> [ {root, mk( - binary(), + string(), #{ desc => ?DESC("local_storage_segments_root"), required => false @@ -182,7 +182,7 @@ fields(local_storage_exporter) -> [ {root, mk( - binary(), + string(), #{ desc => ?DESC("local_storage_exporter_root"), required => false diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl index 702bc35ce..ae709dd52 100644 --- a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl @@ -452,8 +452,7 @@ mk_manifest_filename(Filename) when is_binary(Filename) -> <>. mk_temp_absfilepath(Options, Transfer, Filename) -> - Unique = erlang:unique_integer([positive]), - TempFilename = integer_to_list(Unique) ++ "." ++ Filename, + TempFilename = emqx_ft_fs_util:mk_temp_filename(Filename), filename:join(mk_absdir(Options, Transfer, temporary), TempFilename). mk_absdir(Options, _Transfer, temporary) -> diff --git a/apps/emqx_ft/src/emqx_ft_storage_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_fs.erl index 99720f521..e84d35328 100644 --- a/apps/emqx_ft/src/emqx_ft_storage_fs.erl +++ b/apps/emqx_ft/src/emqx_ft_storage_fs.erl @@ -445,16 +445,8 @@ write_file_atomic(Storage, Transfer, Filepath, Content) when is_binary(Content) end. mk_temp_filepath(Storage, Transfer, Filename) -> - Unique = erlang:unique_integer([positive]), - filename:join(get_subdir(Storage, Transfer, temporary), mk_filename([Unique, ".", Filename])). - -mk_filename(Comps) -> - lists:append(lists:map(fun mk_filename_component/1, Comps)). - -mk_filename_component(I) when is_integer(I) -> integer_to_list(I); -mk_filename_component(A) when is_atom(A) -> atom_to_list(A); -mk_filename_component(B) when is_binary(B) -> unicode:characters_to_list(B); -mk_filename_component(S) when is_list(S) -> S. + TempFilename = emqx_ft_fs_util:mk_temp_filename(Filename), + filename:join(get_subdir(Storage, Transfer, temporary), TempFilename). write_contents(Filepath, Content) -> file:write_file(Filepath, Content). diff --git a/apps/emqx_ft/test/emqx_ft_SUITE.erl b/apps/emqx_ft/test/emqx_ft_SUITE.erl index ae274cd86..6861038e9 100644 --- a/apps/emqx_ft/test/emqx_ft_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_SUITE.erl @@ -270,7 +270,8 @@ t_nasty_filenames(_Config) -> Filenames = [ {<<"nasty1">>, "146%"}, {<<"nasty2">>, "🌚"}, - {<<"nasty3">>, "中文.txt"} + {<<"nasty3">>, "中文.txt"}, + {<<"nasty4">>, _254Bytes = string:join(lists:duplicate(255 div 5, "LONG"), ".")} ], ok = lists:foreach( fun({ClientId, Filename}) -> diff --git a/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl index 1f53f88af..f235b5ebb 100644 --- a/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl @@ -87,7 +87,7 @@ t_update_config(_Config) -> ) ), ?assertEqual( - <<"/tmp/path">>, + "/tmp/path", emqx_config:get([file_transfer, storage, local, segments, root]) ), ?assertEqual( @@ -150,7 +150,7 @@ t_disable_restore_config(Config) -> ), ok = emqtt:stop(Client), % Restore local storage backend - Root = iolist_to_binary(emqx_ft_test_helpers:root(Config, node(), [segments])), + Root = emqx_ft_test_helpers:root(Config, node(), [segments]), ?assertMatch( {ok, _}, emqx_conf:update( @@ -177,7 +177,7 @@ t_disable_restore_config(Config) -> [ #{ ?snk_kind := garbage_collection, - storage := #{segments := #{root := Root}} + storage := #{segments := #{gc := #{interval := 1000}}} } ], ?of_kind(garbage_collection, Trace) diff --git a/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl index 1939e74c6..f26dea7e7 100644 --- a/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl +++ b/apps/emqx_ft/test/emqx_ft_fs_util_tests.erl @@ -63,3 +63,24 @@ unescape_filename_test_() -> ?_assertEqual(Input, emqx_ft_fs_util:unescape_filename(Filename)) || {Filename, Input} <- ?NAMES ]. + +mk_temp_filename_test_() -> + [ + ?_assertMatch( + "." ++ Suffix when length(Suffix) == 16, + emqx_ft_fs_util:mk_temp_filename(<<>>) + ), + ?_assertMatch( + "file.name." ++ Suffix when length(Suffix) == 16, + emqx_ft_fs_util:mk_temp_filename("file.name") + ), + ?_assertMatch( + "safe.🦺." ++ Suffix when length(Suffix) == 16, + emqx_ft_fs_util:mk_temp_filename(<<"safe.🦺"/utf8>>) + ), + ?_assertEqual( + % FilenameSlice + Dot + Suffix + 200 + 1 + 16, + length(emqx_ft_fs_util:mk_temp_filename(lists:duplicate(63, "LONG"))) + ) + ]. From dcd59e4f1b069498dd669d9f4efbffa51a624f79 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 6 Jun 2023 10:40:20 +0300 Subject: [PATCH 2/2] fix(ft): set more conservative filename length limit Otherwise, local fs exporter will have a hard time preserving the filemeta, because its filename is even 13 bytes longer. --- apps/emqx_ft/src/emqx_ft_schema.erl | 4 +++- apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl | 12 +++++++++++- apps/emqx_ft/test/emqx_ft_SUITE.erl | 4 +++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/apps/emqx_ft/src/emqx_ft_schema.erl b/apps/emqx_ft/src/emqx_ft_schema.erl index 2b98562b4..c1ee41d0d 100644 --- a/apps/emqx_ft/src/emqx_ft_schema.erl +++ b/apps/emqx_ft/src/emqx_ft_schema.erl @@ -42,7 +42,9 @@ %% on most filesystems. Even though, say, S3 does not have such limitations, it's %% still useful to have a limit on the filename length, to avoid having to deal with %% limits in the storage backends. --define(MAX_FILENAME_BYTELEN, 255). +%% Usual realistic limit is 255 bytes actually, but we leave some room for backends +%% to spare. +-define(MAX_FILENAME_BYTELEN, 240). -import(hoconsc, [ref/2, mk/2]). diff --git a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl index ae709dd52..e211cb421 100644 --- a/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl +++ b/apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl @@ -128,7 +128,17 @@ complete( Filemeta = FilemetaIn#{checksum => Checksum}, ok = file:close(Handle), _ = filelib:ensure_dir(ResultFilepath), - _ = file:write_file(mk_manifest_filename(ResultFilepath), encode_filemeta(Filemeta)), + ManifestFilepath = mk_manifest_filename(ResultFilepath), + case file:write_file(ManifestFilepath, encode_filemeta(Filemeta)) of + ok -> + ok; + {error, Reason} -> + ?SLOG(warning, "filemeta_write_failed", #{ + path => ManifestFilepath, + meta => Filemeta, + reason => Reason + }) + end, file:rename(Filepath, ResultFilepath). -spec discard(export_st()) -> diff --git a/apps/emqx_ft/test/emqx_ft_SUITE.erl b/apps/emqx_ft/test/emqx_ft_SUITE.erl index 6861038e9..c48c77d93 100644 --- a/apps/emqx_ft/test/emqx_ft_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_SUITE.erl @@ -261,6 +261,7 @@ t_nasty_clientids_fileids(_Config) -> fun({ClientId, FileId}) -> ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "justfile", ClientId), [Export] = list_files(ClientId), + ?assertMatch(#{meta := #{name := "justfile"}}, Export), ?assertEqual({ok, ClientId}, read_export(Export)) end, Transfers @@ -271,13 +272,14 @@ t_nasty_filenames(_Config) -> {<<"nasty1">>, "146%"}, {<<"nasty2">>, "🌚"}, {<<"nasty3">>, "中文.txt"}, - {<<"nasty4">>, _254Bytes = string:join(lists:duplicate(255 div 5, "LONG"), ".")} + {<<"nasty4">>, _239Bytes = string:join(lists:duplicate(240 div 5, "LONG"), ".")} ], ok = lists:foreach( fun({ClientId, Filename}) -> FileId = unicode:characters_to_binary(Filename), ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, Filename, FileId), [Export] = list_files(ClientId), + ?assertMatch(#{meta := #{name := Filename}}, Export), ?assertEqual({ok, FileId}, read_export(Export)) end, Filenames