File 4031-Add-max_size-N-extraction-option-to-erl_tar-for-zip-.patch of Package erlang
From 6dfc396de1c8ea581c9b736fc805d3eaf4b3bd2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eric=20Meadows-J=C3=B6nsson?=
<eric.meadows.jonsson@gmail.com>
Date: Mon, 9 Mar 2026 19:42:03 +0100
Subject: [PATCH] Add {max_size, N} extraction option to erl_tar for zip bomb
protection
Two possible vulnerabilities exist for users of erl_tar: open1
decompresses a compressed binary entirely into memory at once with no
size limit, and the extraction loop performs no cumulative size
tracking. The new max_size option addresses both by stream-inflating
compressed binaries under a size limit and by tracking the cumulative
size of extracted files during the extraction loop.
---
lib/stdlib/src/erl_tar.erl | 85 ++++++++++++++++++++++++++++++++++-
lib/stdlib/src/erl_tar.hrl | 4 +-
lib/stdlib/test/tar_SUITE.erl | 74 +++++++++++++++++++++++++++++-
3 files changed, 158 insertions(+), 5 deletions(-)
diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 58f498463b..0552b289ad 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -144,6 +144,8 @@ format_error({invalid_gnu_0_1_sparsemap, Format}) ->
lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
format_error(unsafe_path) ->
"The path points above the current working directory";
+format_error(too_big) ->
+ "Extraction size exceeds the configured max_size limit";
format_error({Name,Reason}) ->
lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
format_error(Atom) when is_atom(Atom) ->
@@ -314,6 +316,12 @@ The following options modify the defaults for the extraction as follows:
- **`{chunks,ChunkSize}`** - Sets the chunk size, in bytes, for writing extracted
file data to disk. Defaults to 65536 bytes.
+- **`{max_size,Size}`** - Sets a limit on the total size of extracted data. If
+ the cumulative size of all extracted files exceeds `Size` bytes, extraction
+ fails with `{error, too_big}`. When extracting a compressed binary archive,
+ the decompressed binary is also subject to this limit. Defaults to `infinity`
+ (no limit).
+
> #### Warning {: .warning }
>
> The `compressed` and `cooked` flags are invalid when passing a file descriptor
@@ -335,9 +343,80 @@ extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
do_extract(Handle, Opts) when is_list(Opts) ->
Opts2 = extract_opts(Opts),
- Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
- foldl_read(Handle, fun extract1/4, Acc, Opts2).
+ case maybe_inflate_with_limit(Handle, Opts2) of
+ {error, _} = Err ->
+ Err;
+ {ok, Handle2, Opts3} ->
+ Acc0 = if Opts3#read_opts.output =:= memory -> []; true -> ok end,
+ Acc = case Opts3#read_opts.max_size of
+ infinity -> Acc0;
+ _ -> {size_tracked, 0, Acc0}
+ end,
+ foldl_read(Handle2, fun extract1/4, Acc, Opts3)
+ end.
+
+maybe_inflate_with_limit({binary, Bin}, #read_opts{max_size=MaxSize}=Opts)
+ when is_integer(MaxSize), is_binary(Bin) ->
+ case lists:member(compressed, Opts#read_opts.open_mode) of
+ true ->
+ case inflate_with_limit(Bin, MaxSize) of
+ {ok, Inflated} ->
+ OpenMode = Opts#read_opts.open_mode -- [compressed],
+ {ok, {binary, Inflated}, Opts#read_opts{open_mode=OpenMode}};
+            {error, too_big} = Err -> Err;
+            {error, inflate_failed} -> {ok, {binary, Bin}, Opts}
+        end;
+        false ->
+            {ok, {binary, Bin}, Opts}
+    end;
+maybe_inflate_with_limit(Handle, Opts) ->
+    {ok, Handle, Opts}.
+
+inflate_with_limit(Bin, MaxSize) ->
+    Z = zlib:open(),
+    try
+        zlib:inflateInit(Z, 31, cut),
+        inflate_with_limit_loop(Z, Bin, MaxSize, 0, [])
+    catch
+        _:_ -> {error, inflate_failed}
+    after
+        zlib:close(Z)
+    end.
+inflate_with_limit_loop(Z, Bin, MaxSize, Total, Acc) ->
+ case zlib:safeInflate(Z, Bin) of
+ {finished, Chunks} ->
+ Size = iolist_size(Chunks),
+ NewTotal = Total + Size,
+ if NewTotal > MaxSize -> {error, too_big};
+ true -> {ok, iolist_to_binary(lists:reverse(Acc, Chunks))}
+ end;
+ {continue, Chunks} ->
+ Size = iolist_size(Chunks),
+ NewTotal = Total + Size,
+ if NewTotal > MaxSize -> {error, too_big};
+ true -> inflate_with_limit_loop(Z, <<>>, MaxSize, NewTotal, [Chunks|Acc])
+ end
+ end.
+
+extract1(eof, Reader, _, {size_tracked, _, Acc}) when is_list(Acc) ->
+ {ok, {ok, lists:reverse(Acc)}, Reader};
+extract1(eof, Reader, _, {size_tracked, _, leading_slash}) ->
+ error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
+ {ok, ok, Reader};
+extract1(eof, Reader, _, {size_tracked, _, Acc}) ->
+ {ok, Acc, Reader};
+extract1(#tar_header{size=Size}=Header, Reader0, Opts,
+ {size_tracked, Total, InnerAcc}) ->
+ NewTotal = Total + Size,
+ case NewTotal > Opts#read_opts.max_size of
+ true -> throw({error, too_big});
+ false -> ok
+ end,
+ case extract1(Header, Reader0, Opts, InnerAcc) of
+ {ok, NewInnerAcc, Reader1} ->
+ {ok, {size_tracked, NewTotal, NewInnerAcc}, Reader1}
+ end;
extract1(eof, Reader, _, Acc) when is_list(Acc) ->
{ok, {ok, lists:reverse(Acc)}, Reader};
extract1(eof, Reader, _, leading_slash) ->
@@ -2393,6 +2472,8 @@ extract_opts([verbose|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{verbose=true});
extract_opts([{chunks,N}|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{chunk_size=N});
+extract_opts([{max_size,N}|Rest], Opts) ->
+ extract_opts(Rest, Opts#read_opts{max_size=N});
extract_opts([Other|Rest], Opts) ->
extract_opts(Rest, read_opts([Other], Opts));
extract_opts([], Opts) ->
diff --git a/lib/stdlib/src/erl_tar.hrl b/lib/stdlib/src/erl_tar.hrl
index b4282c27ac..375ac4bcf8 100644
--- a/lib/stdlib/src/erl_tar.hrl
+++ b/lib/stdlib/src/erl_tar.hrl
@@ -40,7 +40,8 @@
output = file :: 'file' | 'memory',
open_mode = [], %% Open mode options.
verbose = false :: boolean(), %% Verbose on/off.
- chunk_size = 65536}). %% Chunk size for streaming to disk.
+ chunk_size = 65536, %% Chunk size for streaming to disk.
+ max_size = infinity :: pos_integer() | 'infinity'}).
-type read_opts() :: #read_opts{}.
-type add_opt() :: dereference |
@@ -58,6 +59,7 @@
-type extract_opt() :: {cwd, string()} |
{files, [name_in_archive()]} |
{chunks, pos_integer()} |
+ {max_size, pos_integer() | infinity} |
compressed |
cooked |
memory |
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index b535773274..0b5f6be8bb 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -32,7 +32,8 @@
sparse/1, init/1, leading_slash/1, dotdot/1,
roundtrip_metadata/1, apply_file_info_opts/1,
incompatible_options/1, table_absolute_names/1,
- streamed_extract/1, symlink_parent_dir/1]).
+ streamed_extract/1, symlink_parent_dir/1,
+         max_size/1]).
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
@@ -48,7 +49,8 @@ all() ->
read_other_implementations, bsdtgz,
sparse,init,leading_slash,dotdot,roundtrip_metadata,
apply_file_info_opts,incompatible_options, table_absolute_names,
- streamed_extract, symlink_parent_dir].
+ streamed_extract, symlink_parent_dir,
+ max_size].
groups() ->
[].
@@ -1242,6 +1244,74 @@ streamed_extract(Config) ->
ok.
+max_size(Config) when is_list(Config) ->
+ PrivDir = proplists:get_value(priv_dir, Config),
+ Dir = filename:join(PrivDir, "max_size"),
+ ok = file:make_dir(Dir),
+
+ Data1 = crypto:strong_rand_bytes(1000),
+ Data2 = crypto:strong_rand_bytes(2000),
+ FileBins = [{"file1", Data1}, {"file2", Data2}],
+ TotalSize = byte_size(Data1) + byte_size(Data2),
+
+ TarName = filename:join(Dir, "test.tar"),
+ ok = erl_tar:create(TarName, FileBins),
+
+ %% Memory extraction: limit smaller than total should fail.
+ {error, too_big} = erl_tar:extract(TarName,
+ [memory, {max_size, TotalSize - 1}]),
+
+ %% Memory extraction: limit equal to total should succeed.
+ {ok, _} = erl_tar:extract(TarName, [memory, {max_size, TotalSize}]),
+
+ %% Memory extraction: infinity (default) should succeed.
+ {ok, _} = erl_tar:extract(TarName, [memory]),
+
+ %% Disk extraction: limit smaller than total should fail.
+ ExtractDir1 = filename:join(Dir, "extract1"),
+ ok = file:make_dir(ExtractDir1),
+ {error, too_big} = erl_tar:extract(TarName,
+ [{cwd, ExtractDir1},
+ {max_size, TotalSize - 1}]),
+
+ %% Disk extraction: limit equal to total should succeed.
+ ExtractDir2 = filename:join(Dir, "extract2"),
+ ok = file:make_dir(ExtractDir2),
+ ok = erl_tar:extract(TarName, [{cwd, ExtractDir2},
+ {max_size, TotalSize}]),
+
+ %% Binary extraction: limit should work.
+ {ok, TarBin} = file:read_file(TarName),
+ {error, too_big} = erl_tar:extract({binary, TarBin},
+ [memory, {max_size, TotalSize - 1}]),
+ {ok, _} = erl_tar:extract({binary, TarBin},
+ [memory, {max_size, TotalSize}]),
+
+ %% Compressed binary: limit should apply to decompressed data.
+ %% The decompressed tar includes headers and padding so it's larger
+ %% than just the file content. A very small limit should still trigger
+ %% too_big during decompression.
+ GzTarName = filename:join(Dir, "test.tar.gz"),
+ ok = erl_tar:create(GzTarName, FileBins, [compressed]),
+ {ok, GzBin} = file:read_file(GzTarName),
+ {error, too_big} = erl_tar:extract({binary, GzBin},
+ [memory, compressed,
+ {max_size, 1}]),
+ %% A large enough limit should succeed (tar overhead is headers + padding).
+ {ok, _} = erl_tar:extract({binary, GzBin},
+ [memory, compressed, {max_size, 10 * TotalSize}]),
+
+ %% File path extraction with max_size.
+ {error, too_big} = erl_tar:extract(TarName,
+ [memory, {max_size, 1}]),
+ {ok, _} = erl_tar:extract(TarName,
+ [memory, {max_size, TotalSize}]),
+
+ %% Clean up.
+ ok = delete_files([Dir]),
+
+ verify_ports(Config).
+
%% Delete the given list of files.
delete_files([]) -> ok;
delete_files([Item|Rest]) ->
--
2.51.0