File 4031-Add-max_size-N-extraction-option-to-erl_tar-for-zip-.patch of Package erlang

From 6dfc396de1c8ea581c9b736fc805d3eaf4b3bd2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eric=20Meadows-J=C3=B6nsson?=
 <eric.meadows.jonsson@gmail.com>
Date: Mon, 9 Mar 2026 19:42:03 +0100
Subject: [PATCH] Add {max_size, N} extraction option to erl_tar for zip bomb
 protection

erl_tar has two weaknesses when extracting untrusted archives:
open1 decompresses a compressed binary into memory in a single step
with no limit, and the extraction loop does not track the cumulative
size of the extracted data. The new max_size option addresses
both by stream-inflating compressed binaries with a size limit and
tracking cumulative extracted file sizes during the extraction loop.
---
 lib/stdlib/src/erl_tar.erl    | 85 ++++++++++++++++++++++++++++++++++-
 lib/stdlib/src/erl_tar.hrl    |  4 +-
 lib/stdlib/test/tar_SUITE.erl | 74 +++++++++++++++++++++++++++++-
 3 files changed, 158 insertions(+), 5 deletions(-)

diff --git a/lib/stdlib/src/erl_tar.erl b/lib/stdlib/src/erl_tar.erl
index 58f498463b..0552b289ad 100644
--- a/lib/stdlib/src/erl_tar.erl
+++ b/lib/stdlib/src/erl_tar.erl
@@ -144,6 +144,8 @@ format_error({invalid_gnu_0_1_sparsemap, Format}) ->
     lists:flatten(io_lib:format("Invalid GNU sparse map (version ~s)", [Format]));
 format_error(unsafe_path) ->
     "The path points above the current working directory";
+format_error(too_big) ->
+    "Extraction size exceeds the configured max_size limit";
 format_error({Name,Reason}) ->
     lists:flatten(io_lib:format("~ts: ~ts", [Name,format_error(Reason)]));
 format_error(Atom) when is_atom(Atom) ->
@@ -314,6 +316,12 @@ The following options modify the defaults for the extraction as follows:
 - **`{chunks,ChunkSize}`** - Sets the chunk size, in bytes, for writing extracted
   file data to disk. Defaults to 65536 bytes.
 
+- **`{max_size,Size}`** - Sets a limit on the total size of extracted data. If
+  the cumulative size of all extracted files exceeds `Size` bytes, extraction
+  fails with `{error, too_big}`. When extracting a compressed binary archive,
+  the decompressed binary is also subject to this limit. Defaults to `infinity`
+  (no limit).
+
 > #### Warning {: .warning }
 >
 > The `compressed` and `cooked` flags are invalid when passing a file descriptor
@@ -335,9 +343,80 @@ extract(Name, Opts) when is_list(Name); is_binary(Name), is_list(Opts) ->
 
 do_extract(Handle, Opts) when is_list(Opts) ->
     Opts2 = extract_opts(Opts),
-    Acc = if Opts2#read_opts.output =:= memory -> []; true -> ok end,
-    foldl_read(Handle, fun extract1/4, Acc, Opts2).
+    case maybe_inflate_with_limit(Handle, Opts2) of
+        {error, _} = Err ->                 %% Limit exceeded while inflating.
+            Err;
+        {ok, Handle2, Opts3} ->
+            Acc0 = if Opts3#read_opts.output =:= memory -> []; true -> ok end,
+            Acc = case Opts3#read_opts.max_size of
+                      infinity -> Acc0;      %% No limit: plain accumulator.
+                      _ -> {size_tracked, 0, Acc0} %% Carry a running byte total.
+                  end,
+            foldl_read(Handle2, fun extract1/4, Acc, Opts3)
+    end.
+
+maybe_inflate_with_limit({binary, Bin}, #read_opts{max_size=MaxSize}=Opts)
+  when is_integer(MaxSize), is_binary(Bin) ->  %% Finite limit, binary input.
+    case lists:member(compressed, Opts#read_opts.open_mode) of
+        true ->
+            case inflate_with_limit(Bin, MaxSize) of
+                {ok, Inflated} ->  %% Handled here, so drop 'compressed' below.
+                    OpenMode = Opts#read_opts.open_mode -- [compressed],
+                    {ok, {binary, Inflated}, Opts#read_opts{open_mode=OpenMode}};
+                {error, too_big} ->
+                    {error, too_big}
+            end;
+        false ->           %% Not marked compressed: nothing to inflate.
+            {ok, {binary, Bin}, Opts}
+    end;
+maybe_inflate_with_limit(Handle, Opts) ->  %% All other inputs pass through.
+    {ok, Handle, Opts}.
+
+inflate_with_limit(Bin, MaxSize) ->  %% Inflate Bin, capped at MaxSize bytes.
+    Z = zlib:open(),
+    try
+        zlib:inflateInit(Z, 31, cut),  %% 31 = 16 + 15: presumably gzip framing.
+        inflate_with_limit_loop(Z, Bin, MaxSize, 0, [])
+    catch
+        _:_ -> {ok, Bin}  %% Any exception: hand back the input unchanged.
+    after
+        zlib:close(Z)     %% Runs on every exit path.
+    end.
 
+inflate_with_limit_loop(Z, Bin, MaxSize, Total, Acc) ->  %% Acc: newest first.
+    case zlib:safeInflate(Z, Bin) of
+        {finished, Chunks} ->  %% Last chunk: check the limit, then assemble.
+            Size = iolist_size(Chunks),
+            NewTotal = Total + Size,
+            if NewTotal > MaxSize -> {error, too_big};
+               true -> {ok, iolist_to_binary(lists:reverse(Acc, Chunks))}
+            end;
+        {continue, Chunks} ->  %% More to come: recurse with empty input.
+            Size = iolist_size(Chunks),
+            NewTotal = Total + Size,
+            if NewTotal > MaxSize -> {error, too_big};
+               true -> inflate_with_limit_loop(Z, <<>>, MaxSize, NewTotal, [Chunks|Acc])
+            end
+    end.
+
+extract1(eof, Reader, _, {size_tracked, _, Acc}) when is_list(Acc) ->
+    {ok, {ok, lists:reverse(Acc)}, Reader};
+extract1(eof, Reader, _, {size_tracked, _, leading_slash}) ->
+    error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
+    {ok, ok, Reader};
+extract1(eof, Reader, _, {size_tracked, _, Acc}) ->
+    {ok, Acc, Reader};
+extract1(#tar_header{size=Size}=Header, Reader0, Opts,
+         {size_tracked, Total, InnerAcc}) ->
+    NewTotal = Total + Size,
+    case NewTotal > Opts#read_opts.max_size of
+        true -> throw({error, too_big});
+        false -> ok
+    end,
+    case extract1(Header, Reader0, Opts, InnerAcc) of
+        {ok, NewInnerAcc, Reader1} ->
+            {ok, {size_tracked, NewTotal, NewInnerAcc}, Reader1}
+    end;
 extract1(eof, Reader, _, Acc) when is_list(Acc) ->
     {ok, {ok, lists:reverse(Acc)}, Reader};
 extract1(eof, Reader, _, leading_slash) ->
@@ -2393,6 +2472,8 @@ extract_opts([verbose|Rest], Opts) ->
     extract_opts(Rest, Opts#read_opts{verbose=true});
 extract_opts([{chunks,N}|Rest], Opts) ->
     extract_opts(Rest, Opts#read_opts{chunk_size=N});
+extract_opts([{max_size,N}|Rest], Opts) ->
+    extract_opts(Rest, Opts#read_opts{max_size=N});
 extract_opts([Other|Rest], Opts) ->
     extract_opts(Rest, read_opts([Other], Opts));
 extract_opts([], Opts) ->
diff --git a/lib/stdlib/src/erl_tar.hrl b/lib/stdlib/src/erl_tar.hrl
index b4282c27ac..375ac4bcf8 100644
--- a/lib/stdlib/src/erl_tar.hrl
+++ b/lib/stdlib/src/erl_tar.hrl
@@ -40,7 +40,8 @@
           output = file :: 'file' | 'memory',
           open_mode = [],                      %% Open mode options.
           verbose = false :: boolean(),        %% Verbose on/off.
-          chunk_size = 65536}).                 %% Chunk size for streaming to disk.
+          chunk_size = 65536,                  %% Chunk size for streaming to disk.
+          max_size = infinity :: pos_integer() | 'infinity'}).
 -type read_opts() :: #read_opts{}.
 
 -type add_opt() :: dereference |
@@ -58,6 +59,7 @@
 -type extract_opt() :: {cwd, string()} |
                        {files, [name_in_archive()]} |
                        {chunks, pos_integer()} |
+                       {max_size, pos_integer() | infinity} |
                        compressed |
                        cooked |
                        memory |
diff --git a/lib/stdlib/test/tar_SUITE.erl b/lib/stdlib/test/tar_SUITE.erl
index b535773274..0b5f6be8bb 100644
--- a/lib/stdlib/test/tar_SUITE.erl
+++ b/lib/stdlib/test/tar_SUITE.erl
@@ -32,7 +32,8 @@
          sparse/1, init/1, leading_slash/1, dotdot/1,
          roundtrip_metadata/1, apply_file_info_opts/1,
          incompatible_options/1, table_absolute_names/1,
-         streamed_extract/1, symlink_parent_dir/1]).
+         streamed_extract/1, symlink_parent_dir/1,
+         max_size/1]).
 
 -include_lib("common_test/include/ct.hrl").
 -include_lib("kernel/include/file.hrl").
@@ -48,7 +49,8 @@ all() ->
      read_other_implementations, bsdtgz,
      sparse,init,leading_slash,dotdot,roundtrip_metadata,
      apply_file_info_opts,incompatible_options, table_absolute_names,
-     streamed_extract, symlink_parent_dir].
+     streamed_extract, symlink_parent_dir,
+     max_size].
 
 groups() -> 
     [].
@@ -1242,6 +1244,74 @@ streamed_extract(Config) ->
 
     ok.
 
+max_size(Config) when is_list(Config) ->  %% Tests the {max_size,N} option.
+    PrivDir = proplists:get_value(priv_dir, Config),
+    Dir = filename:join(PrivDir, "max_size"),
+    ok = file:make_dir(Dir),
+
+    Data1 = crypto:strong_rand_bytes(1000),
+    Data2 = crypto:strong_rand_bytes(2000),
+    FileBins = [{"file1", Data1}, {"file2", Data2}],
+    TotalSize = byte_size(Data1) + byte_size(Data2),  %% Payload bytes only.
+
+    TarName = filename:join(Dir, "test.tar"),
+    ok = erl_tar:create(TarName, FileBins),
+
+    %% Memory extraction: limit smaller than total should fail.
+    {error, too_big} = erl_tar:extract(TarName,
+                                       [memory, {max_size, TotalSize - 1}]),
+
+    %% Memory extraction: limit equal to total should succeed.
+    {ok, _} = erl_tar:extract(TarName, [memory, {max_size, TotalSize}]),
+
+    %% Memory extraction: infinity (default) should succeed.
+    {ok, _} = erl_tar:extract(TarName, [memory]),
+
+    %% Disk extraction: limit smaller than total should fail.
+    ExtractDir1 = filename:join(Dir, "extract1"),
+    ok = file:make_dir(ExtractDir1),
+    {error, too_big} = erl_tar:extract(TarName,
+                                       [{cwd, ExtractDir1},
+                                        {max_size, TotalSize - 1}]),
+
+    %% Disk extraction: limit equal to total should succeed.
+    ExtractDir2 = filename:join(Dir, "extract2"),
+    ok = file:make_dir(ExtractDir2),
+    ok = erl_tar:extract(TarName, [{cwd, ExtractDir2},
+                                   {max_size, TotalSize}]),
+
+    %% Binary extraction: limit should work.
+    {ok, TarBin} = file:read_file(TarName),
+    {error, too_big} = erl_tar:extract({binary, TarBin},
+                                       [memory, {max_size, TotalSize - 1}]),
+    {ok, _} = erl_tar:extract({binary, TarBin},
+                              [memory, {max_size, TotalSize}]),
+
+    %% Compressed binary: limit should apply to decompressed data.
+    %% The decompressed tar includes headers and padding so it's larger
+    %% than just the file content. A very small limit should still trigger
+    %% too_big during decompression.
+    GzTarName = filename:join(Dir, "test.tar.gz"),
+    ok = erl_tar:create(GzTarName, FileBins, [compressed]),
+    {ok, GzBin} = file:read_file(GzTarName),
+    {error, too_big} = erl_tar:extract({binary, GzBin},
+                                       [memory, compressed,
+                                        {max_size, 1}]),
+    %% A large enough limit should succeed (tar overhead is headers + padding).
+    {ok, _} = erl_tar:extract({binary, GzBin},
+                              [memory, compressed, {max_size, 10 * TotalSize}]),
+
+    %% File path extraction: a 1-byte limit is below either file's size.
+    {error, too_big} = erl_tar:extract(TarName,
+                                       [memory, {max_size, 1}]),
+    {ok, _} = erl_tar:extract(TarName,
+                              [memory, {max_size, TotalSize}]),
+
+    %% Clean up.
+    ok = delete_files([Dir]),
+
+    verify_ports(Config).
+
 %% Delete the given list of files.
 delete_files([]) -> ok;
 delete_files([Item|Rest]) ->
-- 
2.51.0

openSUSE Build Service is sponsored by