File CVE-2025-66471.patch of Package python-urllib3.42446
From c19571de34c47de3a766541b041637ba5f716ed7 Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Fri, 5 Dec 2025 16:40:41 +0200
Subject: [PATCH] Merge commit from fork
* Prevent decompression bomb for zstd in Python 3.14
* Add experimental `decompress_iter` for Brotli
* Update changes for Brotli
* Add `GzipDecoder.decompress_iter`
* Test https://github.com/python-hyper/brotlicffi/pull/207
* Pin Brotli
* Add `decompress_iter` to all decoders and make tests pass
* Pin brotlicffi to an official release
* Revert changes to response.py
* Add `max_length` parameter to all `decompress` methods
* Fix the `test_brotlipy` session
* Unset `_data` on gzip error
* Add a test for memory usage
* Test more methods
* Fix the test for `stream`
* Cover more lines with tests
* Add more coverage
* Make `read1` a bit more efficient
* Fix PyPy tests for Brotli
* Revert an unnecessarily moved check
* Add some comments
* Leave just one `self._obj.decompress` call in `GzipDecoder`
* Refactor test params
* Test reads with all data already in the decompressor
* Prevent needless copying of data decoded with `max_length`
* Rename the changed test
* Note that responses of unknown length should be streamed too
* Add a changelog entry
* Avoid returning a memory view from `BytesQueueBuffer`
* Add one more note to the changelog entry
---
CHANGES.rst | 22 ++++
docs/advanced-usage.rst | 3 +-
docs/user-guide.rst | 4 +-
noxfile.py | 16 ++-
pyproject.toml | 5 +-
src/urllib3/response.py | 279 ++++++++++++++++++++++++++++++++++------
test/test_response.py | 269 +++++++++++++++++++++++++++++++++++++-
uv.lock | 177 +++++++++++--------------
8 files changed, 621 insertions(+), 154 deletions(-)
Index: urllib3-2.0.7/docs/advanced-usage.rst
===================================================================
--- urllib3-2.0.7.orig/docs/advanced-usage.rst
+++ urllib3-2.0.7/docs/advanced-usage.rst
@@ -66,7 +66,8 @@ When using ``preload_content=True`` (the
response body will be read immediately into memory and the HTTP connection
will be released back into the pool without manual intervention.
-However, when dealing with large responses it's often better to stream the response
+However, when dealing with responses of large or unknown length,
+it's often better to stream the response
content using ``preload_content=False``. Setting ``preload_content`` to ``False`` means
that urllib3 will only read from the socket when data is requested.
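The streaming pattern recommended here looks roughly like this (a minimal sketch against urllib3's public API; the URL and the consume() sink are illustrative):

    import urllib3

    with urllib3.PoolManager() as http:
        # preload_content=False defers reading: bytes are pulled from
        # the socket only as they are requested.
        resp = http.request(
            "GET", "https://example.com/large", preload_content=False
        )
        for chunk in resp.stream(65536):  # at most 64 KiB per iteration
            consume(chunk)  # hypothetical sink
        resp.release_conn()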
Index: urllib3-2.0.7/docs/user-guide.rst
===================================================================
--- urllib3-2.0.7.orig/docs/user-guide.rst
+++ urllib3-2.0.7/docs/user-guide.rst
@@ -143,8 +143,8 @@ to a byte string representing the respon
print(resp.data)
# b"\xaa\xa5H?\x95\xe9\x9b\x11"
-.. note:: For larger responses, it's sometimes better to :ref:`stream <stream>`
- the response.
+.. note:: For responses of large or unknown length, it's sometimes better to
+ :ref:`stream <stream>` the response.
Using io Wrappers with Response Content
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Index: urllib3-2.0.7/pyproject.toml
===================================================================
--- urllib3-2.0.7.orig/pyproject.toml
+++ urllib3-2.0.7/pyproject.toml
@@ -40,8 +40,8 @@ dynamic = ["version"]
[project.optional-dependencies]
brotli = [
- "brotli>=1.0.9; platform_python_implementation == 'CPython'",
- "brotlicffi>=0.8.0; platform_python_implementation != 'CPython'"
+ "brotli>=1.2.0; platform_python_implementation == 'CPython'",
+ "brotlicffi>=1.2.0.0; platform_python_implementation != 'CPython'"
]
zstd = [
"zstandard>=0.18.0",
@@ -98,6 +98,7 @@ filterwarnings = [
'''default:ssl\.PROTOCOL_TLSv1_2 is deprecated:DeprecationWarning''',
'''default:unclosed .*:ResourceWarning''',
'''default:ssl NPN is deprecated, use ALPN instead:DeprecationWarning''',
+ '''default:Brotli >= 1.2.0 is required to prevent decompression bombs\.:urllib3.exceptions.DependencyWarning''',
]
[tool.isort]
Index: urllib3-2.0.7/src/urllib3/response.py
===================================================================
--- urllib3-2.0.7.orig/src/urllib3/response.py
+++ urllib3-2.0.7/src/urllib3/response.py
@@ -22,21 +22,6 @@ try:
except ImportError:
brotli = None
-try:
- import zstandard as zstd # type: ignore[import]
-
- # The package 'zstandard' added the 'eof' property starting
- # in v0.18.0 which we require to ensure a complete and
- # valid zstd stream was fed into the ZstdDecoder.
- # See: https://github.com/urllib3/urllib3/pull/2624
- _zstd_version = _zstd_version = tuple(
- map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
- )
- if _zstd_version < (0, 18): # Defensive:
- zstd = None
-
-except (AttributeError, ImportError, ValueError): # Defensive:
- zstd = None
from . import util
from ._base_connection import _TYPE_BODY
@@ -45,6 +30,7 @@ from .connection import BaseSSLError, HT
from .exceptions import (
BodyNotHttplibCompatible,
DecodeError,
+ DependencyWarning,
HTTPError,
IncompleteRead,
InvalidChunkLength,
@@ -66,7 +52,11 @@ log = logging.getLogger(__name__)
class ContentDecoder:
- def decompress(self, data: bytes) -> bytes:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ raise NotImplementedError()
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
raise NotImplementedError()
def flush(self) -> bytes:
@@ -76,30 +66,57 @@ class ContentDecoder:
class DeflateDecoder(ContentDecoder):
def __init__(self) -> None:
self._first_try = True
- self._data = b""
+ self._first_try_data = b""
+ self._unfed_data = b""
self._obj = zlib.decompressobj()
- def decompress(self, data: bytes) -> bytes:
- if not data:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ data = self._unfed_data + data
+ self._unfed_data = b""
+ if not data and not self._obj.unconsumed_tail:
return data
+ original_max_length = max_length
+ if original_max_length < 0:
+ max_length = 0
+ elif original_max_length == 0:
+ # We should not pass 0 to the zlib decompressor because 0 is
+ # the default value that will make zlib decompress without a
+ # length limit.
+ # Data should be stored for subsequent calls.
+ self._unfed_data = data
+ return b""
+ # Subsequent calls always reuse `self._obj`. zlib requires
+ # passing the unconsumed tail if decompression is to continue.
if not self._first_try:
- return self._obj.decompress(data)
+ return self._obj.decompress(
+ self._obj.unconsumed_tail + data, max_length=max_length
+ )
- self._data += data
+ # First call tries with RFC 1950 ZLIB format.
+ self._first_try_data += data
try:
- decompressed = self._obj.decompress(data)
+ decompressed = self._obj.decompress(data, max_length=max_length)
if decompressed:
self._first_try = False
- self._data = None # type: ignore[assignment]
+ self._first_try_data = b""
return decompressed
+ # On failure, it falls back to RFC 1951 DEFLATE format.
except zlib.error:
self._first_try = False
self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
try:
- return self.decompress(self._data)
+ return self.decompress(
+ self._first_try_data, max_length=original_max_length
+ )
finally:
- self._data = None # type: ignore[assignment]
+ self._first_try_data = b""
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return bool(self._unfed_data) or (
+ bool(self._obj.unconsumed_tail) and not self._first_try
+ )
def flush(self) -> bytes:
return self._obj.flush()
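The zlib semantics that the comments above rely on can be observed in isolation; a stdlib-only sketch, independent of the patch:

    import zlib

    compressed = zlib.compress(b"x" * 10_000)
    d = zlib.decompressobj()
    # max_length caps the output, but 0 (zlib's default) means
    # "unlimited", which is why a literal 0 is never forwarded.
    first = d.decompress(compressed, 100)
    assert len(first) == 100
    # Input that was not consumed must be fed back on the next call.
    rest = d.decompress(d.unconsumed_tail)
    assert first + rest == b"x" * 10_000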
@@ -115,27 +132,61 @@ class GzipDecoder(ContentDecoder):
def __init__(self) -> None:
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
self._state = GzipDecoderState.FIRST_MEMBER
+ self._unconsumed_tail = b""
- def decompress(self, data: bytes) -> bytes:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
ret = bytearray()
- if self._state == GzipDecoderState.SWALLOW_DATA or not data:
+ if self._state == GzipDecoderState.SWALLOW_DATA:
+ return bytes(ret)
+
+ if max_length == 0:
+ # We should not pass 0 to the zlib decompressor because 0 is
+ # the default value that will make zlib decompress without a
+ # length limit.
+ # Data should be stored for subsequent calls.
+ self._unconsumed_tail += data
+ return b""
+
+ # zlib requires passing the unconsumed tail to the subsequent
+ # call if decompression is to continue.
+ data = self._unconsumed_tail + data
+ if not data and self._obj.eof:
return bytes(ret)
+
while True:
try:
- ret += self._obj.decompress(data)
+ ret += self._obj.decompress(
+ data, max_length=max(max_length - len(ret), 0)
+ )
except zlib.error:
previous_state = self._state
# Ignore data after the first error
self._state = GzipDecoderState.SWALLOW_DATA
+ self._unconsumed_tail = b""
if previous_state == GzipDecoderState.OTHER_MEMBERS:
# Allow trailing garbage acceptable in other gzip clients
return bytes(ret)
raise
- data = self._obj.unused_data
+
+ self._unconsumed_tail = data = (
+ self._obj.unconsumed_tail or self._obj.unused_data
+ )
+ if max_length > 0 and len(ret) >= max_length:
+ break
+
if not data:
return bytes(ret)
- self._state = GzipDecoderState.OTHER_MEMBERS
- self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+ # When the end of a gzip member is reached, a new decompressor
+ # must be created for unused (possibly future) data.
+ if self._obj.eof:
+ self._state = GzipDecoderState.OTHER_MEMBERS
+ self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+ return bytes(ret)
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return bool(self._unconsumed_tail)
def flush(self) -> bytes:
return self._obj.flush()
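The member-boundary handling above follows from how zlib treats concatenated gzip streams; a stdlib-only sketch, independent of the patch:

    import gzip
    import zlib

    data = gzip.compress(b"foo") + gzip.compress(b"bar")
    d = zlib.decompressobj(16 + zlib.MAX_WBITS)
    assert d.decompress(data) == b"foo"
    # The decompressor stops at the end of the first member; the rest
    # stays in unused_data and needs a fresh decompressor object.
    assert d.eof and d.unused_data
    d2 = zlib.decompressobj(16 + zlib.MAX_WBITS)
    assert d2.decompress(d.unused_data) == b"bar"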
@@ -150,37 +201,40 @@ if brotli is not None:
def __init__(self) -> None:
self._obj = brotli.Decompressor()
if hasattr(self._obj, "decompress"):
- setattr(self, "decompress", self._obj.decompress)
+ setattr(self, "_decompress", self._obj.decompress)
else:
- setattr(self, "decompress", self._obj.process)
-
- def flush(self) -> bytes:
- if hasattr(self._obj, "flush"):
- return self._obj.flush() # type: ignore[no-any-return]
- return b""
-
+ setattr(self, "_decompress", self._obj.process)
-if zstd is not None:
+ # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
+ def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
+ raise NotImplementedError()
- class ZstdDecoder(ContentDecoder):
- def __init__(self) -> None:
- self._obj = zstd.ZstdDecompressor().decompressobj()
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ try:
+ if max_length > 0:
+ return self._decompress(data, output_buffer_limit=max_length)
+ else:
+ return self._decompress(data)
+ except TypeError:
+ # Fallback for Brotli/brotlicffi/brotlipy versions without
+ # the `output_buffer_limit` parameter.
+ warnings.warn(
+ "Brotli >= 1.2.0 is required to prevent decompression bombs.",
+ DependencyWarning,
+ )
+ return self._decompress(data)
- def decompress(self, data: bytes) -> bytes:
- if not data:
- return b""
- data_parts = [self._obj.decompress(data)]
- while self._obj.eof and self._obj.unused_data:
- unused_data = self._obj.unused_data
- self._obj = zstd.ZstdDecompressor().decompressobj()
- data_parts.append(self._obj.decompress(unused_data))
- return b"".join(data_parts)
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ try:
+ return not self._obj.can_accept_more_data()
+ except AttributeError:
+ return False
def flush(self) -> bytes:
- ret = self._obj.flush() # note: this is a no-op
- if not self._obj.eof:
- raise DecodeError("Zstandard data is incomplete")
- return ret # type: ignore[no-any-return]
+ if hasattr(self._obj, "flush"):
+ return self._obj.flush() # type: ignore[no-any-return]
+ return b""
class MultiDecoder(ContentDecoder):
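For reference, the API probed by the TypeError fallback behaves roughly as follows (a sketch assuming Brotli >= 1.2.0 with the `process` method and `can_accept_more_data()`, the same calls the decoder above relies on; older releases raise TypeError on the keyword):

    import brotli  # Google brotli >= 1.2.0; brotlicffi >= 1.2.0.0 mirrors it

    d = brotli.Decompressor()
    out = d.process(brotli.compress(b"y" * 10_000), output_buffer_limit=100)
    assert len(out) <= 100
    # While decoded output is still buffered, drain it with empty input
    # before feeding more compressed data.
    while not d.can_accept_more_data():
        out += d.process(b"", output_buffer_limit=100)
    assert out == b"y" * 10_000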
@@ -198,10 +252,35 @@ class MultiDecoder(ContentDecoder):
def flush(self) -> bytes:
return self._decoders[0].flush()
- def decompress(self, data: bytes) -> bytes:
- for d in reversed(self._decoders):
- data = d.decompress(data)
- return data
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ if max_length <= 0:
+ for d in reversed(self._decoders):
+ data = d.decompress(data)
+ return data
+
+ ret = bytearray()
+        # Each iteration of the while loop runs all decoders once.
+        # The loop exits when enough data has been read or when no
+        # more data can be read. An iteration may produce no output at
+        # all: we retrieve at most `max_length` bytes from each
+        # decoder, and that may be too few bytes for the next decoder
+        # to produce enough/any output.
+ while True:
+ any_data = False
+ for d in reversed(self._decoders):
+ data = d.decompress(data, max_length=max_length - len(ret))
+ if data:
+ any_data = True
+                # We should not break when no data is returned, because
+                # subsequent decoders may produce output even from empty input.
+ ret += data
+ if not any_data or len(ret) >= max_length:
+ return bytes(ret)
+ data = b""
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return any(d.has_unconsumed_tail for d in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
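A chained header such as `Content-Encoding: deflate, gzip` means the encodings were applied left to right, which is why MultiDecoder runs its decoders in reverse; a stdlib-only sketch of the payload shape (hypothetical data):

    import gzip
    import zlib

    # Encoded with deflate first, then gzip: "deflate, gzip".
    payload = gzip.compress(zlib.compress(b"hello"))
    # Decoding therefore unwraps gzip first, then deflate.
    assert zlib.decompress(gzip.decompress(payload)) == b"hello"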
@@ -214,9 +293,6 @@ def _get_decoder(mode: str) -> ContentDe
if brotli is not None and mode == "br":
return BrotliDecoder()
- if zstd is not None and mode == "zstd":
- return ZstdDecoder()
-
return DeflateDecoder()
@@ -232,9 +308,6 @@ class BytesQueueBuffer:
* self.buffer, which contains the full data
* the largest chunk that we will copy in get()
-
- The worst case scenario is a single chunk, in which case we'll make a full copy of
- the data inside get().
"""
def __init__(self) -> None:
@@ -256,6 +329,10 @@ class BytesQueueBuffer:
elif n < 0:
raise ValueError("n should be > 0")
+ if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
+ self._size -= n
+ return self.buffer.popleft()
+
fetched = 0
ret = io.BytesIO()
while fetched < n:
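The branch added above short-circuits the copy loop that follows; its observable effect (a sketch using BytesQueueBuffer as patched):

    from urllib3.response import BytesQueueBuffer

    buf = BytesQueueBuffer()
    chunk = bytes(1024)
    buf.put(chunk)
    # A single bytes chunk of exactly the requested size is handed back
    # as-is, skipping the BytesIO copy performed in the loop below.
    assert buf.get(1024) is chunk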
@@ -283,17 +360,12 @@ class BaseHTTPResponse(io.IOBase):
CONTENT_DECODERS = ["gzip", "deflate"]
if brotli is not None:
CONTENT_DECODERS += ["br"]
- if zstd is not None:
- CONTENT_DECODERS += ["zstd"]
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
if brotli is not None:
DECODER_ERROR_CLASSES += (brotli.error,)
- if zstd is not None:
- DECODER_ERROR_CLASSES += (zstd.ZstdError,)
-
def __init__(
self,
*,
@@ -427,7 +499,11 @@ class BaseHTTPResponse(io.IOBase):
self._decoder = _get_decoder(content_encoding)
def _decode(
- self, data: bytes, decode_content: bool | None, flush_decoder: bool
+ self,
+ data: bytes,
+ decode_content: bool | None,
+ flush_decoder: bool,
+ max_length: int | None = None,
) -> bytes:
"""
Decode the data passed in and potentially flush the decoder.
@@ -440,9 +516,12 @@ class BaseHTTPResponse(io.IOBase):
)
return data
+ if max_length is None or flush_decoder:
+ max_length = -1
+
try:
if self._decoder:
- data = self._decoder.decompress(data)
+ data = self._decoder.decompress(data, max_length=max_length)
self._has_decoded_content = True
except self.DECODER_ERROR_CLASSES as e:
content_encoding = self.headers.get("content-encoding", "").lower()
@@ -877,6 +956,14 @@ class HTTPResponse(BaseHTTPResponse):
if amt is not None:
cache_content = False
+ if self._decoder and self._decoder.has_unconsumed_tail:
+ decoded_data = self._decode(
+ b"",
+ decode_content,
+ flush_decoder=False,
+ max_length=amt - len(self._decoded_buffer),
+ )
+ self._decoded_buffer.put(decoded_data)
if len(self._decoded_buffer) >= amt:
return self._decoded_buffer.get(amt)
@@ -884,7 +971,11 @@ class HTTPResponse(BaseHTTPResponse):
flush_decoder = amt is None or (amt != 0 and not data)
- if not data and len(self._decoded_buffer) == 0:
+ if (
+ not data
+ and len(self._decoded_buffer) == 0
+ and not (self._decoder and self._decoder.has_unconsumed_tail)
+ ):
return data
if amt is None:
@@ -901,7 +992,12 @@ class HTTPResponse(BaseHTTPResponse):
)
return data
- decoded_data = self._decode(data, decode_content, flush_decoder)
+ decoded_data = self._decode(
+ data,
+ decode_content,
+ flush_decoder,
+ max_length=amt - len(self._decoded_buffer),
+ )
self._decoded_buffer.put(decoded_data)
while len(self._decoded_buffer) < amt and data:
@@ -909,7 +1005,12 @@ class HTTPResponse(BaseHTTPResponse):
# For example, the GZ file header takes 10 bytes, we don't want to read
# it one byte at a time
data = self._raw_read(amt)
- decoded_data = self._decode(data, decode_content, flush_decoder)
+ decoded_data = self._decode(
+ data,
+ decode_content,
+ flush_decoder,
+ max_length=amt - len(self._decoded_buffer),
+ )
self._decoded_buffer.put(decoded_data)
data = self._decoded_buffer.get(amt)
@@ -936,7 +1037,11 @@ class HTTPResponse(BaseHTTPResponse):
if self.chunked and self.supports_chunked_reads():
yield from self.read_chunked(amt, decode_content=decode_content)
else:
- while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
+ while (
+ not is_fp_closed(self._fp)
+ or len(self._decoded_buffer) > 0
+ or (self._decoder and self._decoder.has_unconsumed_tail)
+ ):
data = self.read(amt=amt, decode_content=decode_content)
if data:
@@ -1079,7 +1184,10 @@ class HTTPResponse(BaseHTTPResponse):
break
chunk = self._handle_chunk(amt)
decoded = self._decode(
- chunk, decode_content=decode_content, flush_decoder=False
+ chunk,
+ decode_content=decode_content,
+ flush_decoder=False,
+ max_length=amt,
)
if decoded:
yield decoded
Index: urllib3-2.0.7/test/test_response.py
===================================================================
--- urllib3-2.0.7.orig/test/test_response.py
+++ urllib3-2.0.7/test/test_response.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import contextlib
+import gzip
import http.client as httplib
import socket
import ssl
@@ -31,12 +32,26 @@ from urllib3.response import ( # type:
BytesQueueBuffer,
HTTPResponse,
brotli,
- zstd,
)
from urllib3.util.response import is_fp_closed
from urllib3.util.retry import RequestHistory, Retry
+def deflate2_compress(data: bytes) -> bytes:
+ compressor = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
+ return compressor.compress(data) + compressor.flush()
+
+
+if brotli:
+ try:
+ brotli.Decompressor().process(b"", output_buffer_limit=1024)
+ _brotli_gte_1_2_0_available = True
+ except (AttributeError, TypeError):
+ _brotli_gte_1_2_0_available = False
+else:
+ _brotli_gte_1_2_0_available = False
+
+
class TestBytesQueueBuffer:
def test_single_chunk(self) -> None:
buffer = BytesQueueBuffer()
@@ -87,6 +102,13 @@ class TestBytesQueueBuffer:
assert len(buffer.get(10 * 2**20)) == 10 * 2**20
+ @pytest.mark.limit_memory("10.01 MB", current_thread_only=True)
+    def test_memory_usage_single_chunk(self) -> None:
+        buffer = BytesQueueBuffer()
+        chunk = bytes(10 * 2**20)  # 10 MiB
+        buffer.put(chunk)
+        assert buffer.get(len(buffer)) is chunk
+
# A known random (i.e, not-too-compressible) payload generated with:
# "".join(random.choice(string.printable) for i in range(512))
@@ -324,66 +349,159 @@ class TestResponse:
with pytest.raises(DecodeError):
HTTPResponse(fp, headers={"content-encoding": "br"})
- @onlyZstd()
- def test_decode_zstd(self) -> None:
- data = zstd.compress(b"foo")
-
- fp = BytesIO(data)
- r = HTTPResponse(fp, headers={"content-encoding": "zstd"})
- assert r.data == b"foo"
-
- @onlyZstd()
- def test_decode_multiframe_zstd(self) -> None:
- data = (
- # Zstandard frame
- zstd.compress(b"foo")
- # skippable frame (must be ignored)
- + bytes.fromhex(
- "50 2A 4D 18" # Magic_Number (little-endian)
- "07 00 00 00" # Frame_Size (little-endian)
- "00 00 00 00 00 00 00" # User_Data
- )
- # Zstandard frame
- + zstd.compress(b"bar")
+ _test_compressor_params: list[
+ tuple[str, tuple[str, typing.Callable[[bytes], bytes]] | None]
+ ] = [
+ ("deflate1", ("deflate", zlib.compress)),
+ ("deflate2", ("deflate", deflate2_compress)),
+ ("gzip", ("gzip", gzip.compress)),
+ ]
+ if _brotli_gte_1_2_0_available:
+ _test_compressor_params.append(("brotli", ("br", brotli.compress)))
+ else:
+ _test_compressor_params.append(("brotli", None))
+
+ @pytest.mark.parametrize(
+ "data",
+ [d[1] for d in _test_compressor_params],
+ ids=[d[0] for d in _test_compressor_params],
+ )
+ def test_read_with_all_data_already_in_decompressor(
+ self,
+ request: pytest.FixtureRequest,
+ data: tuple[str, typing.Callable[[bytes], bytes]] | None,
+ ) -> None:
+ if data is None:
+ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available")
+ original_data = b"bar" * 1000
+ name, compress_func = data
+ compressed_data = compress_func(original_data)
+ fp = mock.Mock(read=mock.Mock(return_value=b""))
+ r = HTTPResponse(fp, headers={"content-encoding": name}, preload_content=False)
+ # Put all data in the decompressor's buffer.
+ r._init_decoder()
+ assert r._decoder is not None # for mypy
+ decoded = r._decoder.decompress(compressed_data, max_length=0)
+ if name == "br":
+ # It's known that some Brotli libraries do not respect
+ # `max_length`.
+ r._decoded_buffer.put(decoded)
+ else:
+ assert decoded == b""
+ # Read the data via `HTTPResponse`.
+        read = r.read
+ assert read(0) == b""
+ assert read(2500) == original_data[:2500]
+ assert read(500) == original_data[2500:]
+ assert read(0) == b""
+ assert read() == b""
+
+ @pytest.mark.parametrize(
+ "delta",
+ (
+ 0, # First read from socket returns all compressed data.
+ -1, # First read from socket returns all but one byte of compressed data.
+ ),
+ )
+ @pytest.mark.parametrize(
+ "data",
+ [d[1] for d in _test_compressor_params],
+ ids=[d[0] for d in _test_compressor_params],
+ )
+ def test_decode_with_max_length_close_to_compressed_data_size(
+ self,
+ request: pytest.FixtureRequest,
+ delta: int,
+ data: tuple[str, typing.Callable[[bytes], bytes]] | None,
+ ) -> None:
+ """
+ Test decoding when the first read from the socket returns all or
+ almost all the compressed data, but then it has to be
+ decompressed in a couple of read calls.
+ """
+ if data is None:
+ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available")
+
+ original_data = b"foo" * 1000
+ name, compress_func = data
+ compressed_data = compress_func(original_data)
+ fp = BytesIO(compressed_data)
+ r = HTTPResponse(fp, headers={"content-encoding": name}, preload_content=False)
+ initial_limit = len(compressed_data) + delta
+        read = r.read
+ initial_chunk = read(amt=initial_limit, decode_content=True)
+ assert len(initial_chunk) == initial_limit
+ assert (
+ len(read(amt=len(original_data), decode_content=True))
+ == len(original_data) - initial_limit
+        )
- fp = BytesIO(data)
- r = HTTPResponse(fp, headers={"content-encoding": "zstd"})
- assert r.data == b"foobar"
-
- @onlyZstd()
- def test_chunked_decoding_zstd(self) -> None:
- data = zstd.compress(b"foobarbaz")
-
- fp = BytesIO(data)
- r = HTTPResponse(
- fp, headers={"content-encoding": "zstd"}, preload_content=False
+ # Prepare 50 MB of compressed data outside of the test measuring
+ # memory usage.
+ _test_memory_usage_decode_with_max_length_params: list[
+ tuple[str, tuple[str, bytes] | None]
+ ] = [
+ (
+ params[0],
+ (params[1][0], params[1][1](b"A" * (50 * 2**20))) if params[1] else None,
+        )
+ for params in _test_compressor_params
+ ]
- ret = b""
-
- for _ in range(100):
- ret += r.read(1)
- if r.closed:
- break
- assert ret == b"foobarbaz"
-
- @onlyZstd()
- @pytest.mark.parametrize("data", [b"foo", b"x" * 100])
- def test_decode_zstd_error(self, data: bytes) -> None:
- fp = BytesIO(data)
-
- with pytest.raises(DecodeError):
- HTTPResponse(fp, headers={"content-encoding": "zstd"})
-
- @onlyZstd()
- @pytest.mark.parametrize("data", [b"foo", b"x" * 100])
- def test_decode_zstd_incomplete(self, data: bytes) -> None:
- data = zstd.compress(data)
- fp = BytesIO(data[:-1])
+ @pytest.mark.parametrize(
+ "data",
+ [d[1] for d in _test_memory_usage_decode_with_max_length_params],
+ ids=[d[0] for d in _test_memory_usage_decode_with_max_length_params],
+ )
+ @pytest.mark.parametrize("read_method", ("read", "read_chunked", "stream"))
+ # Decoders consume different amounts of memory during decompression.
+ # We set the 10 MB limit to ensure that the whole decompressed data
+ # is not stored unnecessarily.
+ #
+ # FYI, the following consumption was observed for the test with
+ # `read` on CPython 3.14.0:
+ # - deflate: 2.3 MiB
+ # - deflate2: 2.1 MiB
+ # - gzip: 2.1 MiB
+ # - brotli:
+ # - brotli v1.2.0: 9 MiB
+ # - brotlicffi v1.2.0.0: 6 MiB
+ # - brotlipy v0.7.0: 105.8 MiB
+ @pytest.mark.limit_memory("10 MB", current_thread_only=True)
+ def test_memory_usage_decode_with_max_length(
+ self,
+ request: pytest.FixtureRequest,
+ read_method: str,
+ data: tuple[str, bytes] | None,
+ ) -> None:
+ if data is None:
+ pytest.skip(f"Proper {request.node.callspec.id} decoder is not available")
+
+ name, compressed_data = data
+ limit = 1024 * 1024 # 1 MiB
+ if read_method in ("read_chunked", "stream"):
+ httplib_r = httplib.HTTPResponse(MockSock) # type: ignore[arg-type]
+ httplib_r.fp = MockChunkedEncodingResponse([compressed_data]) # type: ignore[assignment]
+ r = HTTPResponse(
+ httplib_r,
+ preload_content=False,
+ headers={"transfer-encoding": "chunked", "content-encoding": name},
+ )
+ next(getattr(r, read_method)(amt=limit, decode_content=True))
+ else:
+ fp = BytesIO(compressed_data)
+ r = HTTPResponse(
+ fp, headers={"content-encoding": name}, preload_content=False
+ )
+ getattr(r, read_method)(amt=limit, decode_content=True)
- with pytest.raises(DecodeError):
- HTTPResponse(fp, headers={"content-encoding": "zstd"})
+ # Check that the internal decoded buffer is empty unless brotli
+ # is used.
+ # Google's brotli library does not fully respect the output
+ # buffer limit: https://github.com/google/brotli/issues/1396
+ # And unmaintained brotlipy cannot limit the output buffer size.
+ if name != "br" or brotli.__name__ == "brotlicffi":
+ assert len(r._decoded_buffer) == 0
def test_multi_decoding_deflate_deflate(self) -> None:
data = zlib.compress(zlib.compress(b"foo"))