File CVE-2026-33123.patch of Package python-PyPDF2
From 0b5d05de59a055c132b435ee2375bc32ff04d48e Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:38:28 +0100
Subject: [PATCH] SEC: Improve performance and limit length of array-based content streams (#3686)
---
docs/user/security.md | 3 ++
PyPDF2/filters.py | 1 +
PyPDF2/generic/_data_structures.py | 23 ++++++++++++--
tests/generic/test_data_structures.py | 43 +++++++++++++++++++++++++++
4 files changed, 68 insertions(+), 2 deletions(-)
Index: pypdf-2.11.1/PyPDF2/filters.py
===================================================================
--- pypdf-2.11.1.orig/PyPDF2/filters.py
+++ pypdf-2.11.1/PyPDF2/filters.py
@@ -63,6 +63,7 @@ from .errors import LimitReachedError, P
ZLIB_MAX_RECOVERY_INPUT_LENGTH = 5_000_000
MAX_DECLARED_STREAM_LENGTH = 75_000_000
+MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH = 75_000_000
# Reuse cached 1-byte values in the fallback loop to avoid per-byte allocations.
_SINGLE_BYTES = tuple(bytes((i,)) for i in range(256))
Index: pypdf-2.11.1/PyPDF2/generic/_data_structures.py
===================================================================
--- pypdf-2.11.1.orig/PyPDF2/generic/_data_structures.py
+++ pypdf-2.11.1/PyPDF2/generic/_data_structures.py
@@ -684,6 +684,9 @@ class EncodedStreamObject(StreamObject):
return self.set_data(data)
+CONTENT_STREAM_ARRAY_MAX_LENGTH = 10_000
+
+
class ContentStream(DecodedStreamObject):
def __init__(
self,
@@ -702,10 +705,27 @@ class ContentStream(DecodedStreamObject)
# multiple StreamObjects to be cat'd together.
stream = stream.get_object()
if isinstance(stream, ArrayObject):
- data = b""
+ from PyPDF2.filters import MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH # noqa: PLC0415
+
+ if (stream_length := len(stream)) > CONTENT_STREAM_ARRAY_MAX_LENGTH:
+ raise LimitReachedError(
+ f"Array-based stream has {stream_length} > {CONTENT_STREAM_ARRAY_MAX_LENGTH} elements."
+ )
+ data = bytearray()
+ length = 0
+
for s in stream:
- data += b_(s.get_object().get_data())
+ new_data = b_(s.get_object().get_data())
+ length += len(new_data)
+ if length > MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH:
+ raise LimitReachedError(
+ f"Array-based stream has at least {length} > "
+ f"{MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH} output bytes."
+ )
+ data += new_data
if len(data) == 0 or data[-1] != b"\n":
+ # There should be no direct need to check for a change of one byte.
+ length += 1
data += b"\n"
stream_bytes = BytesIO(data)
else: