File CVE-2026-27025.patch of Package python-PyPDF2
From 77d7b8d7cfbe8dd179858dfa42666f73fc6e57a2 Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Tue, 17 Feb 2026 17:46:56 +0100
Subject: [PATCH] SEC: Limit size of `/ToUnicode` entries (#3646)
---
PyPDF2/_cmap.py | 20 ++++++++++
tests/test_cmap.py | 91 +++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 109 insertions(+), 2 deletions(-)
Index: PyPDF2-2.11.1/PyPDF2/_cmap.py
===================================================================
--- PyPDF2-2.11.1.orig/PyPDF2/_cmap.py
+++ PyPDF2-2.11.1/PyPDF2/_cmap.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Tupl
from ._codecs import adobe_glyphs, charset_encoding
from ._utils import logger_warning
-from .errors import PdfReadWarning
+from .errors import PdfReadWarning, LimitReachedError
from .generic import DecodedStreamObject, DictionaryObject
@@ -262,6 +262,15 @@ def process_cm_line(
return process_rg, process_char, multiline_rg
+# Cap on the number of /ToUnicode mapping entries. Usual values should be up to 65_536.
+MAPPING_DICTIONARY_SIZE_LIMIT = 100_000
+
+
+def _check_mapping_size(size: int) -> None:  # Raise LimitReachedError when `size` is above the limit.
+ if size > MAPPING_DICTIONARY_SIZE_LIMIT:  # Strict comparison: a size equal to the limit still passes.
+ raise LimitReachedError(f"Maximum /ToUnicode size limit reached: {size} > {MAPPING_DICTIONARY_SIZE_LIMIT}.")
+
+
def parse_bfrange(
l: bytes,
map_dict: Dict[Any, Any],
@@ -273,6 +282,8 @@ def parse_bfrange(
nbi = max(len(lst[0]), len(lst[1]))
map_dict[-1] = ceil(nbi / 2)
fmt = b"%%0%dX" % (map_dict[-1] * 2)
+ entry_count = len(int_entry)
+ _check_mapping_size(entry_count)
if multiline_rg is not None:
a = multiline_rg[0] # a, b not in the current line
b = multiline_rg[1]
@@ -280,6 +291,8 @@ def parse_bfrange(
if sq == b"]":
closure_found = True
break
+ entry_count += 1
+ _check_mapping_size(entry_count)
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
@@ -296,6 +309,8 @@ def parse_bfrange(
if sq == b"]":
closure_found = True
break
+ entry_count += 1
+ _check_mapping_size(entry_count)
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
@@ -308,6 +323,8 @@ def parse_bfrange(
c = int(lst[2], 16)
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
closure_found = True
+ range_size = max(0, b - a + 1)
+ _check_mapping_size(entry_count + range_size) # This can be checked beforehand.
while a <= b:
map_dict[
unhexlify(fmt % a).decode(
@@ -323,6 +340,8 @@ def parse_bfrange(
def parse_bfchar(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
lst = [x for x in l.split(b" ") if x]
+ new_count = len(lst) // 2
+ _check_mapping_size(len(int_entry) + new_count) # This can be checked beforehand.
map_dict[-1] = len(lst[0]) // 2
while len(lst) > 1:
map_to = ""
Index: PyPDF2-2.11.1/tests/test_cmap.py
===================================================================
--- PyPDF2-2.11.1.orig/tests/test_cmap.py
+++ PyPDF2-2.11.1/tests/test_cmap.py
@@ -3,7 +3,9 @@ from io import BytesIO
import pytest
-from PyPDF2 import PdfReader
-from PyPDF2.errors import PdfReadWarning
+from PyPDF2 import PdfReader, PdfWriter
+from PyPDF2._cmap import parse_bfchar, parse_bfrange
+from PyPDF2.errors import PdfReadWarning, LimitReachedError
+from PyPDF2.generic import DictionaryObject, NameObject, StreamObject
from . import get_pdf_from_url
@@ -91,3 +93,89 @@ def test_iss1379():
name = "02voc.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.pages[2].extract_text()
+
+
+def test_parse_bfrange__iteration_limit():
+    writer = PdfWriter()
+    # Build a one-page document whose font carries an oversized /ToUnicode bfrange.
+    to_unicode = StreamObject()  # NOTE(review): confirm StreamObject offers set_data() in 2.11.1 (DecodedStreamObject may be needed).
+    to_unicode.set_data(
+        b"beginbfrange\n"
+        b"<00000000> <001FFFFF> <00000000>\n"
+        b"endbfrange\n"
+    )
+    font = writer._add_object(DictionaryObject({
+        NameObject("/Type"): NameObject("/Font"),
+        NameObject("/Subtype"): NameObject("/Type1"),
+        NameObject("/BaseFont"): NameObject("/Helvetica"),
+        NameObject("/ToUnicode"): to_unicode,
+    }))
+
+    page = writer.add_blank_page(width=100, height=100)
+    page[NameObject("/Resources")] = DictionaryObject({
+        NameObject("/Font"): DictionaryObject({
+            NameObject("/F1"): font.indirect_reference,
+        })
+    })
+
+    # Plain range (no bracket list): the range alone (0x200000 entries) exceeds the limit.
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 2097152 > 100000\.$"
+    ):
+        _ = page.extract_text()
+
+    # Use a pre-filled dummy list to simulate multiple calls where the range itself stays
+    # below the limit, but existing entries plus the range exceed it. Case without list.
+    int_entry = [0] * 99_999
+    map_dict = {}
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 165535 > 100000\.$"
+    ):
+        _ = parse_bfrange(l=b"0000 FFFF 0000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
+    assert map_dict == {-1: 2}
+
+    # Entries from previous calls sit exactly at the limit; a one-entry range tips it over.
+    int_entry.append(1)
+    map_dict = {}
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
+    ):
+        _ = parse_bfrange(l=b"00000000 00000000 00000000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
+    assert map_dict == {-1: 4}
+
+    # Continuation of a multi-line range (multiline_rg given): five entries fit, the sixth raises.
+    int_entry = [0] * 99_995
+    map_dict = {-1: 1}
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
+    ):
+        _ = parse_bfrange(
+            l=b"0020 0021 0022 0023 0024 0025 0026 2019",
+            map_dict=map_dict, int_entry=int_entry, multiline_rg=(32, 251)
+        )
+    assert map_dict == {-1: 1, " ": " ", "!": "!", '"': '"', "#": "#", "$": "$"}  # NOTE(review): parse_bfrange appears to reassign map_dict[-1] from the line tokens; confirm -1 stays 1.
+
+    # No multiline_rg, but a bracketed list: five entries fit, the sixth raises.
+    int_entry = [0] * 99_995
+    map_dict = {}
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
+    ):
+        _ = parse_bfrange(
+            l=b"01 8A [ FFFD FFFD FFFD FFFF FFAB AAAA BBBB",
+            map_dict=map_dict, int_entry=int_entry, multiline_rg=None
+        )
+    assert map_dict == {-1: 1, "\x01": "\ufffd", "\x02": "\ufffd", "\x03": "\ufffd", "\x04": "\uffff", "\x05": "\uffab"}
+
+
+def test_parse_bfchar__iteration_limit():
+    int_entry = [0] * 99_995  # 7 new pairs on top of 99_995 existing entries give 100_002.
+    map_dict = {}
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100002 > 100000\.$"
+    ):
+        parse_bfchar(
+            l=b"0003 0020 0008 0025 0009 0026 000A 0027 000B 0028 000C 0029 000D 002A",
+            map_dict=map_dict, int_entry=int_entry,
+        )
+    assert map_dict == {}  # The size check runs before anything is written to map_dict.