File CVE-2025-8291-consistency-zip64.patch of Package python.41815
From d8a71bec60ca197650bd93196a804083ba529b53 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 7 Oct 2025 20:55:44 +0300
Subject: [PATCH] [3.13] gh-139700: Check consistency of the zip64 end of
 central directory record (GH-139702) (GH-139708)

(cherry picked from commit 333d4a6f4967d3ace91492a39ededbcf3faa76a6)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>

Support records with "zip64 extensible data" if there are no bytes
prepended to the ZIP file.

(cherry picked from commit 162997bb70e067668c039700141770687bc8f267)
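
The new checks in _EndRecData64 reduce to two invariants on the zip64
end-of-central-directory (EOCD) record: the central directory must end
exactly where the record begins, and the record's own size field, which
per the zip spec excludes its first 12 bytes (the signature plus the
size field itself), must cover the fixed 56-byte record plus any "zip64
extensible data" sitting between it and the locator. A minimal
standalone sketch of that arithmetic (the helper name check_zip64_eocd
is illustrative and not part of the patch; the constants and struct
layout match Lib/zipfile.py):

    import struct

    sizeEndCentDir64 = 56          # fixed part of the zip64 EOCD record
    stringEndArchive64 = b"PK\x06\x06"
    structEndArchive64 = "<4sQ2H2L4Q"

    def check_zip64_eocd(record, reloff, locator_start):
        # `record` holds the 56 fixed bytes read at file offset `reloff`;
        # `locator_start` is where the zip64 EOCD locator was found.
        extrasz = locator_start - sizeEndCentDir64 - reloff
        if extrasz < 0:
            raise ValueError("Corrupt zip64 end of central directory locator")
        sig, sz, _, _, _, _, _, _, dirsize, diroffset = \
            struct.unpack(structEndArchive64, record)
        if sig != stringEndArchive64:
            raise ValueError("Zip64 end of central directory record not found")
        # The size field excludes the 12 leading bytes, so it must equal
        # 44 plus the length of any extensible data.
        if sz + 12 != sizeEndCentDir64 + extrasz:
            raise ValueError("Corrupt zip64 end of central directory record")
        # The central directory must end exactly where this record begins.
        if diroffset + dirsize != reloff:
            raise ValueError("Corrupt zip64 end of central directory record")

The same identity is why the writer side below replaces the literal 44
with sizeEndCentDir64 - 12 when emitting the record.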
---
Lib/test/test_zipfile.py | 297 ++++++++++
Lib/zipfile.py | 59 +
Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst | 3
3 files changed, 341 insertions(+), 18 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst
Index: Python-2.7.18/Lib/test/test_zipfile.py
===================================================================
--- Python-2.7.18.orig/Lib/test/test_zipfile.py 2025-11-25 00:13:42.310681562 +0100
+++ Python-2.7.18/Lib/test/test_zipfile.py 2025-11-25 00:20:40.809807681 +0100
@@ -6,9 +6,11 @@
import os
import io
+import re
import sys
import time
import struct
+import itertools
import zipfile
import unittest
@@ -1914,6 +1916,301 @@
finally:
rmtree(extdir)
+ def make_zip64_file(
+ self, file_size_64_set=False, file_size_extra=False,
+ compress_size_64_set=False, compress_size_extra=False,
+ header_offset_64_set=False, header_offset_extra=False,
+ extensible_data=b'',
+ end_of_central_dir_size=None, offset_to_end_of_central_dir=None,
+ ):
+ """Generate a bytes sequence for a zip with (incomplete) zip64 data.
+
+ The actual values (not the zip64 0xffffffff values) stored in the file
+ are:
+ file_size: 8
+ compress_size: 8
+ header_offset: 0
+ """
+ actual_size = 8
+ actual_header_offset = 0
+ local_zip64_fields = []
+ central_zip64_fields = []
+
+ file_size = actual_size
+ if file_size_64_set:
+ file_size = 0xffffffff
+ if file_size_extra:
+ local_zip64_fields.append(actual_size)
+ central_zip64_fields.append(actual_size)
+ file_size = struct.pack("<L", file_size)
+
+ compress_size = actual_size
+ if compress_size_64_set:
+ compress_size = 0xffffffff
+ if compress_size_extra:
+ local_zip64_fields.append(actual_size)
+ central_zip64_fields.append(actual_size)
+ compress_size = struct.pack("<L", compress_size)
+
+ header_offset = actual_header_offset
+ if header_offset_64_set:
+ header_offset = 0xffffffff
+ if header_offset_extra:
+ central_zip64_fields.append(actual_header_offset)
+ header_offset = struct.pack("<L", header_offset)
+
+ local_extra = struct.pack(
+ '<HH' + 'Q'*len(local_zip64_fields),
+ 0x0001,
+ 8*len(local_zip64_fields),
+ *local_zip64_fields
+ )
+
+ central_extra = struct.pack(
+ '<HH' + 'Q'*len(central_zip64_fields),
+ 0x0001,
+ 8*len(central_zip64_fields),
+ *central_zip64_fields
+ )
+
+ central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
+ offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
+ if end_of_central_dir_size is None:
+ end_of_central_dir_size = 44 + len(extensible_data)
+ if offset_to_end_of_central_dir is None:
+ offset_to_end_of_central_dir = (108
+ + 8 * len(local_zip64_fields)
+ + 8 * len(central_zip64_fields))
+
+ local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
+ central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
+
+ filename = b"test.txt"
+ content = b"test1234"
+ filename_length = struct.pack("<H", len(filename))
+ zip64_contents = (
+ # Local file header
+ b"PK\x03\x04\x14\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ + compress_size
+ + file_size
+ + filename_length
+ + local_extra_length
+ + filename
+ + local_extra
+ + content
+ # Central directory:
+ + b"PK\x01\x02-\x03-\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ + compress_size
+ + file_size
+ + filename_length
+ + central_extra_length
+ + b"\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01"
+ + header_offset
+ + filename
+ + central_extra
+ # Zip64 end of central directory
+ + b"PK\x06\x06"
+ + struct.pack('<Q', end_of_central_dir_size)
+ + b"-\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
+ + b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+ + central_dir_size
+ + offset_to_central_dir
+ + extensible_data
+ # Zip64 end of central directory locator
+ + b"PK\x06\x07\x00\x00\x00\x00"
+ + struct.pack('<Q', offset_to_end_of_central_dir)
+ + b"\x01\x00\x00\x00"
+ # end of central directory
+ + b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
+ + b"\x00\x00\x00\x00"
+ )
+ return zip64_contents
+
+ def test_bad_zip64_extra(self):
+ """Missing zip64 extra records raise an exception.
+
+ There are 4 fields that the zip64 format handles (the disk number is
+ not used in this module and so is ignored here). According to the zip
+ spec:
+ The order of the fields in the zip64 extended
+ information record is fixed, but the fields MUST
+ only appear if the corresponding Local or Central
+ directory record field is set to 0xFFFF or 0xFFFFFFFF.
+
+ If the zip64 extra content doesn't contain enough entries for the
+ number of fields marked with 0xFFFF or 0xFFFFFFFF, we raise an error.
+ This test deliberately mismatches the length of the zip64 extra field
+ and the number of fields set to indicate the presence of zip64 data.
+ """
+ # zip64 file size present, no fields in extra, expecting one, i.e.
+ # the file size is missing.
+ missing_file_size_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
+ self.assertIn('file size', str(e.exception).lower())
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_file_size_extra)))
+
+ # zip64 file size present, zip64 compress size present, one field in
+ # extra, expecting two, i.e. the compress size is missing.
+ missing_compress_size_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
+ self.assertTrue(re.search('compress.{,5}size', str(e.exception).lower()))
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
+
+ # zip64 compress size present, no fields in extra, expecting one,
+ # i.e. the compress size is missing.
+ missing_compress_size_extra = self.make_zip64_file(
+ compress_size_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
+ self.assertTrue(re.search('compress.{,5}size', str(e.exception).lower()))
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra)))
+
+ # zip64 file size present, zip64 compress size present, zip64 header
+ # offset present, two fields in extra, expecting three, i.e. the
+ # header offset is missing.
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=True,
+ compress_size_extra=True,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
+
+ # zip64 compress size present, zip64 header offset present, one field
+ # in extra, expecting two, i.e. the header offset is missing.
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=False,
+ compress_size_64_set=True,
+ compress_size_extra=True,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
+
+ # zip64 file size present, zip64 header offset present, one field in
+ # extra, expecting two, i.e. the header offset is missing.
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=True,
+ file_size_extra=True,
+ compress_size_64_set=False,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
+
+ # zip64 header offset present, no fields in extra, expecting one,
+ # i.e. the header offset is missing.
+ missing_header_offset_extra = self.make_zip64_file(
+ file_size_64_set=False,
+ compress_size_64_set=False,
+ header_offset_64_set=True,
+ )
+ with self.assertRaises(zipfile.BadZipFile) as e:
+ zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
+ self.assertIn('header offset', str(e.exception).lower())
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra)))
+
+ def test_bad_zip64_end_of_central_dir(self):
+ zipdata = self.make_zip64_file(end_of_central_dir_size=0)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ zipdata = self.make_zip64_file(end_of_central_dir_size=100)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ zipdata = self.make_zip64_file(offset_to_end_of_central_dir=0)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ zipdata = self.make_zip64_file(offset_to_end_of_central_dir=1000)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*locator'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ def test_zip64_end_of_central_dir_record_not_found(self):
+ zipdata = self.make_zip64_file()
+ zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ zipdata = self.make_zip64_file(
+ extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
+ zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
+ zipfile.ZipFile(io.BytesIO(zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ def test_zip64_extensible_data(self):
+ # These values are what the make_zip64_file method sets.
+ expected_file_size = 8
+ expected_compress_size = 8
+ expected_header_offset = 0
+ expected_content = b"test1234"
+
+ zipdata = self.make_zip64_file(
+ extensible_data=b'\xca\xfe\x04\x00\x00\x00data')
+ with zipfile.ZipFile(io.BytesIO(zipdata)) as zf:
+ zinfo = zf.infolist()[0]
+ self.assertEqual(zinfo.file_size, expected_file_size)
+ self.assertEqual(zinfo.compress_size, expected_compress_size)
+ self.assertEqual(zinfo.header_offset, expected_header_offset)
+ self.assertEqual(zf.read(zinfo), expected_content)
+ self.assertTrue(zipfile.is_zipfile(io.BytesIO(zipdata)))
+
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'):
+ zipfile.ZipFile(io.BytesIO(b'prepended' + zipdata))
+ self.assertFalse(zipfile.is_zipfile(io.BytesIO(b'prepended' + zipdata)))
+
+ def test_generated_valid_zip64_extra(self):
+ # These values are what the make_zip64_file method sets.
+ expected_file_size = 8
+ expected_compress_size = 8
+ expected_header_offset = 0
+ expected_content = b"test1234"
+
+ # Loop through the various valid combinations of zip64 masks
+ # present and extra fields present.
+ params = (
+ {"file_size_64_set": True, "file_size_extra": True},
+ {"compress_size_64_set": True, "compress_size_extra": True},
+ {"header_offset_64_set": True, "header_offset_extra": True},
+ )
+
+ for r in range(1, len(params) + 1):
+ for combo in itertools.combinations(params, r):
+ kwargs = {}
+ for c in combo:
+ kwargs.update(c)
+ with zipfile.ZipFile(io.BytesIO(self.make_zip64_file(**kwargs))) as zf:
+ zinfo = zf.infolist()[0]
+ self.assertEqual(zinfo.file_size, expected_file_size)
+ self.assertEqual(zinfo.compress_size, expected_compress_size)
+ self.assertEqual(zinfo.header_offset, expected_header_offset)
+ self.assertEqual(zf.read(zinfo), expected_content)
+
+
def test_main():
run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests,
PyZipFileTests, DecryptionTests, TestsWithMultipleOpens,
Index: Python-2.7.18/Lib/zipfile.py
===================================================================
--- Python-2.7.18.orig/Lib/zipfile.py 2025-11-25 00:13:42.311572733 +0100
+++ Python-2.7.18/Lib/zipfile.py 2025-11-25 00:31:53.056116639 +0100
@@ -174,7 +174,7 @@
else:
with open(filename, "rb") as fp:
result = _check_zipfile(fp)
- except IOError:
+ except (IOError, BadZipFile):
pass
return result
@@ -182,16 +182,16 @@
"""
Read the ZIP64 end-of-archive records and use that to update endrec
"""
- try:
- fpin.seek(offset - sizeEndCentDir64Locator, 2)
- except IOError:
- # If the seek fails, the file is not large enough to contain a ZIP64
+ offset -= sizeEndCentDir64Locator
+ if offset < 0:
+ # The file is not large enough to contain a ZIP64
# end-of-archive record, so just return the end record we were given.
return endrec
+ fpin.seek(offset)
data = fpin.read(sizeEndCentDir64Locator)
if len(data) != sizeEndCentDir64Locator:
- return endrec
+ raise IOError("Unknown I/O error")
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
if sig != stringEndArchive64Locator:
return endrec
@@ -199,16 +199,33 @@
if diskno != 0 or disks != 1:
raise BadZipfile("zipfiles that span multiple disks are not supported")
- # Assume no 'zip64 extensible data'
- fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
+ offset -= sizeEndCentDir64
+ if reloff > offset:
+ raise BadZipFile("Corrupt zip64 end of central directory locator")
+ # First, check the assumption that there is no prepended data.
+ fpin.seek(reloff)
+ extrasz = offset - reloff
data = fpin.read(sizeEndCentDir64)
if len(data) != sizeEndCentDir64:
- return endrec
+ raise IOError("Unknown I/O error")
+ if not data.startswith(stringEndArchive64) and reloff != offset:
+ # Since we already have seen the Zip64 EOCD Locator, it's
+ # possible we got here because there is prepended data.
+ # Assume no 'zip64 extensible data'
+ fpin.seek(offset)
+ extrasz = 0
+ data = fpin.read(sizeEndCentDir64)
+ if len(data) != sizeEndCentDir64:
+ raise IOError("Unknown I/O error")
+ if not data.startswith(stringEndArchive64):
+ raise BadZipFile("Zip64 end of central directory record not found")
+
sig, sz, create_version, read_version, disk_num, disk_dir, \
dircount, dircount2, dirsize, diroffset = \
struct.unpack(structEndArchive64, data)
- if sig != stringEndArchive64:
- return endrec
+ if (diroffset + dirsize != reloff or
+ sz + 12 != sizeEndCentDir64 + extrasz):
+ raise BadZipFile("Corrupt zip64 end of central directory record")
# Update the original endrec using data from the ZIP64 record
endrec[_ECD_SIGNATURE] = sig
@@ -218,6 +235,7 @@
endrec[_ECD_ENTRIES_TOTAL] = dircount2
endrec[_ECD_SIZE] = dirsize
endrec[_ECD_OFFSET] = diroffset
+ endrec[_ECD_LOCATION] = offset - extrasz
return endrec
@@ -251,7 +269,7 @@
endrec.append(filesize - sizeEndCentDir)
# Try to read the "Zip64 end of central directory" structure
- return _EndRecData64(fpin, -sizeEndCentDir, endrec)
+ return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec)
# Either this is not a ZIP file, or it is a ZIP file with an archive
# comment. Search the end of the file for the "end of central directory"
@@ -275,8 +293,7 @@
endrec.append(maxCommentStart + start)
# Try to read the "Zip64 end of central directory" structure
- return _EndRecData64(fpin, maxCommentStart + start - filesize,
- endrec)
+ return _EndRecData64(fpin, maxCommentStart + start, endrec)
# Unable to find a valid end of central directory structure
return None
@@ -427,14 +444,23 @@
# ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
+ if idx >= len(counts):
+ raise BadZipFile("Corrupt extra field 0x0001: "
+ "missing file size quadlet")
self.file_size = counts[idx]
idx += 1
if self.compress_size == 0xFFFFFFFFL:
+ if idx >= len(counts):
+ raise BadZipFile("Corrupt extra field 0x0001: "
+ "missing compress size quadlet")
self.compress_size = counts[idx]
idx += 1
if self.header_offset == 0xffffffffL:
+ if idx >= len(counts):
+ raise BadZipFile("Corrupt extra field 0x0001: "
+ "missing header offset quadlet")
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
@@ -842,9 +868,6 @@
# "concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
- if endrec[_ECD_SIGNATURE] == stringEndArchive64:
- # If Zip64 extension structures are present, account for them
- concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
if self.debug > 2:
inferred = concat + offset_cd
@@ -1383,7 +1406,7 @@
" would require ZIP64 extensions")
zip64endrec = struct.pack(
structEndArchive64, stringEndArchive64,
- 44, 45, 45, 0, 0, centDirCount, centDirCount,
+ sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount,
centDirSize, centDirOffset)
self.fp.write(zip64endrec)
Index: Python-2.7.18/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-2.7.18/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst 2025-11-25 00:13:48.072880250 +0100
@@ -0,0 +1,3 @@
+Check consistency of the zip64 end of central directory record. Support
+records with "zip64 extensible data" if there are no bytes prepended to the
+ZIP file.
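
For completeness, the extra-field half of the fix can be exercised on
its own. Below is a minimal sketch (the helper name decode_zip64_extra
and the sample field are illustrative, not part of the patch) of
decoding the zip64 "extended information" extra field (tag 0x0001) with
the same missing-quadlet checks the patch adds to _decodeExtra:

    import struct

    def decode_zip64_extra(extra, file_size, compress_size, header_offset):
        # One 0x0001 extra field: a <HH> header (tag, data length)
        # followed by length//8 unsigned 64-bit "quadlets".
        tag, length = struct.unpack('<HH', extra[:4])
        assert tag == 0x0001
        counts = struct.unpack('<%dQ' % (length // 8), extra[4:4 + length])
        idx = 0
        # Each field masked with all ones must be backed by one quadlet,
        # in this fixed order: file size, compress size, header offset.
        if file_size == 0xffffffff:
            if idx >= len(counts):
                raise ValueError('missing file size quadlet')
            file_size = counts[idx]
            idx += 1
        if compress_size == 0xffffffff:
            if idx >= len(counts):
                raise ValueError('missing compress size quadlet')
            compress_size = counts[idx]
            idx += 1
        if header_offset == 0xffffffff:
            if idx >= len(counts):
                raise ValueError('missing header offset quadlet')
            header_offset = counts[idx]
            idx += 1
        return file_size, compress_size, header_offset

    # A field announcing one quadlet where two are required, mirroring
    # the second case in test_bad_zip64_extra above: raises ValueError.
    decode_zip64_extra(struct.pack('<HHQ', 0x0001, 8, 8),
                       0xffffffff, 0xffffffff, 0)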