File CVE-2026-31958.patch of Package saltbundlepy-tornado

commit 5e6bc7fde5284d6925b6cc6b4d7a032f771a0c39
Author: Ben Darnell <ben@bendarnell.com>
Date:   Tue Mar 3 14:36:14 2026 -0500

    httputil: Add limits on multipart form data parsing
    
    The new default limits prevent a DoS vulnerability involving
    requests with many multipart parts. It also adds a defense-in-depth
    limit on the size of multipart headers, which would have mitigated
    the vulnerability fixed in 6.5.3.
    
    New data structures are added to allow users to configure these limits,
    and to disable multipart parsing entirely if they choose. However,
    due to the complexity of the plumbing required to pass these
    configuration options through the stack, the only configuration
    provided in this commit is the ability to set a global default.

diff --git a/tornado/httputil.py b/tornado/httputil.py
index 2acc52ea..810550cc 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -22,6 +22,7 @@ via `tornado.web.RequestHandler.request`.
 import calendar
 import collections.abc
 import copy
+import dataclasses
 import datetime
 import email.utils
 from functools import lru_cache
@@ -755,12 +756,90 @@ def _int_or_none(val: str) -> Optional[int]:
     return int(val)
 
 
+@dataclasses.dataclass
+class ParseMultipartConfig:
+    """This class configures the parsing of ``multipart/form-data`` request bodies.
+
+    Its primary purpose is to place limits on the size and complexity of request messages
+    to avoid potential denial-of-service attacks.
+
+    .. versionadded:: 6.5.5
+    """
+
+    enabled: bool = True
+    """Set this to false to disable the parsing of ``multipart/form-data`` requests entirely.
+
+    This may be desirable for applications that do not need to handle this format, since
+    multipart request have a history of DoS vulnerabilities in Tornado. Multipart requests
+    are used primarily for ``<input type="file">`` in HTML forms, or in APIs that mimic this
+    format. File uploads that use the HTTP ``PUT`` method generally do not use the multipart
+    format.
+    """
+
+    max_parts: int = 100
+    """The maximum number of parts accepted in a multipart request.
+
+    Each ``<input>`` element in an HTML form corresponds to at least one "part".
+    """
+
+    max_part_header_size: int = 10 * 1024
+    """The maximum size of the headers for each part of a multipart request.
+
+    The header for a part contains the name of the form field and optionally the filename
+    and content type of the uploaded file.
+    """
+
+
+@dataclasses.dataclass
+class ParseBodyConfig:
+    """This class configures the parsing of request bodies.
+
+    .. versionadded:: 6.5.5
+    """
+
+    multipart: ParseMultipartConfig = dataclasses.field(
+        default_factory=ParseMultipartConfig
+    )
+    """Configuration for ``multipart/form-data`` request bodies."""
+
+
+_DEFAULT_PARSE_BODY_CONFIG = ParseBodyConfig()
+
+
+def set_parse_body_config(config: ParseBodyConfig) -> None:
+    r"""Sets the **global** default configuration for parsing request bodies.
+
+    This global setting is provided as a stopgap for applications that need to raise the limits
+    introduced in Tornado 6.5.5, or who wish to disable the parsing of multipart/form-data bodies
+    entirely. Non-global configuration for this functionality will be introduced in a future
+    release.
+
+    >>> content_type = "multipart/form-data; boundary=foo"
+    >>> multipart_body = b"--foo--\r\n"
+    >>> parse_body_arguments(content_type, multipart_body, {}, {})
+    >>> multipart_config = ParseMultipartConfig(enabled=False)
+    >>> config = ParseBodyConfig(multipart=multipart_config)
+    >>> set_parse_body_config(config)
+    >>> parse_body_arguments(content_type, multipart_body, {}, {})
+    Traceback (most recent call last):
+        ...
+    tornado.httputil.HTTPInputError: ...: multipart/form-data parsing is disabled
+    >>> set_parse_body_config(ParseBodyConfig())  # reset to defaults
+
+    .. versionadded:: 6.5.5
+    """
+    global _DEFAULT_PARSE_BODY_CONFIG
+    _DEFAULT_PARSE_BODY_CONFIG = config
+
+
 def parse_body_arguments(
     content_type: str,
     body: bytes,
     arguments: Dict[str, List[bytes]],
     files: Dict[str, List[HTTPFile]],
     headers: Optional[HTTPHeaders] = None,
+    *,
+    config: Optional[ParseBodyConfig] = None,
 ) -> None:
     """Parses a form request body.
 
@@ -770,6 +849,8 @@ def parse_body_arguments(
     and ``files`` parameters are dictionaries that will be updated
     with the parsed contents.
     """
+    if config is None:
+        config = _DEFAULT_PARSE_BODY_CONFIG
     if content_type.startswith("application/x-www-form-urlencoded"):
         if headers and "Content-Encoding" in headers:
             raise HTTPInputError(
@@ -790,10 +871,15 @@ def parse_body_arguments(
             )
         try:
             fields = content_type.split(";")
+            if fields[0].strip() != "multipart/form-data":
+                # This catches "Content-Type: multipart/form-dataxyz"
+                raise HTTPInputError("Invalid content type")
             for field in fields:
                 k, sep, v = field.strip().partition("=")
                 if k == "boundary" and v:
-                    parse_multipart_form_data(utf8(v), body, arguments, files)
+                    parse_multipart_form_data(
+                        utf8(v), body, arguments, files, config=config.multipart
+                    )
                     break
             else:
                 raise HTTPInputError("multipart boundary not found")
@@ -806,6 +892,8 @@ def parse_multipart_form_data(
     data: bytes,
     arguments: Dict[str, List[bytes]],
     files: Dict[str, List[HTTPFile]],
+    *,
+    config: Optional[ParseMultipartConfig] = None,
 ) -> None:
     """Parses a ``multipart/form-data`` body.
 
@@ -818,6 +906,10 @@ def parse_multipart_form_data(
        Now recognizes non-ASCII filenames in RFC 2231/5987
        (``filename*=``) format.
     """
+    if config is None:
+        config = _DEFAULT_PARSE_BODY_CONFIG.multipart
+    if not config.enabled:
+        raise HTTPInputError("multipart/form-data parsing is disabled")
     # The standard allows for the boundary to be quoted in the header,
     # although it's rare (it happens at least for google app engine
     # xmpp).  I think we're also supposed to handle backslash-escapes
@@ -829,12 +921,16 @@ def parse_multipart_form_data(
     if final_boundary_index == -1:
         raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
     parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
+    if len(parts) > config.max_parts:
+        raise HTTPInputError("multipart/form-data has too many parts")
     for part in parts:
         if not part:
             continue
         eoh = part.find(b"\r\n\r\n")
         if eoh == -1:
             raise HTTPInputError("multipart/form-data missing headers")
+        if eoh > config.max_part_header_size:
+            raise HTTPInputError("multipart/form-data part header too large")
         headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
         disp_header = headers.get("Content-Disposition", "")
         disposition, disp_params = _parse_header(disp_header)
@@ -1043,7 +1139,7 @@ def doctests():
     import warnings
     warnings.simplefilter("ignore", ResourceWarning)
 
-    return doctest.DocTestSuite()
+    return doctest.DocTestSuite(optionflags=doctest.ELLIPSIS)
 
 
 _netloc_re = re.compile(r"^(.+):(\d+)$")
diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
index 78a853ba..9f620165 100644
--- a/tornado/test/httputil_test.py
+++ b/tornado/test/httputil_test.py
@@ -9,6 +9,7 @@ from tornado.httputil import (
     qs_to_qsl,
     HTTPInputError,
     HTTPFile,
+    ParseMultipartConfig,
 )
 from tornado.escape import utf8, native_str
 from tornado.log import gen_log
@@ -280,10 +281,45 @@ Foo
             return time.time() - start
 
         d1 = f(1_000)
+        # Note that headers larger than this are blocked by the default configuration.
         d2 = f(10_000)
         if d2 / d1 > 20:
             self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")
 
+    def test_multipart_config(self):
+        boundary = b"1234"
+        body = b"""--1234
+Content-Disposition: form-data; name="files"; filename="ab.txt"
+
+--1234--""".replace(
+            b"\n", b"\r\n"
+        )
+        config = ParseMultipartConfig()
+        args, files = form_data_args()
+        parse_multipart_form_data(boundary, body, args, files, config=config)
+        self.assertEqual(files["files"][0]["filename"], "ab.txt")
+
+        config_no_parts = ParseMultipartConfig(max_parts=0)
+        with self.assertRaises(HTTPInputError) as cm:
+            parse_multipart_form_data(
+                boundary, body, args, files, config=config_no_parts
+            )
+        self.assertIn("too many parts", str(cm.exception))
+
+        config_small_headers = ParseMultipartConfig(max_part_header_size=10)
+        with self.assertRaises(HTTPInputError) as cm:
+            parse_multipart_form_data(
+                boundary, body, args, files, config=config_small_headers
+            )
+        self.assertIn("header too large", str(cm.exception))
+
+        config_disabled = ParseMultipartConfig(enabled=False)
+        with self.assertRaises(HTTPInputError) as cm:
+            parse_multipart_form_data(
+                boundary, body, args, files, config=config_disabled
+            )
+        self.assertIn("multipart/form-data parsing is disabled", str(cm.exception))
+
 
 class HTTPHeadersTest(unittest.TestCase):
     def test_multi_line(self):
commit 04787057fa2653c38e62ba4a5f774cd91fbc1e2c
Author: Ben Darnell <ben@bendarnell.com>
Date:   Fri Mar 6 14:50:25 2026 -0500

    web: Validate characters in all cookie attributes.
    
    Our previous control character check was missing a check for
    U+007F, and also semicolons, which are only allowed in quoted
    parts of values. This commit checks all attributes and
    updates the set of disallowed characters.

diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py
index 2d805913..62e6d867 100644
--- a/tornado/test/web_test.py
+++ b/tornado/test/web_test.py
@@ -1,3 +1,5 @@
+import http
+
 from tornado.concurrent import Future
 from tornado import gen
 from tornado.escape import (
@@ -291,11 +293,67 @@ class CookieTest(WebTestCase):
                 self.set_cookie("unicode_args", "blah", domain="foo.com", path="/foo")
 
         class SetCookieSpecialCharHandler(RequestHandler):
+            # "Special" characters are allowed in cookie values, but trigger special quoting.
             def get(self):
                 self.set_cookie("equals", "a=b")
                 self.set_cookie("semicolon", "a;b")
                 self.set_cookie("quote", 'a"b')
 
+        class SetCookieForbiddenCharHandler(RequestHandler):
+            def get(self):
+                # Control characters and semicolons raise errors in cookie names and attributes
+                # (but not values, which are tested in SetCookieSpecialCharHandler)
+                for char in list(map(chr, range(0x20))) + [chr(0x7F), ";"]:
+                    try:
+                        self.set_cookie("foo" + char, "bar")
+                        self.write(
+                            "Didn't get expected exception for char %r in name\n" % char
+                        )
+                    except http.cookies.CookieError as e:
+                        if "Invalid cookie attribute name" not in str(e):
+                            self.write(
+                                "unexpected exception for char %r in name: %s\n"
+                                % (char, e)
+                            )
+
+                    try:
+                        self.set_cookie("foo", "bar", domain="example" + char + ".com")
+                        self.write(
+                            "Didn't get expected exception for char %r in domain\n"
+                            % char
+                        )
+                    except http.cookies.CookieError as e:
+                        if "Invalid cookie attribute domain" not in str(e):
+                            self.write(
+                                "unexpected exception for char %r in domain: %s\n"
+                                % (char, e)
+                            )
+
+                    try:
+                        self.set_cookie("foo", "bar", path="/" + char)
+                        self.write(
+                            "Didn't get expected exception for char %r in path\n" % char
+                        )
+                    except http.cookies.CookieError as e:
+                        if "Invalid cookie attribute path" not in str(e):
+                            self.write(
+                                "unexpected exception for char %r in path: %s\n"
+                                % (char, e)
+                            )
+
+                    try:
+                        self.set_cookie("foo", "bar", samesite="a" + char)
+                        self.write(
+                            "Didn't get expected exception for char %r in samesite\n"
+                            % char
+                        )
+                    except http.cookies.CookieError as e:
+                        if "Invalid cookie attribute samesite" not in str(e):
+                            self.write(
+                                "unexpected exception for char %r in samesite: %s\n"
+                                % (char, e)
+                            )
+
         class SetCookieOverwriteHandler(RequestHandler):
             def get(self):
                 self.set_cookie("a", "b", domain="example.com")
@@ -329,6 +387,7 @@ class CookieTest(WebTestCase):
             ("/get", GetCookieHandler),
             ("/set_domain", SetCookieDomainHandler),
             ("/special_char", SetCookieSpecialCharHandler),
+            ("/forbidden_char", SetCookieForbiddenCharHandler),
             ("/set_overwrite", SetCookieOverwriteHandler),
             ("/set_max_age", SetCookieMaxAgeHandler),
             ("/set_expires_days", SetCookieExpiresDaysHandler),
@@ -385,6 +444,12 @@ class CookieTest(WebTestCase):
             response = self.fetch("/get", headers={"Cookie": header})
             self.assertEqual(response.body, utf8(expected))
 
+    def test_set_cookie_forbidden_char(self):
+        response = self.fetch("/forbidden_char")
+        self.assertEqual(response.code, 200)
+        self.maxDiff = 10000
+        self.assertMultiLineEqual(to_unicode(response.body), "")
+
     def test_set_cookie_overwrite(self):
         response = self.fetch("/set_overwrite")
         headers = response.headers.get_list("Set-Cookie")
diff --git a/tornado/web.py b/tornado/web.py
index 5e7fb3ac..43a0b157 100644
--- a/tornado/web.py
+++ b/tornado/web.py
@@ -643,9 +643,30 @@ class RequestHandler(object):
         # The cookie library only accepts type str, in both python 2 and 3
         name = escape.native_str(name)
         value = escape.native_str(value)
-        if re.search(r"[\x00-\x20]", name + value):
-            # Don't let us accidentally inject bad stuff
-            raise ValueError("Invalid cookie %r: %r" % (name, value))
+        if re.search(r"[\x00-\x20]", value):
+            # Legacy check for control characters in cookie values. This check is no longer needed
+            # since the cookie library escapes these characters correctly now. It will be removed
+            # in the next feature release.
+            raise ValueError(f"Invalid cookie {name!r}: {value!r}")
+        for attr_name, attr_value in [
+            ("name", name),
+            ("domain", domain),
+            ("path", path),
+            ("samesite", samesite),
+        ]:
+            # Cookie attributes may not contain control characters or semicolons (except when
+            # escaped in the value). A check for control characters was added to the http.cookies
+            # library in a Feb 2026 security release; as of March it still does not check for
+            # semicolons.
+            #
+            # When a semicolon check is added to the standard library (and the release has had time
+            # for adoption), this check may be removed, but be mindful of the fact that this may
+            # change the timing of the exception (to the generation of the Set-Cookie header in
+            # flush()).
+            if attr_value is not None and re.search(r"[\x00-\x20\x3b\x7f]", attr_value):
+                raise http.cookies.CookieError(
+                    f"Invalid cookie attribute {attr_name}={attr_value!r} for cookie {name!r}"
+                )
         if not hasattr(self, "_new_cookie"):
             self._new_cookie = (
                 http.cookies.SimpleCookie()
commit b8ed48e6c375040abc56b92a79c041345b088cca
Author: Ben Darnell <ben@bendarnell.com>
Date:   Tue Mar 10 12:19:50 2026 -0400

    httputil: Add CRLF to _FORBIDDEN_HEADER_CHARS_RE
    
    I think these were omitted due to quirks of an older version of the
    parsing code. Linefeeds are already effectively prohibited within
    header values since they are interpreted as delimiters, so the net
    effect of this change is to prohibit bare carriage returns within
    header values. This RE is used only when parsing headers inside
    multipart/form-data bodies; for HTTP headers CR was already prohibited.

diff --git a/tornado/httputil.py b/tornado/httputil.py
index 810550cc..4c07cf45 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -62,6 +62,13 @@ if typing.TYPE_CHECKING:
     from asyncio import Future  # noqa: F401
     import unittest  # noqa: F401
 
+# To be used with str.strip() and related methods.
+HTTP_WHITESPACE = " \t"
+
+# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
+# chars greater than \xFF (which may appear after decoding utf8).
+_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0A-\x1F\x7F]")
+
 
 @lru_cache(1000)
 def _normalize_header(name: str) -> str:
@@ -171,11 +178,13 @@ class HTTPHeaders(collections.abc.MutableMapping):
         >>> h.get('content-type')
         'text/html'
         """
-        if line[0].isspace():
+        if line[0] in HTTP_WHITESPACE:
             # continuation of a multi-line header
             if self._last_key is None:
                 raise HTTPInputError("first header line cannot start with whitespace")
-            new_part = " " + line.lstrip()
+            new_part = " " + line.lstrip(HTTP_WHITESPACE)
+            if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
+                raise HTTPInputError("Invalid header value %r" % new_part)
             self._as_list[self._last_key][-1] += new_part
             self._combined_cache.pop(self._last_key, None)
         else:
@@ -183,7 +192,7 @@ class HTTPHeaders(collections.abc.MutableMapping):
                 name, value = line.split(":", 1)
             except ValueError:
                 raise HTTPInputError("no colon in header line")
-            self.add(name, value.strip())
+            self.add(name, value.strip(HTTP_WHITESPACE))
 
     @classmethod
     def parse(cls, headers: str) -> "HTTPHeaders":
diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
index 9f620165..2373ba75 100644
--- a/tornado/test/httputil_test.py
+++ b/tornado/test/httputil_test.py
@@ -135,6 +135,8 @@ Foo
             'a";";.txt',
             'a\\"b.txt',
             "a\\b.txt",
+            "a b.txt",
+            "a\tb.txt",
         ]
         for filename in filenames:
             logging.debug("trying filename %r", filename)
@@ -155,6 +157,29 @@ Foo
             self.assertEqual(file["filename"], filename)
             self.assertEqual(file["body"], b"Foo")
 
+    def test_invalid_chars(self):
+        filenames = [
+            "a\rb.txt",
+            "a\0b.txt",
+            "a\x08b.txt",
+        ]
+        for filename in filenames:
+            str_data = """\
+--1234
+Content-Disposition: form-data; name="files"; filename="%s"
+
+Foo
+--1234--""" % filename.replace(
+                "\\", "\\\\"
+            ).replace(
+                '"', '\\"'
+            )
+            data = utf8(str_data.replace("\n", "\r\n"))
+            args, files = form_data_args()
+            with self.assertRaises(HTTPInputError) as cm:
+                parse_multipart_form_data(b"1234", data, args, files)
+            self.assertIn("Invalid header value", str(cm.exception))
+
     def test_non_ascii_filename(self):
         data = b"""\
 --1234
commit 9eeb0420b8947a7e3f487d2ccd3d8f4f236fee82
Author: Ben Darnell <ben@bendarnell.com>
Date:   Wed Feb 19 14:06:22 2025 -0500

    httputil: Improve handling of trailing whitespace in headers
    
    HTTPHeaders had undocumented assumptions about trailing whitespace,
    leading to an unintentional regression in Tornado 6.4.1 in which
    passing the arguments of an AsyncHTTPClient header_callback to
    HTTPHeaders.parse_line would result in errors.
    
    This commit moves newline parsing from parse to parse_line.
    It also strips trailing whitespace from continuation lines (trailing
    whitespace is not allowed in HTTP headers, but we didn't reject it
    in continuation lines).
    
    This commit also deprecates continuation lines and the legacy
    handling of LF without CR.
    
    Fixes #3321

diff --git a/tornado/httputil.py b/tornado/httputil.py
index 4c07cf45..ed790eda 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -171,18 +171,39 @@ class HTTPHeaders(collections.abc.MutableMapping):
                 yield (name, value)
 
     def parse_line(self, line: str) -> None:
-        """Updates the dictionary with a single header line.
+        r"""Updates the dictionary with a single header line.
 
         >>> h = HTTPHeaders()
         >>> h.parse_line("Content-Type: text/html")
         >>> h.get('content-type')
         'text/html'
+        >>> h.parse_line("Content-Length: 42\r\n")
+        >>> h.get('content-type')
+        'text/html'
+
+        .. versionchanged:: 6.5
+            Now supports lines with or without the trailing CRLF, making it possible
+            to pass lines from AsyncHTTPClient's header_callback directly to this method.
+
+        .. deprecated:: 6.5
+           In Tornado 7.0, certain deprecated features of HTTP will become errors.
+           Specifically, line folding and the use of LF (with CR) as a line separator
+           will be removed.
         """
+        if m := re.search(r"\r?\n$", line):
+            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
+            # terminator and ignore any preceding CR.
+            # TODO(7.0): Remove this support for LF-only line endings.
+            line = line[: m.start()]
+        if not line:
+            # Empty line, or the final CRLF of a header block.
+            return
         if line[0] in HTTP_WHITESPACE:
             # continuation of a multi-line header
+            # TODO(7.0): Remove support for line folding.
             if self._last_key is None:
                 raise HTTPInputError("first header line cannot start with whitespace")
-            new_part = " " + line.lstrip(HTTP_WHITESPACE)
+            new_part = " " + line.strip(HTTP_WHITESPACE)
             if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                 raise HTTPInputError("Invalid header value %r" % new_part)
             self._as_list[self._last_key][-1] += new_part
@@ -209,13 +230,16 @@ class HTTPHeaders(collections.abc.MutableMapping):
 
         """
         h = cls()
-        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
-        # terminator and ignore any preceding CR.
-        for line in headers.split("\n"):
-            if line.endswith("\r"):
-                line = line[:-1]
-            if line:
-                h.parse_line(line)
+
+        start = 0
+        while True:
+            lf = headers.find("\n", start)
+            if lf == -1:
+                h.parse_line(headers[start:])
+                break
+            line = headers[start : lf + 1]
+            start = lf + 1
+            h.parse_line(line)
         return h
 
     # MutableMapping abstract method implementations.
diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py
index a71ec0af..fee12790 100644
--- a/tornado/test/httpclient_test.py
+++ b/tornado/test/httpclient_test.py
@@ -483,6 +483,23 @@ Transfer-Encoding: chunked
         self.assertRegex(first_line[0], "HTTP/[0-9]\\.[0-9] 200.*\r\n")
         self.assertEqual(chunks, [b"asdf", b"qwer"])
 
+    def test_header_callback_to_parse_line(self):
+        # Make a request with header_callback and feed the headers to HTTPHeaders.parse_line.
+        # (Instead of HTTPHeaders.parse which is used in normal cases). Ensure that the resulting
+        # headers are as expected, and in particular do not have trailing whitespace added
+        # due to the final CRLF line.
+        headers = HTTPHeaders()
+
+        def header_callback(line):
+            if line.startswith("HTTP/"):
+                # Ignore the first status line
+                return
+            headers.parse_line(line)
+
+        self.fetch("/hello", header_callback=header_callback)
+        for k, v in headers.get_all():
+            self.assertTrue(v == v.strip(), (k, v))
+
     @gen_test
     def test_configure_defaults(self):
         defaults = dict(user_agent="TestDefaultUserAgent", allow_ipv6=False)