File CVE-2022-24801-http-1.1-leniency.patch of Package python3-Twisted.34928

From 22b067793cbcd0fb5dee04cfd9115fa85a7ca110 Mon Sep 17 00:00:00 2001
From: Tom Most <twm@freecog.net>
Date: Sat, 5 Mar 2022 23:26:55 -0800
Subject: [PATCH 01/13] Some tests for GHSA-c2jg-hw38-jrqq

---
 src/twisted/web/http.py                    |   95 ++++++++++++--
 src/twisted/web/newsfragments/10323.bugfix |    1 
 src/twisted/web/test/test_http.py          |  195 ++++++++++++++++++++++++++++-
 3 files changed, 278 insertions(+), 13 deletions(-)

--- a/src/twisted/web/http.py
+++ b/src/twisted/web/http.py
@@ -108,7 +108,7 @@ import tempfile
 import time
 import warnings
 from io import BytesIO
-from typing import AnyStr, Callable, Optional
+from typing import AnyStr, Callable, Optional, Tuple
 from urllib.parse import (
     ParseResultBytes,
     unquote_to_bytes as unquote,
@@ -386,10 +386,39 @@ def toChunk(data):
     return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n")
 
 
-def fromChunk(data):
+def _ishexdigits(b: bytes) -> bool:
+    """
+    Is the string case-insensitively hexidecimal?
+
+    It must be composed of one or more characters in the ranges a-f, A-F
+    and 0-9.
+    """
+    for c in b:
+        if c not in b"0123456789abcdefABCDEF":
+            return False
+    return b != b""
+
+
+def _hexint(b: bytes) -> int:
+    """
+    Decode a hexadecimal integer.
+
+    Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
+    a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
+    parsing network protocols.
+    """
+    if not _ishexdigits(b):
+        raise ValueError(b)
+    return int(b, 16)
+
+
+def fromChunk(data: bytes) -> Tuple[bytes, bytes]:
     """
     Convert chunk to string.
 
+    Note that this function is not specification compliant: it doesn't handle
+    chunk extensions.
+
     @type data: C{bytes}
 
     @return: tuple of (result, remaining) - both C{bytes}.
@@ -398,7 +427,7 @@ def fromChunk(data):
         byte string.
     """
     prefix, rest = data.split(b"\r\n", 1)
-    length = int(prefix, 16)
+    length = _hexint(prefix)
     if length < 0:
         raise ValueError("Chunk length must be >= 0, not %d" % (length,))
     if rest[length : length + 2] != b"\r\n":
@@ -1766,6 +1795,47 @@ class _IdentityTransferDecoder:
 maxChunkSizeLineLength = 1024
 
 
+_chunkExtChars = (
+    b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
+    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
+    b"abcdefghijklmnopqrstuvwxyz{|}~"
+    b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+    b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+    b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+    b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+    b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+    b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+    b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+    b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+)
+"""
+Characters that are valid in a chunk extension.
+
+See RFC 7230 section 4.1.1::
+
+     chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+
+     chunk-ext-name = token
+     chunk-ext-val  = token / quoted-string
+
+And section 3.2.6::
+
+     token          = 1*tchar
+
+     tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+                    / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+                    / DIGIT / ALPHA
+                    ; any VCHAR, except delimiters
+
+     quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+     qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+     obs-text       = %x80-FF
+
+We don't check if chunk extensions are well-formed beyond validating that they
+don't contain characters outside this range.
+"""
+
+
 class _ChunkedTransferDecoder:
     """
     Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
@@ -1859,14 +1929,19 @@ class _ChunkedTransferDecoder:
         endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
         if endOfLengthIndex == -1:
             endOfLengthIndex = eolIndex
+        rawLength = self._buffer[0:endOfLengthIndex]
         try:
-            length = int(self._buffer[0:endOfLengthIndex], 16)
+            length = _hexint(rawLength)
         except ValueError:
             raise _MalformedChunkedDataError("Chunk-size must be an integer.")
 
-        if length < 0:
-            raise _MalformedChunkedDataError("Chunk-size must not be negative.")
-        elif length == 0:
+        ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
+        if ext and ext.translate(None, _chunkExtChars) != b"":
+            raise _MalformedChunkedDataError(
+                f"Invalid characters in chunk extensions: {ext!r}."
+            )
+
+        if length == 0:
             self.state = "TRAILER"
         else:
             self.state = "BODY"
@@ -2222,7 +2297,7 @@ class HTTPChannel(basic.LineReceiver, po
                 self.setRawMode()
         elif line[0] in b" \t":
             # Continuation of a multi line header.
-            self.__header = self.__header + b"\n" + line
+            self.__header += b" " + line.lstrip(b" \t")
         # Regular header line.
         # Processing of header line is delayed to allow accumulating multi
         # line headers.
@@ -2250,6 +2325,8 @@ class HTTPChannel(basic.LineReceiver, po
 
         # Can this header determine the length?
         if header == b"content-length":
+            if not data.isdigit():
+                return fail()
             try:
                 length = int(data)
             except ValueError:
@@ -2303,7 +2380,7 @@ class HTTPChannel(basic.LineReceiver, po
             return False
 
         header = header.lower()
-        data = data.strip()
+        data = data.strip(b" \t")
 
         if not self._maybeChooseTransferDecoder(header, data):
             return False
--- /dev/null
+++ b/src/twisted/web/newsfragments/10323.bugfix
@@ -0,0 +1 @@
+twisted.web.http had several several defects in HTTP request parsing that could permit HTTP request smuggling. It now disallows signed Content-Length headers, forbids illegal characters in chunked extensions, forbids 0x prefix to chunk lengths, and only strips spaces and horizontal tab characters from header values. These changes address CVE-2022-24801 and GHSA-c2jg-hw38-jrqq.
--- a/src/twisted/web/test/test_http.py
+++ b/src/twisted/web/test/test_http.py
@@ -1279,6 +1279,28 @@ class ChunkedTransferEncodingTests(unitt
         p.dataReceived(b"3; x-foo=bar\r\nabc\r\n")
         self.assertEqual(L, [b"abc"])
 
+    def test_extensionsMalformed(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} raises
+        L{_MalformedChunkedDataError} when the chunk extension fields contain
+        invalid characters.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        invalidControl = (
+            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f"
+            b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+        )
+        invalidDelimiter = b"\\"
+        invalidDel = b"\x7f"
+        for b in invalidControl + invalidDelimiter + invalidDel:
+            data = b"3; " + bytes((b,)) + b"\r\nabc\r\n"
+            p = http._ChunkedTransferDecoder(
+                lambda b: None,  # pragma: nocov
+                lambda b: None,  # pragma: nocov
+            )
+            self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data)
+
     def test_oversizedChunkSizeLine(self):
         """
         L{_ChunkedTransferDecoder.dataReceived} raises
@@ -1334,6 +1356,22 @@ class ChunkedTransferEncodingTests(unitt
             http._MalformedChunkedDataError, p.dataReceived, b"-3\r\nabc\r\n"
         )
 
+    def test_malformedChunkSizeHex(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} raises
+        L{_MalformedChunkedDataError} when the chunk size is prefixed with
+        "0x", as if it were a Python integer literal.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        p = http._ChunkedTransferDecoder(
+            lambda b: None,  # pragma: nocov
+            lambda b: None,  # pragma: nocov
+        )
+        self.assertRaises(
+            http._MalformedChunkedDataError, p.dataReceived, b"0x3\r\nabc\r\n"
+        )
+
     def test_malformedChunkEnd(self):
         r"""
         L{_ChunkedTransferDecoder.dataReceived} raises
@@ -1446,6 +1484,8 @@ class ChunkingTests(unittest.TestCase, R
             chunked = b"".join(http.toChunk(s))
             self.assertEqual((s, b""), http.fromChunk(chunked))
         self.assertRaises(ValueError, http.fromChunk, b"-5\r\nmalformed!\r\n")
+        self.assertRaises(ValueError, http.fromChunk, b"0xa\r\nmalformed!\r\n")
+        self.assertRaises(ValueError, http.fromChunk, b"0XA\r\nmalformed!\r\n")
 
     def testConcatenatedChunks(self):
         chunked = b"".join([b"".join(http.toChunk(t)) for t in self.strings])
@@ -1703,7 +1743,12 @@ class ParsingTests(unittest.TestCase):
         Line folded headers are handled by L{HTTPChannel} by replacing each
         fold with a single space by the time they are made available to the
         L{Request}. Any leading whitespace in the folded lines of the header
-        value is preserved.
+        value is replaced with a single space, per:
+
+            A server that receives an obs-fold in a request message ... MUST
+            ... replace each received obs-fold with one or more SP octets prior
+            to interpreting the field value or forwarding the message
+            downstream.
 
         See RFC 7230 section 3.2.4.
         """
@@ -1740,15 +1785,65 @@ class ParsingTests(unittest.TestCase):
         )
         self.assertEqual(
             request.requestHeaders.getRawHeaders(b"space"),
-            [b"space  space"],
+            [b"space space"],
         )
         self.assertEqual(
             request.requestHeaders.getRawHeaders(b"spaces"),
-            [b"spaces   spaces    spaces"],
+            [b"spaces spaces spaces"],
         )
         self.assertEqual(
             request.requestHeaders.getRawHeaders(b"tab"),
-            [b"t \ta \tb"],
+            [b"t a b"],
+        )
+
+    def test_headerStripWhitespace(self):
+        """
+        Leading and trailing space and tab characters are stripped from
+        headers. Other forms of whitespace are preserved.
+
+        See RFC 7230 section 3.2.3 and 3.2.4.
+        """
+        processed = []
+
+        class MyRequest(http.Request):
+            def process(self):
+                processed.append(self)
+                self.finish()
+
+        requestLines = [
+            b"GET / HTTP/1.0",
+            b"spaces:   spaces were stripped   ",
+            b"tabs: \t\ttabs were stripped\t\t",
+            b"spaces-and-tabs: \t \t spaces and tabs were stripped\t \t",
+            b"line-tab:   \v vertical tab was preserved\v\t",
+            b"form-feed: \f form feed was preserved \f  ",
+            b"",
+            b"",
+        ]
+
+        self.runRequest(b"\n".join(requestLines), MyRequest, 0)
+        [request] = processed
+        # All leading and trailing whitespace is stripped from the
+        # header-value.
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"spaces"),
+            [b"spaces were stripped"],
+        )
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"tabs"),
+            [b"tabs were stripped"],
+        )
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"spaces-and-tabs"),
+            [b"spaces and tabs were stripped"],
+        )
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"line-tab"),
+            [b"\v vertical tab was preserved\v"],
+        )
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"form-feed"),
+            [b"\f form feed was preserved \f"],
         )
 
     def test_tooManyHeaders(self):
@@ -2315,6 +2410,58 @@ Hello,
             ]
         )
 
+    def test_contentLengthMalformed(self):
+        """
+        A request with a non-integer C{Content-Length} header fails with a 400
+        response without calling L{Request.process}.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: MORE THAN NINE THOUSAND!",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 9001,
+            ]
+        )
+
+    def test_contentLengthTooPositive(self):
+        """
+        A request with a C{Content-Length} header that begins with a L{+} fails
+        with a 400 response without calling L{Request.process}.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: +100",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 100,
+            ]
+        )
+
+    def test_contentLengthNegative(self):
+        """
+        A request with a C{Content-Length} header that is negative fails with
+        a 400 response without calling L{Request.process}.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: -100",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 200,
+            ]
+        )
+
     def test_duplicateContentLengthsWithPipelinedRequests(self):
         """
         Two pipelined requests, the first of which includes multiple
@@ -4239,3 +4386,43 @@ class HTTPClientSanitizationTests(unitte
                 transport.value().splitlines(),
                 [b": ".join([sanitizedBytes, sanitizedBytes])],
             )
+
+
+class HexHelperTests(unittest.SynchronousTestCase):
+    """
+    Test the L{http._hexint} and L{http._ishexdigits} helper functions.
+    """
+
+    badStrings = (b"", b"0x1234", b"feds", b"-123" b"+123")
+
+    def test_isHex(self):
+        """
+        L{_ishexdigits()} returns L{True} for nonempy bytestrings containing
+        hexadecimal digits.
+        """
+        for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"):
+            self.assertIs(True, http._ishexdigits(s))
+
+    def test_decodes(self):
+        """
+        L{_hexint()} returns the integer equivalent of the input.
+        """
+        self.assertEqual(10, http._hexint(b"a"))
+        self.assertEqual(0x10, http._hexint(b"10"))
+        self.assertEqual(0xABCD123, http._hexint(b"abCD123"))
+
+    def test_isNotHex(self):
+        """
+        L{_ishexdigits()} returns L{False} for bytestrings that don't contain
+        hexadecimal digits, including the empty string.
+        """
+        for s in self.badStrings:
+            self.assertIs(False, http._ishexdigits(s))
+
+    def test_decodeNotHex(self):
+        """
+        L{_hexint()} raises L{ValueError} for bytestrings that can't
+        be decoded.
+        """
+        for s in self.badStrings:
+            self.assertRaises(ValueError, http._hexint, s)
openSUSE Build Service is sponsored by