File CVE-2022-24801-http-1.1-leniency.patch of Package python-Twisted.26707

Index: Twisted-15.2.1/twisted/web/http.py
===================================================================
--- Twisted-15.2.1.orig/twisted/web/http.py
+++ Twisted-15.2.1/twisted/web/http.py
@@ -64,6 +64,7 @@ import time
 import calendar
 import warnings
 import os
+import sys
 from io import BytesIO as StringIO
 
 try:
@@ -317,10 +318,50 @@ def toChunk(data):
 
 
 
+def _ishexdigits(b):
+    """
+    Is the string case-insensitively hexadecimal?
+
+    It must be composed of one or more characters in the ranges a-f, A-F
+    and 0-9.
+    """
+    if sys.version_info.major == 2:
+        if not isinstance(b, str):
+            b = str(b)
+        for c in b:
+            if c not in "0123456789abcdefABCDEF":
+                return False
+        return b != ""
+    else:
+        for c in b:
+            if c not in b"0123456789abcdefABCDEF":
+                return False
+        return b != b""
+
+
+def _hexint(b):
+    """
+    Decode a hexadecimal integer.
+
+    Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
+    a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
+    parsing network protocols.
+    """
+    if sys.version_info.major == 2:
+        if not isinstance(b, str):
+            b = str(b)
+    if not _ishexdigits(b):
+        raise ValueError(b)
+    return int(b, 16)
+
+
 def fromChunk(data):
     """
     Convert chunk to string.
 
+    Note that this function is not specification compliant: it doesn't handle
+    chunk extensions.
+
     @type data: C{bytes}
 
     @return: tuple of (result, remaining) - both C{bytes}.
@@ -329,7 +370,7 @@ def fromChunk(data):
         byte string.
     """
     prefix, rest = data.split(b'\r\n', 1)
-    length = int(prefix, 16)
+    length = _hexint(prefix)
     if length < 0:
         raise ValueError("Chunk length must be >= 0, not %d" % (length,))
     if rest[length:length + 2] != b'\r\n':
@@ -1454,11 +1495,54 @@ class _IdentityTransferDecoder(object):
             raise _DataLoss()
 
 
+maxChunkSizeLineLength = 1024
+
+
+_chunkExtChars = (
+    b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
+    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
+    b"abcdefghijklmnopqrstuvwxyz{|}~"
+    b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+    b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+    b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+    b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+    b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+    b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+    b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+    b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+)
+"""
+Characters that are valid in a chunk extension.
+
+See RFC 7230 section 4.1.1::
+
+     chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+
+     chunk-ext-name = token
+     chunk-ext-val  = token / quoted-string
+
+And section 3.2.6::
+
+     token          = 1*tchar
+
+     tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+                    / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+                    / DIGIT / ALPHA
+                    ; any VCHAR, except delimiters
+
+     quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+     qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+     obs-text       = %x80-FF
+
+We don't check if chunk extensions are well-formed beyond validating that they
+don't contain characters outside this range.
+"""
+
 
 class _ChunkedTransferDecoder(object):
     """
-    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
-    section 3.6.1.  This protocol can interpret the contents of a request or
+    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
+    section 4.1.  This protocol can interpret the contents of a request or
     response body which uses the I{chunked} Transfer-Encoding.  It cannot
     interpret any of the rest of the HTTP protocol.
 
@@ -1498,60 +1582,96 @@ class _ChunkedTransferDecoder(object):
     def __init__(self, dataCallback, finishCallback):
         self.dataCallback = dataCallback
         self.finishCallback = finishCallback
-        self._buffer = b''
+        self._buffer = bytearray()
+        self._start = 0
 
 
-    def _dataReceived_CHUNK_LENGTH(self, data):
-        if b'\r\n' in data:
-            line, rest = data.split(b'\r\n', 1)
-            parts = line.split(b';')
-            try:
-                self.length = int(parts[0], 16)
-            except ValueError:
-                raise _MalformedChunkedDataError(
-                    "Chunk-size must be an integer.")
-            if self.length == 0:
-                self.state = 'TRAILER'
-            else:
-                self.state = 'BODY'
-            return rest
-        else:
-            self._buffer = data
-            return b''
+    def _dataReceived_CHUNK_LENGTH(self):
+        eolIndex = self._buffer.find(b"\r\n", self._start)
+
+        if eolIndex >= maxChunkSizeLineLength or (
+            eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength
+        ):
+            raise _MalformedChunkedDataError(
+                "Chunk size line exceeds maximum of {} bytes.".format(
+                    maxChunkSizeLineLength
+                )
+            )
+
+        if eolIndex == -1:
+            # Restart the search upon receipt of more data at the start of the
+            # new data, minus one in case the last character of the buffer is
+            # CR.
+            self._start = len(self._buffer) - 1
+            return False
+
+        endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
+        if endOfLengthIndex == -1:
+            endOfLengthIndex = eolIndex
+        rawLength = self._buffer[0:endOfLengthIndex]
+        try:
+            length = _hexint(rawLength)
+        except ValueError:
+            raise _MalformedChunkedDataError("Chunk-size must be an integer.")
 
+        ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
+        if ext and ext.translate(None, _chunkExtChars) != b"":
+            raise _MalformedChunkedDataError(
+                "Invalid characters in chunk extensions: %r." % ext
+            )
 
-    def _dataReceived_CRLF(self, data):
-        if data.startswith(b'\r\n'):
-            self.state = 'CHUNK_LENGTH'
-            return data[2:]
+        if length == 0:
+            self.state = "TRAILER"
         else:
-            self._buffer = data
-            return b''
+            self.state = "BODY"
 
+        self.length = length
+        del self._buffer[0 : eolIndex + 2]
+        self._start = 0
+        return True
 
-    def _dataReceived_TRAILER(self, data):
-        if data.startswith(b'\r\n'):
-            data = data[2:]
-            self.state = 'FINISHED'
-            self.finishCallback(data)
-        else:
-            self._buffer = data
-        return b''
+
+    def _dataReceived_CRLF(self):
+        if len(self._buffer) < 2:
+            return False
+
+        if not self._buffer.startswith(b"\r\n"):
+            raise _MalformedChunkedDataError("Chunk did not end with CRLF")
+
+        self.state = "CHUNK_LENGTH"
+        del self._buffer[0:2]
+        return True
+
+
+    def _dataReceived_TRAILER(self):
+        if len(self._buffer) < 2:
+            return False
+
+        if not self._buffer.startswith(b"\r\n"):
+            raise _MalformedChunkedDataError("Chunk did not end with CRLF")
+
+        data = memoryview(self._buffer)[2:].tobytes()
+        del self._buffer[:]
+        self.state = "FINISHED"
+        self.finishCallback(data)
+        return False
 
 
-    def _dataReceived_BODY(self, data):
-        if len(data) >= self.length:
-            chunk, data = data[:self.length], data[self.length:]
+    def _dataReceived_BODY(self):
+        if len(self._buffer) >= self.length:
+            chunk = memoryview(self._buffer)[: self.length].tobytes()
+            del self._buffer[: self.length]
+            self.state = "CRLF"
             self.dataCallback(chunk)
-            self.state = 'CRLF'
-            return data
-        elif len(data) < self.length:
-            self.length -= len(data)
-            self.dataCallback(data)
-            return b''
+        else:
+            chunk = bytes(self._buffer)
+            self.length -= len(chunk)
+            del self._buffer[:]
+            self.dataCallback(chunk)
+        return True
 
 
-    def _dataReceived_FINISHED(self, data):
+    def _dataReceived_FINISHED(self):
         raise RuntimeError(
             "_ChunkedTransferDecoder.dataReceived called after last "
             "chunk was processed")
@@ -1562,10 +1682,10 @@ class _ChunkedTransferDecoder(object):
         Interpret data from a request or response body which uses the
         I{chunked} Transfer-Encoding.
         """
-        data = self._buffer + data
-        self._buffer = b''
-        while data:
-            data = getattr(self, '_dataReceived_%s' % (self.state,))(data)
+        self._buffer += data
+        goOn = True
+        while goOn and self._buffer:
+            goOn = getattr(self, "_dataReceived_" + self.state)()
 
 
     def noMoreData(self):
@@ -1678,7 +1798,7 @@ class HTTPChannel(basic.LineReceiver, po
                 self.setRawMode()
         elif line[0] in b' \t':
             # Continuation of a multi line header.
-            self.__header = self.__header + '\n' + line
+            self.__header += b" " + line.lstrip(b" \t")
         # Regular header line.
         # Processing of header line is delayed to allow accumulating multi
         # line headers.
@@ -1702,16 +1822,23 @@ class HTTPChannel(basic.LineReceiver, po
         @param line: A line from the header section of a request, excluding the
             line delimiter.
         """
+        def fail():
+            _respondToBadRequestAndDisconnect(self.transport)
+            self.length = None
+            return False
+
         header, data = line.split(b':', 1)
         header = header.lower()
         data = data.strip()
         if header == b'content-length':
+            # Content-Length is a decimal number (RFC 7230 section 3.3.2);
+            # isdigit() rejects signs, prefixes and hex digits up front.
+            if not data.isdigit():
+                return fail()
             try:
                 self.length = int(data)
             except ValueError:
-                _respondToBadRequestAndDisconnect(self.transport)
-                self.length = None
-                return
+                return fail()
             self._transferDecoder = _IdentityTransferDecoder(
                 self.length, self.requests[-1].handleContentChunk, self._finishRequestBody)
         elif header == b'transfer-encoding' and data.lower() == b'chunked':
Index: Twisted-15.2.1/twisted/web/test/test_http.py
===================================================================
--- Twisted-15.2.1.orig/twisted/web/test/test_http.py
+++ Twisted-15.2.1/twisted/web/test/test_http.py
@@ -622,6 +622,43 @@ class ChunkedTransferEncodingTests(unitt
         self.assertEqual(errors, [])
         self.assertEqual(successes, [True])
 
+    def test_extensionsMalformed(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} raises
+        L{_MalformedChunkedDataError} when the chunk extension fields contain
+        invalid characters.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        invalidControl = (
+            b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f"
+            b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+        )
+        invalidDelimiter = b"\\"
+        invalidDel = b"\x7f"
+        for b in invalidControl + invalidDelimiter + invalidDel:
+            data = b"3; " + bytes((b,)) + b"\r\nabc\r\n"
+            p = http._ChunkedTransferDecoder(
+                lambda b: None,  # pragma: nocov
+                lambda b: None,  # pragma: nocov
+            )
+            self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data)
+
+    def test_malformedChunkSizeHex(self):
+        """
+        L{_ChunkedTransferDecoder.dataReceived} raises
+        L{_MalformedChunkedDataError} when the chunk size is prefixed with
+        "0x", as if it were a Python integer literal.
+
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        p = http._ChunkedTransferDecoder(
+            lambda b: None,  # pragma: nocov
+            lambda b: None,  # pragma: nocov
+        )
+        self.assertRaises(
+            http._MalformedChunkedDataError, p.dataReceived, b"0x3\r\nabc\r\n"
+        )
 
 
 class ChunkingTests(unittest.TestCase):
@@ -702,6 +739,31 @@ class ParsingTests(unittest.TestCase):
         return channel
 
 
+    def assertRequestRejected(self, requestLines):
+        """
+        Execute an HTTP request and assert that it is rejected with a 400 Bad
+        Request response and disconnection.
+        @param requestLines: Plain text lines of the request. These lines will
+            be joined with newlines to form the HTTP request that is processed.
+        @type requestLines: C{list} of C{bytes}
+        """
+        httpRequest = b"\n".join(requestLines)
+        processed = []
+
+        class MyRequest(http.Request):
+            def process(self):
+                processed.append(self)
+                self.finish()
+
+        channel = self.runRequest(httpRequest, MyRequest, success=False)
+        self.assertEqual(
+            channel.transport.value(),
+            b"HTTP/1.1 400 Bad Request\r\n\r\n",
+        )
+        self.assertTrue(channel.transport.disconnecting)
+        self.assertEqual(processed, [])
+
+
     def test_basicAuth(self):
         """
         L{HTTPChannel} provides username and password information supplied in
@@ -721,6 +783,29 @@ class ParsingTests(unittest.TestCase):
             req = requests.pop()
             self.assertEqual((u, p), req.credentials)
 
+    def assertRequestRejected(self, requestLines):
+        """
+        Execute an HTTP request and assert that it is rejected with a 400 Bad
+        Request response and disconnection.
+        @param requestLines: Plain text lines of the request. These lines will
+            be joined with newlines to form the HTTP request that is processed.
+        @type requestLines: C{list} of C{bytes}
+        """
+        httpRequest = b"\n".join(requestLines)
+        processed = []
+
+        class MyRequest(http.Request):
+            def process(self):
+                processed.append(self)
+                self.finish()
+
+        channel = self.runRequest(httpRequest, MyRequest, success=False)
+        self.assertEqual(
+            channel.transport.value(),
+            b"HTTP/1.1 400 Bad Request\r\n\r\n",
+        )
+        self.assertTrue(channel.transport.disconnecting)
+        self.assertEqual(processed, [])
 
     def test_headers(self):
         """
@@ -903,6 +988,56 @@ class ParsingTests(unittest.TestCase):
             )
 
 
+    def test_contentLengthMalformed(self):
+        """
+        A request with a non-integer C{Content-Length} header fails with a 400
+        response without calling L{Request.process}.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: MORE THAN NINE THOUSAND!",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 9001,
+            ]
+        )
+
+    def test_contentLengthTooPositive(self):
+        """
+        A request with a C{Content-Length} header that begins with a L{+} fails
+        with a 400 response without calling L{Request.process}.
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: +100",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 100,
+            ]
+        )
+
+    def test_contentLengthNegative(self):
+        """
+        A request with a C{Content-Length} header that is negative fails with
+        a 400 response without calling L{Request.process}.
+        This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+        """
+        self.assertRequestRejected(
+            [
+                b"GET /a HTTP/1.1",
+                b"Content-Length: -100",
+                b"Host: host.invalid",
+                b"",
+                b"",
+                b"x" * 200,
+            ]
+        )
+
     def testCookies(self):
         """
         Test cookies parsing and reading.
@@ -1394,7 +1529,6 @@ class RequestTests(unittest.TestCase, Re
         test_setResponseCodeAcceptsLongIntegers.skip = (
             "Python 3 has no separate long integer type.")
 
-
     def test_setHost(self):
         """
         L{http.Request.setHost} sets the value of the host request header.
@@ -2294,3 +2428,44 @@ class DeprecatedRequestAttributesTests(u
                     "in Twisted 15.0.0; please use Twisted Names to "
                     "resolve hostnames instead")},
                          sub(["category", "message"], warnings[0]))
+
+
+class HexHelperTests(unittest.SynchronousTestCase):
+    """
+    Test the L{http._hexint} and L{http._ishexdigits} helper functions.
+    """
+
+    badStrings = (b"", b"0x1234", b"feds", b"-123", b"+123")
+
+    def test_isHex(self):
+        """
+        L{_ishexdigits()} returns L{True} for nonempty bytestrings containing
+        hexadecimal digits.
+        """
+        for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"):
+            self.assertIs(True, http._ishexdigits(s))
+
+    def test_decodes(self):
+        """
+        L{_hexint()} returns the integer equivalent of the input.
+        """
+        self.assertEqual(10, http._hexint(b"a"))
+        self.assertEqual(0x10, http._hexint(b"10"))
+        self.assertEqual(0xABCD123, http._hexint(b"abCD123"))
+
+    def test_isNotHex(self):
+        """
+        L{_ishexdigits()} returns L{False} for bytestrings that don't contain
+        hexadecimal digits, including the empty string.
+        """
+        for s in self.badStrings:
+            self.assertIs(False, http._ishexdigits(s))
+
+    def test_decodeNotHex(self):
+        """
+        L{_hexint()} raises L{ValueError} for bytestrings that can't
+        be decoded.
+        """
+        for s in self.badStrings:
+            self.assertRaises(ValueError, http._hexint, s)
+
Places

File CVE-2022-24801-http-1.1-leniency.patch of Package python-Twisted.26707

Places