File CVE-2022-24801-http-1.1-leniency.patch of Package python-Twisted.25323
Index: Twisted-19.10.0/src/twisted/web/http.py
===================================================================
--- Twisted-19.10.0.orig/src/twisted/web/http.py
+++ Twisted-19.10.0/src/twisted/web/http.py
@@ -66,6 +66,7 @@ import time
import calendar
import warnings
import os
+import sys
from io import BytesIO as StringIO
try:
@@ -340,10 +341,50 @@ def toChunk(data):
+def _ishexdigits(b):
+ """
+ Is the string case-insensitively hexidecimal?
+
+ It must be composed of one or more characters in the ranges a-f, A-F
+ and 0-9.
+ """
+ if sys.version_info.major == 2:
+ if not isinstance(b, str):
+ b = str(b)
+ for c in b:
+ if c not in "0123456789abcdefABCDEF":
+ return False
+ return b != ""
+ else:
+ for c in b:
+ if c not in b"0123456789abcdefABCDEF":
+ return False
+ return b != b""
+
+
+def _hexint(b):
+ """
+ Decode a hexadecimal integer.
+
+ Unlike L{int(b, 16)}, this raises L{ValueError} when the integer has
+ a prefix like C{b'0x'}, C{b'+'}, or C{b'-'}, which is desirable when
+ parsing network protocols.
+ """
+ if sys.version_info.major == 2:
+ if not isinstance(b, str):
+ b = str(b)
+ if not _ishexdigits(b):
+ raise ValueError(b)
+ return int(b, 16)
+
+
def fromChunk(data):
"""
Convert chunk to string.
+ Note that this function is not specification compliant: it doesn't handle
+ chunk extensions.
+
@type data: C{bytes}
@return: tuple of (result, remaining) - both C{bytes}.
@@ -352,7 +393,7 @@ def fromChunk(data):
byte string.
"""
prefix, rest = data.split(b'\r\n', 1)
- length = int(prefix, 16)
+ length = _hexint(prefix)
if length < 0:
raise ValueError("Chunk length must be >= 0, not %d" % (length,))
if rest[length:length + 2] != b'\r\n':
@@ -1774,11 +1815,54 @@ class _IdentityTransferDecoder(object):
raise _DataLoss()
+maxChunkSizeLineLength = 1024
+
+
+_chunkExtChars = (
+ b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
+ b"abcdefghijklmnopqrstuvwxyz{|}~"
+ b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+ b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+ b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+ b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+ b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+)
+"""
+Characters that are valid in a chunk extension.
+
+See RFC 7230 section 4.1.1::
+
+ chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+
+ chunk-ext-name = token
+ chunk-ext-val = token / quoted-string
+
+And section 3.2.6::
+
+ token = 1*tchar
+
+ tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+ / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+ / DIGIT / ALPHA
+ ; any VCHAR, except delimiters
+
+ quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+ obs-text = %x80-FF
+
+We don't check if chunk extensions are well-formed beyond validating that they
+don't contain characters outside this range.
+"""
+
class _ChunkedTransferDecoder(object):
"""
- Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
- section 3.6.1. This protocol can interpret the contents of a request or
+ Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
+ section 4.1. This protocol can interpret the contents of a request or
response body which uses the I{chunked} Transfer-Encoding. It cannot
interpret any of the rest of the HTTP protocol.
@@ -1818,60 +1902,96 @@ class _ChunkedTransferDecoder(object):
def __init__(self, dataCallback, finishCallback):
self.dataCallback = dataCallback
self.finishCallback = finishCallback
- self._buffer = b''
+ self._buffer = bytearray()
+ self._start = 0
- def _dataReceived_CHUNK_LENGTH(self, data):
- if b'\r\n' in data:
- line, rest = data.split(b'\r\n', 1)
- parts = line.split(b';')
- try:
- self.length = int(parts[0], 16)
- except ValueError:
- raise _MalformedChunkedDataError(
- "Chunk-size must be an integer.")
- if self.length == 0:
- self.state = 'TRAILER'
- else:
- self.state = 'BODY'
- return rest
- else:
- self._buffer = data
- return b''
+ def _dataReceived_CHUNK_LENGTH(self):
+ eolIndex = self._buffer.find(b"\r\n", self._start)
+ if eolIndex >= maxChunkSizeLineLength or (
+ eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength
+ ):
+ raise _MalformedChunkedDataError(
+ "Chunk size line exceeds maximum of {} bytes.".format(
+ maxChunkSizeLineLength
+ )
+ )
- def _dataReceived_CRLF(self, data):
- if data.startswith(b'\r\n'):
- self.state = 'CHUNK_LENGTH'
- return data[2:]
- else:
- self._buffer = data
- return b''
+ if eolIndex == -1:
+ # Restart the search upon receipt of more data at the start of the
+ # new data, minus one in case the last character of the buffer is
+ # CR.
+ self._start = len(self._buffer) - 1
+ return False
+ endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
+ if endOfLengthIndex == -1:
+ endOfLengthIndex = eolIndex
+ rawLength = self._buffer[0:endOfLengthIndex]
+ try:
+ length = _hexint(rawLength)
+ except ValueError:
+ raise _MalformedChunkedDataError("Chunk-size must be an integer.")
- def _dataReceived_TRAILER(self, data):
- if data.startswith(b'\r\n'):
- data = data[2:]
- self.state = 'FINISHED'
- self.finishCallback(data)
+ ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
+ if ext and ext.translate(None, _chunkExtChars) != b"":
+ raise _MalformedChunkedDataError(
+ "Invalid characters in chunk extensions: %r." % ext
+ )
+
+ if length == 0:
+ self.state = "TRAILER"
else:
- self._buffer = data
- return b''
+ self.state = "BODY"
+
+ self.length = length
+ del self._buffer[0 : eolIndex + 2]
+ self._start = 0
+ return True
+
+
+ def _dataReceived_CRLF(self):
+ if len(self._buffer) < 2:
+ return False
+
+ if not self._buffer.startswith(b"\r\n"):
+ raise _MalformedChunkedDataError("Chunk did not end with CRLF")
+
+ self.state = "CHUNK_LENGTH"
+ del self._buffer[0:2]
+ return True
+
+
+ def _dataReceived_TRAILER(self):
+ if len(self._buffer) < 2:
+ return False
+
+ if not self._buffer.startswith(b"\r\n"):
+ raise _MalformedChunkedDataError("Chunk did not end with CRLF")
+
+ data = memoryview(self._buffer)[2:].tobytes()
+ del self._buffer[:]
+ self.state = "FINISHED"
+ self.finishCallback(data)
+ return False
- def _dataReceived_BODY(self, data):
- if len(data) >= self.length:
- chunk, data = data[:self.length], data[self.length:]
+ def _dataReceived_BODY(self):
+ if len(self._buffer) >= self.length:
+ chunk = memoryview(self._buffer)[: self.length].tobytes()
+ del self._buffer[: self.length]
+ self.state = "CRLF"
self.dataCallback(chunk)
- self.state = 'CRLF'
- return data
- elif len(data) < self.length:
- self.length -= len(data)
- self.dataCallback(data)
- return b''
+ else:
+ chunk = bytes(self._buffer)
+ self.length -= len(chunk)
+ del self._buffer[:]
+ self.dataCallback(chunk)
+ return True
- def _dataReceived_FINISHED(self, data):
+ def _dataReceived_FINISHED(self):
raise RuntimeError(
"_ChunkedTransferDecoder.dataReceived called after last "
"chunk was processed")
@@ -1882,10 +2002,10 @@ class _ChunkedTransferDecoder(object):
Interpret data from a request or response body which uses the
I{chunked} Transfer-Encoding.
"""
- data = self._buffer + data
- self._buffer = b''
- while data:
- data = getattr(self, '_dataReceived_%s' % (self.state,))(data)
+ self._buffer += data
+ goOn = True
+ while goOn and self._buffer:
+ goOn = getattr(self, "_dataReceived_" + self.state)()
def noMoreData(self):
@@ -2156,7 +2276,7 @@ class HTTPChannel(basic.LineReceiver, po
self.setRawMode()
elif line[0] in b' \t':
# Continuation of a multi line header.
- self.__header = self.__header + b'\n' + line
+ self.__header += b" " + line.lstrip(b" \t")
# Regular header line.
# Processing of header line is delayed to allow accumulating multi
# line headers.
@@ -2171,6 +2291,52 @@ class HTTPChannel(basic.LineReceiver, po
self._dataBuffer.append(data)
+ def _maybeChooseTransferDecoder(self, header, data):
+ """
+ If the provided header is C{content-length} or
+ C{transfer-encoding}, choose the appropriate decoder if any.
+
+ Returns L{True} if the request can proceed and L{False} if not.
+ """
+
+ def fail():
+ self._respondToBadRequestAndDisconnect()
+ self.length = None
+ return False
+
+ if header == b'content-length':
+ if not data.isdigit():
+ return fail()
+ try:
+ length = int(data)
+ except ValueError:
+ return fail()
+ newTransferDecoder = _IdentityTransferDecoder(
+ length, self.requests[-1].handleContentChunk, self._finishRequestBody)
+ elif header == b'transfer-encoding':
+ # XXX Rather poorly tested code block, apparently only exercised by
+ # test_chunkedEncoding
+ if data.lower() == b'chunked':
+ length = None
+ newTransferDecoder = _ChunkedTransferDecoder(
+ self.requests[-1].handleContentChunk, self._finishRequestBody)
+ elif data.lower() == b"identity":
+ return True
+ else:
+ return fail()
+ else:
+ # It's not a length related header, so exit
+ return True
+
+ if self._transferDecoder is not None:
+ return fail()
+ else:
+ self.length = length
+ self._transferDecoder = newTransferDecoder
+ return True
+
+
+
def headerReceived(self, line):
"""
Do pre-processing (for content-length) and store this header away.
@@ -2189,23 +2355,16 @@ class HTTPChannel(basic.LineReceiver, po
self._respondToBadRequestAndDisconnect()
return False
+ if not header or header[-1:].isspace():
+ self._respondToBadRequestAndDisconnect()
+ return False
+
header = header.lower()
- data = data.strip()
- if header == b'content-length':
- try:
- self.length = int(data)
- except ValueError:
- self._respondToBadRequestAndDisconnect()
- self.length = None
- return False
- self._transferDecoder = _IdentityTransferDecoder(
- self.length, self.requests[-1].handleContentChunk, self._finishRequestBody)
- elif header == b'transfer-encoding' and data.lower() == b'chunked':
- # XXX Rather poorly tested code block, apparently only exercised by
- # test_chunkedEncoding
- self.length = None
- self._transferDecoder = _ChunkedTransferDecoder(
- self.requests[-1].handleContentChunk, self._finishRequestBody)
+ data = data.strip(b" \t")
+
+ if not self._maybeChooseTransferDecoder(header, data):
+ return False
+
reqHeaders = self.requests[-1].requestHeaders
values = reqHeaders.getRawHeaders(header)
if values is not None:
@@ -2220,7 +2379,6 @@ class HTTPChannel(basic.LineReceiver, po
return True
-
def allContentReceived(self):
command = self._command
path = self._path
Index: Twisted-19.10.0/src/twisted/web/test/test_http.py
===================================================================
--- Twisted-19.10.0.orig/src/twisted/web/test/test_http.py
+++ Twisted-19.10.0/src/twisted/web/test/test_http.py
@@ -1376,6 +1376,43 @@ class ChunkedTransferEncodingTests(unitt
self.assertEqual(errors, [])
self.assertEqual(successes, [True])
+ def test_extensionsMalformed(self):
+ """
+ L{_ChunkedTransferDecoder.dataReceived} raises
+ L{_MalformedChunkedDataError} when the chunk extension fields contain
+ invalid characters.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ invalidControl = (
+ b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\n\x0b\x0c\r\x0e\x0f"
+ b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ )
+ invalidDelimiter = b"\\"
+ invalidDel = b"\x7f"
+ for b in invalidControl + invalidDelimiter + invalidDel:
+ data = b"3; " + bytes((b,)) + b"\r\nabc\r\n"
+ p = http._ChunkedTransferDecoder(
+ lambda b: None, # pragma: nocov
+ lambda b: None, # pragma: nocov
+ )
+ self.assertRaises(http._MalformedChunkedDataError, p.dataReceived, data)
+
+ def test_malformedChunkSizeHex(self):
+ """
+ L{_ChunkedTransferDecoder.dataReceived} raises
+ L{_MalformedChunkedDataError} when the chunk size is prefixed with
+ "0x", as if it were a Python integer literal.
+
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ p = http._ChunkedTransferDecoder(
+ lambda b: None, # pragma: nocov
+ lambda b: None, # pragma: nocov
+ )
+ self.assertRaises(
+ http._MalformedChunkedDataError, p.dataReceived, b"0x3\r\nabc\r\n"
+ )
class ChunkingTests(unittest.TestCase, ResponseTestMixin):
@@ -1478,6 +1515,29 @@ class ParsingTests(unittest.TestCase):
self.assertFalse(self.didRequest)
return channel
+ def assertRequestRejected(self, requestLines):
+ """
+ Execute a HTTP request and assert that it is rejected with a 400 Bad
+ Response and disconnection.
+ @param requestLines: Plain text lines of the request. These lines will
+ be joined with newlines to form the HTTP request that is processed.
+ @type requestLines: C{list} of C{bytes}
+ """
+ httpRequest = b"\n".join(requestLines)
+ processed = []
+
+ class MyRequest(http.Request):
+ def process(self):
+ processed.append(self)
+ self.finish()
+
+ channel = self.runRequest(httpRequest, MyRequest, success=False)
+ self.assertEqual(
+ channel.transport.value(),
+ b"HTTP/1.1 400 Bad Request\r\n\r\n",
+ )
+ self.assertTrue(channel.transport.disconnecting)
+ self.assertEqual(processed, [])
def test_invalidNonAsciiMethod(self):
"""
@@ -1551,7 +1611,12 @@ class ParsingTests(unittest.TestCase):
Line folded headers are handled by L{HTTPChannel} by replacing each
fold with a single space by the time they are made available to the
L{Request}. Any leading whitespace in the folded lines of the header
- value is preserved.
+ value is replaced with a single space, per:
+
+ A server that receives an obs-fold in a request message ... MUST
+ ... replace each received obs-fold with one or more SP octets prior
+ to interpreting the field value or forwarding the message
+ downstream.
See RFC 7230 section 3.2.4.
"""
@@ -1588,15 +1653,65 @@ class ParsingTests(unittest.TestCase):
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"space"),
- [b"space space"],
+ [b"space space"],
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"spaces"),
- [b"spaces spaces spaces"],
+ [b"spaces spaces spaces"],
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"tab"),
- [b"t \ta \tb"],
+ [b"t a b"],
+ )
+
+ def test_headerStripWhitespace(self):
+ """
+ Leading and trailing space and tab characters are stripped from
+ headers. Other forms of whitespace are preserved.
+
+ See RFC 7230 section 3.2.3 and 3.2.4.
+ """
+ processed = []
+
+ class MyRequest(http.Request):
+ def process(self):
+ processed.append(self)
+ self.finish()
+
+ requestLines = [
+ b"GET / HTTP/1.0",
+ b"spaces: spaces were stripped ",
+ b"tabs: \t\ttabs were stripped\t\t",
+ b"spaces-and-tabs: \t \t spaces and tabs were stripped\t \t",
+ b"line-tab: \v vertical tab was preserved\v\t",
+ b"form-feed: \f form feed was preserved \f ",
+ b"",
+ b"",
+ ]
+
+ self.runRequest(b"\n".join(requestLines), MyRequest, 0)
+ [request] = processed
+ # All leading and trailing whitespace is stripped from the
+ # header-value.
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"spaces"),
+ [b"spaces were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"tabs"),
+ [b"tabs were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"spaces-and-tabs"),
+ [b"spaces and tabs were stripped"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"line-tab"),
+ [b"\v vertical tab was preserved\v"],
+ )
+ self.assertEqual(
+ request.requestHeaders.getRawHeaders(b"form-feed"),
+ [b"\f form feed was preserved \f"],
)
@@ -2150,6 +2265,55 @@ Hello,
self.assertIsInstance(f.value, AttributeError)
self.flushLoggedErrors(AttributeError)
+ def test_contentLengthMalformed(self):
+ """
+ A request with a non-integer C{Content-Length} header fails with a 400
+ response without calling L{Request.process}.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: MORE THAN NINE THOUSAND!",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 9001,
+ ]
+ )
+
+ def test_contentLengthTooPositive(self):
+ """
+ A request with a C{Content-Length} header that begins with a L{+} fails
+ with a 400 response without calling L{Request.process}.
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: +100",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 100,
+ ]
+ )
+
+ def test_contentLengthNegative(self):
+ """
+ A request with a C{Content-Length} header that is negative fails with
+ a 400 response without calling L{Request.process}.
+ This is a potential request smuggling vector: see GHSA-c2jg-hw38-jrqq.
+ """
+ self.assertRequestRejected(
+ [
+ b"GET /a HTTP/1.1",
+ b"Content-Length: -100",
+ b"Host: host.invalid",
+ b"",
+ b"",
+ b"x" * 200,
+ ]
+ )
class QueryArgumentsTests(unittest.TestCase):
@@ -4008,3 +4172,43 @@ class HTTPClientSanitizationTests(unitte
transport.value().splitlines(),
[b": ".join([sanitizedBytes, sanitizedBytes])]
)
+
+
+class HexHelperTests(unittest.SynchronousTestCase):
+ """
+ Test the L{http._hexint} and L{http._ishexdigits} helper functions.
+ """
+
+ badStrings = (b"", b"0x1234", b"feds", b"-123" b"+123")
+
+ def test_isHex(self):
+ """
+ L{_ishexdigits()} returns L{True} for nonempy bytestrings containing
+ hexadecimal digits.
+ """
+ for s in (b"10", b"abcdef", b"AB1234", b"fed", b"123467890"):
+ self.assertIs(True, http._ishexdigits(s))
+
+ def test_decodes(self):
+ """
+ L{_hexint()} returns the integer equivalent of the input.
+ """
+ self.assertEqual(10, http._hexint(b"a"))
+ self.assertEqual(0x10, http._hexint(b"10"))
+ self.assertEqual(0xABCD123, http._hexint(b"abCD123"))
+
+ def test_isNotHex(self):
+ """
+ L{_ishexdigits()} returns L{False} for bytestrings that don't contain
+ hexadecimal digits, including the empty string.
+ """
+ for s in self.badStrings:
+ self.assertIs(False, http._ishexdigits(s))
+
+ def test_decodeNotHex(self):
+ """
+ L{_hexint()} raises L{ValueError} for bytestrings that can't
+ be decoded.
+ """
+ for s in self.badStrings:
+ self.assertRaises(ValueError, http._hexint, s)