File CVE-2021-23336-only-amp-as-query-sep.patch of Package python3.20633
From 5c17dfc5d70ce88be99bc5769b91ce79d7a90d61 Mon Sep 17 00:00:00 2001
From: Senthil Kumaran <senthil@uthcode.com>
Date: Mon, 15 Feb 2021 11:16:43 -0800
Subject: [PATCH] [3.6] bpo-42967: only use '&' as a query string separator
(GH-24297) (GH-24532)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
bpo-42967: [security] Address a web cache-poisoning issue reported in
urllib.parse.parse_qsl().
urllib.parse will only use "&" as query string separator by default
instead of both ";" and "&" as allowed in earlier versions. An optional
argument separator with default value "&" is added to specify the
separator.
Co-authored-by: Éric Araujo <merwok@netwok.org>
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
---
Doc/library/cgi.rst | 8 +-
Doc/library/urllib.parse.rst | 27 ++++++-
Lib/cgi.py | 19 +++--
Lib/test/test_cgi.py | 29 ++++++-
Lib/test/test_urlparse.py | 37 ++++++++++
Lib/urllib/parse.py | 17 +++-
Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1
7 files changed, 117 insertions(+), 21 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -271,13 +271,12 @@ These are useful if you want more contro
algorithms implemented in this module in other circumstances.
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
Parse a query in the environment or from a file (the file defaults to
- ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are
+ ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
passed to :func:`urllib.parse.parse_qs` unchanged.
-
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False)
This function is deprecated in this module. Use :func:`urllib.parse.parse_qs`
@@ -302,6 +301,9 @@ algorithms implemented in this module in
Note that this does not parse nested multipart parts --- use
:class:`FieldStorage` for that.
+ .. versionchanged:: 3.6.13
+ Added the *separator* parameter.
+
.. function:: parse_header(string)
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -136,7 +136,9 @@ or on combining URL components into a UR
now raise :exc:`ValueError`.
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
+.. function:: parse_qs(qs, keep_blank_values=False,
+ strict_parsing=False, encoding='utf-8', errors='replace',
+ max_num_fields=None, separator='&')
Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
@@ -157,16 +159,26 @@ or on combining URL components into a UR
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.
+ The optional argument *separator* is the symbol to use for separating the
+ query arguments. It defaults to ``&``.
+
Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
parameter set to ``True``) to convert such dictionaries into query
strings.
-
.. versionchanged:: 3.2
Add *encoding* and *errors* parameters.
+ .. versionchanged:: 3.6.13
+ Added *separator* parameter with the default value of ``&``. Python
+ versions earlier than Python 3.6.13 allowed using both ``;`` and ``&`` as
+ query parameter separator. This has been changed to allow only a single
+ separator key, with ``&`` as the default separator.
+
-.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
+.. function:: parse_qsl(qs, keep_blank_values=False,
+ strict_parsing=False, encoding='utf-8', errors='replace',
+ max_num_fields=None, separator='&')
Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
@@ -186,12 +198,21 @@ or on combining URL components into a UR
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.
+ The optional argument *separator* is the symbol to use for separating the
+ query arguments. It defaults to ``&``.
+
Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
query strings.
.. versionchanged:: 3.2
Add *encoding* and *errors* parameters.
+ .. versionchanged:: 3.6.13
+ Added *separator* parameter with the default value of ``&``. Python
+ versions earlier than Python 3.6.13 allowed using both ``;`` and ``&`` as
+ query parameter separator. This has been changed to allow only a single
+ separator key, with ``&`` as the default separator.
+
.. function:: urlunparse(parts)
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -117,7 +117,8 @@ log = initlog # The current lo
# 0 ==> unlimited input
maxlen = 0
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
+ strict_parsing=0, separator='&'):
"""Parse a query in the environment or from a file (default stdin)
Arguments, all optional:
@@ -136,6 +137,9 @@ def parse(fp=None, environ=os.environ, k
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
+
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
"""
if fp is None:
fp = sys.stdin
@@ -180,7 +184,7 @@ def parse(fp=None, environ=os.environ, k
qs = ""
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
- encoding=encoding)
+ encoding=encoding, separator=separator)
# parse query string function called from urlparse,
@@ -404,7 +408,7 @@ class FieldStorage:
"""
def __init__(self, fp=None, headers=None, outerboundary=b'',
environ=os.environ, keep_blank_values=0, strict_parsing=0,
- limit=None, encoding='utf-8', errors='replace'):
+ limit=None, encoding='utf-8', errors='replace', separator='&'):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
@@ -448,6 +452,7 @@ class FieldStorage:
method = 'GET'
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
+ self.separator = separator
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
@@ -667,7 +672,8 @@ class FieldStorage:
self.list = []
query = urllib.parse.parse_qsl(
qs, self.keep_blank_values, self.strict_parsing,
- encoding=self.encoding, errors=self.errors)
+ encoding=self.encoding, errors=self.errors,
+ separator=self.separator)
for key, value in query:
self.list.append(MiniFieldStorage(key, value))
self.skip_lines()
@@ -683,7 +689,8 @@ class FieldStorage:
if self.qs_on_post:
query = urllib.parse.parse_qsl(
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
- encoding=self.encoding, errors=self.errors)
+ encoding=self.encoding, errors=self.errors,
+ separator=self.separator)
for key, value in query:
self.list.append(MiniFieldStorage(key, value))
@@ -721,7 +728,7 @@ class FieldStorage:
part = klass(self.fp, headers, ib, environ, keep_blank_values,
strict_parsing,self.limit-self.bytes_read,
- self.encoding, self.errors)
+ self.encoding, self.errors, self.separator)
self.bytes_read += part.bytes_read
self.list.append(part)
if part.done or self.bytes_read >= self.length > 0:
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -54,12 +54,9 @@ parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
- (";", ValueError("bad query field: ''")),
- (";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
- ("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
@@ -74,8 +71,6 @@ parse_strict_test_cases = [
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
@@ -174,6 +169,30 @@ class CgiTests(unittest.TestCase):
for key in expect.keys():
expect_val = expect[key]
self.assertIn(key, fs)
+ if len(expect_val) > 1:
+ self.assertEqual(fs.getvalue(key), expect_val)
+ else:
+ self.assertEqual(fs.getvalue(key), expect_val[0])
+
+ def test_separator(self):
+ parse_semicolon = [
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+ (";", ValueError("bad query field: ''")),
+ (";;", ValueError("bad query field: ''")),
+ ("=;a", ValueError("bad query field: 'a'")),
+ (";b=a", ValueError("bad query field: ''")),
+ ("b;=a", ValueError("bad query field: 'b'")),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+ ]
+ for orig, expect in parse_semicolon:
+ env = {'QUERY_STRING': orig}
+ fs = cgi.FieldStorage(separator=';', environ=env)
+ if isinstance(expect, dict):
+ for key in expect.keys():
+ expect_val = expect[key]
+ self.assertIn(key, fs)
if len(expect_val) > 1:
self.assertEqual(fs.getvalue(key), expect_val)
else:
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -798,6 +798,43 @@ class UrlParseTestCase(unittest.TestCase
errors="ignore")
self.assertEqual(result, [('key', '\u0141-')])
+ def test_parse_qs_separator(self):
+ parse_qs_semicolon_cases = [
+ (";", {}),
+ (";;", {}),
+ (";a=b", {'a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=1;a=2", {'a': ['1', '2']}),
+ (b";", {}),
+ (b";;", {}),
+ (b";a=b", {b'a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ ]
+ for orig, expect in parse_qs_semicolon_cases:
+ with self.subTest("Original: {!r}, Expected: {!r}".format(orig, expect)):
+ result = urllib.parse.parse_qs(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+ def test_parse_qsl_separator(self):
+ parse_qsl_semicolon_cases = [
+ (";", []),
+ (";;", []),
+ (";a=b", [('a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
+ (b";", []),
+ (b";;", []),
+ (b";a=b", [(b'a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ ]
+ for orig, expect in parse_qsl_semicolon_cases:
+ with self.subTest("Original: {!r}, Expected: {!r}".format(orig, expect)):
+ result = urllib.parse.parse_qsl(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -553,7 +553,7 @@ def unquote(string, encoding='utf-8', er
return ''.join(res)
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace', separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -573,10 +573,13 @@ def parse_qs(qs, keep_blank_values=False
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
+
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
+ encoding=encoding, errors=errors, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -585,7 +588,7 @@ def parse_qs(qs, keep_blank_values=False
return parsed_result
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace', separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -605,10 +608,16 @@ def parse_qsl(qs, keep_blank_values=Fals
encoding and errors: specify how to decode percent-encoded sequences
into Unicode characters, as accepted by the bytes.decode() method.
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ if not separator or (not isinstance(separator, (str, bytes))):
+ raise ValueError("Separator must be of type string or bytes.")
+
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
@@ -0,0 +1 @@
+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.