File CVE-2021-33503.patch of Package python-urllib3.30987
commit 48e9fe66f77052e9427dc2c370148da0f0f3934d
Author: Seth Michael Larson <sethmichaellarson@gmail.com>
Date: Wed May 26 10:43:12 2021 -0500
Improve performance of sub-authority splitting in URL
(cherry picked from commit 2d4a3fee6de2fa45eb82169361918f759269b4ec)
diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py
index 793324e5fd88..318a6d60bb34 100644
--- a/src/urllib3/util/url.py
+++ b/src/urllib3/util/url.py
@@ -63,12 +63,12 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
-SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
REG_NAME_PAT,
IPV4_PAT,
IPV6_ADDRZ_PAT,
)
-SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
+_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
UNRESERVED_CHARS = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
@@ -365,7 +365,9 @@ def parse_url(url):
scheme = scheme.lower()
if authority:
- auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
+ auth, _, host_port = authority.rpartition("@")
+ auth = auth or None
+ host, port = _HOST_PORT_RE.match(host_port).groups()
if auth and normalize_uri:
auth = _encode_invalid_chars(auth, USERINFO_CHARS)
if port == "":
diff --git a/test/test_util.py b/test/test_util.py
index 838c751518e6..ef6aa1157b20 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -437,6 +437,16 @@ class TestUtil(object):
fragment="hash",
),
),
+ # Tons of '@' causing backtracking
+ ("https://" + ("@" * 10000) + "[", False),
+ (
+ "https://user:" + ("@" * 10000) + "example.com",
+ Url(
+ scheme="https",
+ auth="user:" + ("%40" * 9999),
+ host="example.com",
+ ),
+ ),
]
@pytest.mark.parametrize("url, expected_url", url_vulnerabilities)