File CVE-2019-18348-CRLF_injection_via_host_part.patch of Package python3.38021

From 1e24df641580ae952a922f90c69b146041006257 Mon Sep 17 00:00:00 2001
From: Ashwin Ramaswami <aramaswamis@gmail.com>
Date: Sat, 14 Mar 2020 14:56:06 -0400
Subject: [PATCH] bpo-38576: Disallow control characters in hostnames in
 http.client (GH-18995)

Add host validation for control characters for more CVE-2019-18348 protection.
(cherry picked from commit 9165addc22d05e776a54319a8531ebd0b2fe01ef)

Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
---
 Lib/http/client.py                                                 |   20 ++++++
 Lib/test/test_urllib.py                                            |   32 ++++++++++
 Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst |    1 
 3 files changed, 53 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst

--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -256,6 +256,16 @@ _contains_disallowed_url_pchar_re = re.c
 #  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
 # We are more lenient for assumed real world compatibility purposes.
 
+# These characters are not allowed within HTTP URL paths.
+#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
+#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
+# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
+# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+# Arguably only these _should_ allowed:
+#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
+# We are more lenient for assumed real world compatibility purposes.
+
 # We always set the Content-Length header for these methods because some
 # servers will otherwise respond with a 411
 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -796,6 +806,8 @@ class HTTPConnection:
 
         (self.host, self.port) = self._get_hostport(host, port)
 
+        self._validate_host(self.host)
+
         # This is stored as an instance variable to allow unit
         # tests to replace it with a suitable mockup
         self._create_connection = socket.create_connection
@@ -1104,6 +1116,14 @@ class HTTPConnection:
             # For HTTP/1.0, the server will assume "not chunked"
             pass
 
+    def _validate_host(self, host):
+        """Validate a host so it doesn't contain control characters."""
+        # Prevent CVE-2019-18348.
+        match = _contains_disallowed_url_pchar_re.search(host)
+        if match:
+            raise InvalidURL("URL can't contain control characters. {!r} ".format(host) +
+                             "(found at least {!r})".format(match.group()))
+
     def putheader(self, header, *values):
         """Send a request header line to the server.
 
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -355,6 +355,38 @@ class urlopen_HttpTests(unittest.TestCas
         finally:
             self.unfakehttp()
 
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_control_char_rejected(self):
+        for char_no in list(range(0, 0x21)) + [0x7f]:
+            char = chr(char_no)
+            schemeless_url = "//localhost{}/test/".format(char)
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+            try:
+                escaped_char_repr = repr(char).replace('\\', r'\\')
+                InvalidURL = http.client.InvalidURL
+                with self.assertRaisesRegex(
+                    InvalidURL, "contain control.*{}".format(escaped_char_repr)):
+                    urlopen("http:{}".format(schemeless_url))
+                with self.assertRaisesRegex(InvalidURL, "contain control.*{}".format(escaped_char_repr)):
+                    urlopen("https:{}".format(schemeless_url))
+            finally:
+                self.unfakehttp()
+
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_newline_header_injection_rejected(self):
+        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+        host = "localhost\r\nX-injected: header\r\n"
+        schemeless_url = "//" + host + ":8080/test/?test=a"
+        try:
+            InvalidURL = http.client.InvalidURL
+            with self.assertRaisesRegex(
+                InvalidURL, r"contain control.*\\r"):
+                urlopen("http:{}".format(schemeless_url))
+            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
+                urlopen("https:{}".format(schemeless_url))
+        finally:
+            self.unfakehttp()
+
     def test_read_0_9(self):
         # "0.9" response accepted (but not "simple responses" without
         # a status line)
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
@@ -0,0 +1 @@
+Disallow control characters in hostnames in http.client, addressing CVE-2019-18348. Such potentially malicious header injection URLs now cause a InvalidURL to be raised.
\ No newline at end of file
openSUSE Build Service is sponsored by