File 0001-Add-XML-resource-access-control.patch of Package python-xmlschema

From 848a5354f1906faa67e5becd04ea9127b8e5e40e Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Mon, 25 May 2020 23:21:41 +0200
Subject: [PATCH 1/4] Add XML resource access control

  - Added allow='all' argument to schema and XMLResource
  - Remove XMLSchemaURLError, replaced by XMLSchemaResourceError
  - Added complex_types and simple_types properties to schemas
  - Fix package and regex test scripts

(cherry picked from commit 6e3ff636e24bc689f54cfb9a11fd025b10c56273)
---
 xmlschema/__init__.py             |  4 +-
 xmlschema/exceptions.py           |  4 +-
 xmlschema/resources.py            | 98 ++++++++++++++++++++++---------
 xmlschema/tests/test_package.py   |  5 +-
 xmlschema/tests/test_regex.py     | 26 +++++---
 xmlschema/tests/test_resources.py | 44 ++++++++++++--
 xmlschema/validators/schema.py    | 38 +++++++++---
 7 files changed, 165 insertions(+), 54 deletions(-)

diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py
index e56cc62..6b94fbe 100644
--- a/xmlschema/__init__.py
+++ b/xmlschema/__init__.py
@@ -9,8 +9,8 @@
 # @author Davide Brunato <brunato@sissa.it>
 #
 from . import limits
-from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError, \
-    XMLSchemaNamespaceError
+from .exceptions import XMLSchemaException, XMLSchemaRegexError, \
+    XMLSchemaResourceError, XMLSchemaNamespaceError
 from .etree import etree_tostring
 from .resources import (
     normalize_url, fetch_resource, load_xml_resource, fetch_namespaces,
diff --git a/xmlschema/exceptions.py b/xmlschema/exceptions.py
index 53dd563..83d5f10 100644
--- a/xmlschema/exceptions.py
+++ b/xmlschema/exceptions.py
@@ -46,8 +46,8 @@ class XMLSchemaIndexError(XMLSchemaException, ImportError):
     pass
 
 
-class XMLSchemaURLError(XMLSchemaException, URLError):
-    pass
+class XMLSchemaResourceError(XMLSchemaException, OSError):
+    """Raised when an error is found accessing an XML resource."""
 
 
 class XMLSchemaRegexError(XMLSchemaException, ValueError):
diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 7e28c33..52dff6f 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -17,13 +17,13 @@ from .compat import (
     PY3, StringIO, BytesIO, string_base_type, urlopen, urlsplit, urljoin, urlunsplit,
     pathname2url, URLError, uses_relative
 )
-from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError
+from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaResourceError
 from .namespaces import get_namespace
 from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring, etree_iter_location_hints
 
 
-DEFUSE_MODES = ('always', 'remote', 'never')
-
+DEFUSE_MODES = ('never', 'remote', 'always')
+SECURITY_MODES = ('all', 'remote', 'local', 'sandbox')
 
 XML_RESOURCE_XPATH_SYMBOLS = {
     'position', 'last', 'not', 'and', 'or', '!=', '<=', '>=', '(', ')', 'text',
@@ -140,7 +140,7 @@ def normalize_url(url, base_url=None, keep_relative=False):
 def fetch_resource(location, base_url=None, timeout=30):
     """
     Fetch a resource trying to accessing it. If the resource is accessible
-    returns the URL, otherwise raises an error (XMLSchemaURLError).
+    returns the URL, otherwise raises an error (XMLSchemaResourceError).
 
     :param location: an URL or a file path.
     :param base_url: reference base URL for normalizing local and relative URLs.
@@ -156,10 +156,13 @@ def fetch_resource(location, base_url=None, timeout=30):
     except URLError as err:
         # fallback joining the path without a base URL
         alt_url = normalize_url(location)
+        if url == alt_url:
+            raise XMLSchemaResourceError("cannot access to resource %r: %s" % (url, err.reason))
+
         try:
             resource = urlopen(alt_url, timeout=timeout)
         except URLError:
-            raise XMLSchemaURLError("cannot access to resource %r: %s" % (url, err.reason))
+            raise XMLSchemaResourceError("cannot access to resource %r: %s" % (url, err.reason))
         else:
             resource.close()
             return url
@@ -168,7 +171,8 @@ def fetch_resource(location, base_url=None, timeout=30):
         return url
 
 
-def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote', timeout=30):
+def fetch_schema_locations(source, locations=None, base_url=None,
+                           allow='all', defuse='remote', timeout=30):
     """
     Fetches schema location hints from an XML data source and a list of location hints.
     If an accessible schema location is not found raises a ValueError.
@@ -179,13 +183,14 @@ def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote
     instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments.
     :param locations: a dictionary or dictionary items with additional schema location hints.
     :param base_url: the same argument of the :class:`XMLResource`.
+    :param allow: the same argument of the :class:`XMLResource`.
     :param defuse: the same argument of the :class:`XMLResource`.
     :param timeout: the same argument of the :class:`XMLResource` but with a reduced default.
     :return: A 2-tuple with the URL referring to the first reachable schema resource \
     and a list of dictionary items with normalized location hints.
     """
     if not isinstance(source, XMLResource):
-        resource = XMLResource(source, base_url, defuse, timeout)
+        resource = XMLResource(source, base_url, allow, defuse, timeout)
     else:
         resource = source
 
@@ -199,28 +204,28 @@ def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote
     for ns, url in sorted(locations, key=lambda x: x[0] != namespace):
         try:
             return fetch_resource(url, base_url, timeout), locations
-        except XMLSchemaURLError:
+        except XMLSchemaResourceError:
             pass
 
     raise XMLSchemaValueError("not found a schema for XML data resource {!r}.".format(source))
 
 
-def fetch_schema(source, locations=None, base_url=None, defuse='remote', timeout=30):
+def fetch_schema(source, locations=None, base_url=None, allow='all', defuse='remote', timeout=30):
     """
     Like :meth:`fetch_schema_locations` but returns only a reachable
     location hint for a schema related to the source's namespace.
     """
-    return fetch_schema_locations(source, locations, base_url, defuse, timeout)[0]
+    return fetch_schema_locations(source, locations, base_url, allow, defuse, timeout)[0]
 
 
-def fetch_namespaces(source, base_url=None, defuse='remote', timeout=30):
+def fetch_namespaces(source, base_url=None, allow='all', defuse='remote', timeout=30):
     """
     Fetches namespaces information from the XML data source. The argument *source*
     can be a string containing the XML document or file path or an url or a file-like
     object or an ElementTree instance or an Element instance. A dictionary with
     namespace mappings is returned.
     """
-    resource = XMLResource(source, base_url, defuse, timeout)
+    resource = XMLResource(source, base_url, allow, defuse, timeout)
     return resource.get_namespaces()
 
 
@@ -256,9 +261,15 @@ class XMLResource(object):
     :param source: a string containing the XML document or file path or an URL or a \
     file like object or an ElementTree or an Element.
     :param base_url: is an optional base URL, used for the normalization of relative paths \
-    when the URL of the resource can't be obtained from the source argument.
-    :param defuse: set the usage of SafeXMLParser for XML data. Can be 'always', 'remote' \
-    or 'never'. Default is 'remote' that uses the defusedxml only when loading remote data.
+    when the URL of the resource can't be obtained from the source argument. For security, \
+    access to a local file resource is always denied if the *base_url* is a remote URL.
+    :param allow: defines the security mode for accessing resource locations. Can be \
+    'all', 'remote', 'local' or 'sandbox'. Default is 'all', which means that all types \
+    of URLs are allowed. With 'remote' only remote resource URLs are allowed. With \
+    'local' only local file paths and non-remote URLs are allowed. With 'sandbox' only \
+    file paths and URLs under the directory path identified by *base_url* are allowed.
+    :param defuse: defines when to defuse XML data using a `SafeXMLParser`. Can be \
+    'always', 'remote' or 'never'. For default defuses only remote XML data.
     :param timeout: the timeout in seconds for the connection attempt in case of remote data.
     :param lazy: if a value `False` is provided the XML data is fully loaded into and \
     processed from memory. For default only the root element of the source is loaded, \
@@ -266,8 +277,10 @@ class XMLResource(object):
     """
     _root = _text = _url = None
 
-    def __init__(self, source, base_url=None, defuse='remote', timeout=300, lazy=True):
+    def __init__(self, source, base_url=None, allow='all',
+                 defuse='remote', timeout=300, lazy=True):
         self._base_url = base_url
+        self.allow = allow
         self.defuse = defuse
         self.timeout = timeout
         self._lazy = lazy
@@ -300,8 +313,15 @@ class XMLResource(object):
             if value is not None and not isinstance(value, string_base_type):
                 msg = "invalid type {!r} for the attribute 'base_url'"
                 raise XMLSchemaTypeError(msg.format(type(value)))
+        elif name == 'allow':
+            if not isinstance(value, string_base_type):
+                msg = "invalid type {!r} for the attribute 'allow'"
+                raise XMLSchemaTypeError(msg.format(type(value)))
+            elif value not in SECURITY_MODES:
+                msg = "'allow' attribute: {!r} is not a security mode"
+                raise XMLSchemaValueError(msg.format(value))
         elif name == 'defuse':
-            if value is not None and not isinstance(value, string_base_type):
+            if not isinstance(value, string_base_type):
                 msg = "invalid type {!r} for the attribute 'defuse'"
                 raise XMLSchemaTypeError(msg.format(type(value)))
             elif value not in DEFUSE_MODES:
@@ -319,6 +339,31 @@ class XMLResource(object):
                 raise XMLSchemaValueError(msg.format(type(value)))
         super(XMLResource, self).__setattr__(name, value)
 
+    def _access_control(self, url):
+        """
+        Checks an URL against the security mode set by the *allow* attribute.
+        In 'sandbox' mode no path restriction is applied if the base URL is `None`.
+
+        :param url: the URL of the resource to be accessed.
+        :raises XMLSchemaResourceError: if the access to the URL is blocked.
+        """
+        if self.allow == 'all':
+            return
+        elif self.allow == 'remote':
+            if is_remote_url(url):
+                return
+            raise XMLSchemaResourceError("block access to local resource {}".format(url))
+        elif is_remote_url(url):
+            raise XMLSchemaResourceError("block access to remote resource {}".format(url))
+        elif self.allow == 'local' or self._base_url is None:
+            return
+        else:
+            # Match on whole path components: a plain string prefix test would
+            # also accept a sibling like '/base-other/x' for the base '/base'.
+            path = os.path.normpath(os.path.normcase(urlsplit(url).path))
+            base_path = os.path.normpath(os.path.normcase(urlsplit(self._base_url).path))
+            if path != base_path and not path.startswith(os.path.join(base_path, '')):
+                raise XMLSchemaResourceError("block access to out of sandbox file {}".format(path))
+
     def _fromsource(self, source):
         url = None
         if hasattr(source, 'tag') and hasattr(source, 'attrib'):
@@ -358,6 +395,7 @@ class XMLResource(object):
                 # Save remote urls for open new resources (non seekable)
                 if is_remote_url(source.url):
                     url = source.url
+                    self._access_control(url)
             except AttributeError:
                 pass
 
@@ -388,17 +426,17 @@ class XMLResource(object):
                 "string containing XML data or an URL or a file-like object is required." % type(source)
             )
         else:
-            resource = urlopen(url, timeout=self.timeout)
+            self._access_control(url)
             _url, self._url = self._url, url
             try:
-                if self._lazy:
-                    for _, root in self.iterparse(resource, events=('start',)):
-                        return root, None, url
-                else:
-                    return self.parse(resource).getroot(), None, url
+                with urlopen(url, timeout=self.timeout) as resource:
+                    if self._lazy:
+                        for _, root in self.iterparse(resource, events=('start',)):
+                            return root, None, url
+                    else:
+                        return self.parse(resource).getroot(), None, url
             finally:
                 self._url = _url
-                resource.close()
 
     @property
     def root(self):
@@ -541,12 +579,14 @@ class XMLResource(object):
         if self.seek(0) == 0:
             return self.source
         elif self._url is None:
-            raise XMLSchemaValueError("can't open, the resource has no URL associated.")
+            raise XMLSchemaResourceError("can't open, the resource has no URL associated.")
 
         try:
             return urlopen(self._url, timeout=self.timeout)
         except URLError as err:
-            raise XMLSchemaURLError("cannot access to resource %r: %s" % (self._url, err.reason))
+            raise XMLSchemaResourceError(
+                "cannot access to resource %r: %s" % (self._url, err.reason)
+            )
 
     def seek(self, position):
         """
@@ -600,7 +640,7 @@ class XMLResource(object):
         try:
             data = resource.read()
         except (OSError, IOError) as err:
-            raise XMLSchemaOSError("cannot load data from %r: %s" % (self._url, err))
+            raise XMLSchemaResourceError("cannot load data from %r: %s" % (self._url, err))
         finally:
             # We don't want to close the file obj if it wasn't originally
             # opened by `XMLResource`. That is the concern of the code
@@ -823,7 +863,7 @@ class XMLResource(object):
         elif isinstance(self._text, string_base_type):
             resource = StringIO(self._text)
         else:
-            if hasattr(self._root, 'nsmap'):
+            try:
                 # Can extract namespace mapping information only from lxml etree structures
                 if root_only:
                     for k, v in self._root.nsmap.items():
@@ -832,6 +872,8 @@ class XMLResource(object):
                     for elem in self._root.iter():
                         for k, v in elem.nsmap.items():
                             update_nsmap(k if k is not None else '', v)
+            except AttributeError:
+                pass
 
             if nsmap.get('') == '':
                 del nsmap['']
diff --git a/xmlschema/tests/test_package.py b/xmlschema/tests/test_package.py
index b4a03c2..1b636a1 100644
--- a/xmlschema/tests/test_package.py
+++ b/xmlschema/tests/test_package.py
@@ -39,6 +39,7 @@ class TestPackaging(unittest.TestCase):
         exclude = {
             'regex.py': [240, 241],
             'codepoints.py': [543],
+            'cli.py': [117, 133, 137, 140],
         }
 
         message = "\nFound a debug missing statement at line %d or file %r: %r"
@@ -56,7 +57,9 @@ class TestPackaging(unittest.TestCase):
                 continue
 
             match = self.missing_debug.search(line)
-            self.assertIsNone(match, message % (lineno, filename, match.group(0) if match else None))
+            if match is None or filename.endswith('/cli.py') and match.group(0) == 'print(':
+                continue
+            self.assertIsNone(match, message % (lineno, filename, match.group(0)))
 
     def test_version(self):
         message = "\nFound a different version at line %d or file %r: %r (may be %r)."
diff --git a/xmlschema/tests/test_regex.py b/xmlschema/tests/test_regex.py
index b5bb6c8..c5d60a0 100644
--- a/xmlschema/tests/test_regex.py
+++ b/xmlschema/tests/test_regex.py
@@ -31,16 +31,19 @@ class TestCodePoints(unittest.TestCase):
     def test_iter_code_points(self):
         self.assertEqual(list(iter_code_points([10, 20, 11, 12, 25, (9, 21), 21])), [(9, 22), 25])
         self.assertEqual(list(iter_code_points([10, 20, 11, 12, 25, (9, 20), 21])), [(9, 22), 25])
-        self.assertEqual(list(iter_code_points({2, 120, 121, (150, 260)})), [2, (120, 122), (150, 260)])
+        self.assertEqual(list(iter_code_points({2, 120, 121, (150, 260)})),
+                         [2, (120, 122), (150, 260)])
         self.assertEqual(
             list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0])),
             [0, (8, 23), 25]
         )
         self.assertEqual(
-            list(e for e in iter_code_points([10, 20, 11, 12, 25, (9, 21)], reverse=True)), [25, (9, 21)]
+            list(e for e in iter_code_points([10, 20, 11, 12, 25, (9, 21)], reverse=True)),
+            [25, (9, 21)]
         )
         self.assertEqual(
-            list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0], reverse=True)),
+            list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0],
+                                  reverse=True)),
             [25, (8, 23), 0]
         )
 
@@ -89,9 +92,11 @@ class TestUnicodeSubset(unittest.TestCase):
 
     def test_complement(self):
         cds = UnicodeSubset([50, 90, 10, 90])
-        self.assertEqual(list(cds.complement()), [(0, 10), (11, 50), (51, 90), (91, sys.maxunicode + 1)])
+        self.assertEqual(list(cds.complement()),
+                         [(0, 10), (11, 50), (51, 90), (91, sys.maxunicode + 1)])
         cds.add(11)
-        self.assertEqual(list(cds.complement()), [(0, 10), (12, 50), (51, 90), (91, sys.maxunicode + 1)])
+        self.assertEqual(list(cds.complement()),
+                         [(0, 10), (12, 50), (51, 90), (91, sys.maxunicode + 1)])
         cds.add((0, 10))
         self.assertEqual(list(cds.complement()), [(12, 50), (51, 90), (91, sys.maxunicode + 1)])
 
@@ -142,20 +147,22 @@ class TestUnicodeCategories(unittest.TestCase):
     """
     def test_build_unicode_categories(self):
         categories = build_unicode_categories('not_existing_file.json')
-        self.assertEqual(sum(len(v) for k, v in categories.items() if len(k) > 1), sys.maxunicode + 1)
+        self.assertEqual(sum(len(v) for k, v in categories.items() if len(k) > 1),
+                         sys.maxunicode + 1)
         self.assertEqual(min([min(s) for s in categories.values()]), 0)
         self.assertEqual(max([max(s) for s in categories.values()]), sys.maxunicode)
         base_sets = [set(v) for k, v in categories.items() if len(k) > 1]
         self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t))
 
     def test_unicode_categories(self):
-        self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1), sys.maxunicode + 1)
+        self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1),
+                         sys.maxunicode + 1)
         self.assertEqual(min([min(s) for s in UNICODE_CATEGORIES.values()]), 0)
         self.assertEqual(max([max(s) for s in UNICODE_CATEGORIES.values()]), sys.maxunicode)
         base_sets = [set(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1]
         self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t))
 
-    @unittest.skipIf(not ((3, 7) <= sys.version_info < (3, 8)), "Test only for Python 3.7")
+    @unittest.skipIf(not ((3, 8) <= sys.version_info < (3, 9)), "Test only for Python 3.8")
     def test_unicodedata_category(self):
         for key in UNICODE_CATEGORIES:
             for cp in UNICODE_CATEGORIES[key]:
@@ -205,7 +212,8 @@ class TestPatterns(unittest.TestCase):
         pattern = re.compile(regex)
         self.assertIsNone(pattern.search('alpha\r'))
         self.assertEqual(pattern.search('beta').group(0), 'beta')
-        self.assertEqual(pattern.search('beta\n').group(0), 'beta')  # $ matches also a \n at last position
+        self.assertEqual(pattern.search('beta\n').group(0),
+                         'beta')  # $ matches also a \n at last position
         self.assertIsNone(pattern.search('beta\n '))
         self.assertIsNone(pattern.search(''))
         self.assertIsNone(pattern.search('over the maximum length!'))
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 98b2745..28fde20 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -25,7 +25,7 @@ except ImportError:
 
 from xmlschema import (
     fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations,
-    load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema, XMLSchema10, XMLSchema11
+    load_xml_resource, XMLResource, XMLSchemaResourceError, XMLSchema, XMLSchema10, XMLSchema11
 )
 from xmlschema.tests import SKIP_REMOTE_TESTS, casepath
 from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
@@ -146,7 +146,7 @@ class TestResources(unittest.TestCase):
 
     def test_fetch_resource(self):
         wrong_path = casepath('resources/dummy_file.txt')
-        self.assertRaises(XMLSchemaURLError, fetch_resource, wrong_path)
+        self.assertRaises(XMLSchemaResourceError, fetch_resource, wrong_path)
         right_path = casepath('resources/dummy file.txt')
         self.assertTrue(fetch_resource(right_path).endswith('dummy file.txt'))
 
@@ -378,11 +378,47 @@ class TestResources(unittest.TestCase):
         self.assertEqual(resource.namespace, 'http://example.com/ns/collection')
         self.assertEqual(XMLResource('<A/>').namespace, '')
 
+    def test_xml_resource_access(self):
+        resource = XMLResource(self.vh_xml_file)
+        base_url = resource.base_url
+
+        XMLResource(self.vh_xml_file, allow='local')
+        XMLResource(self.vh_xml_file, allow='sandbox')
+
+        with self.assertRaises(XMLSchemaResourceError) as ctx:
+            XMLResource(self.vh_xml_file, allow='remote')
+        self.assertTrue(str(ctx.exception).startswith("block access to local resource"))
+
+        with self.assertRaises(XMLSchemaResourceError) as ctx:
+            XMLResource("https://xmlschema.test/vehicles.xsd", allow='local')
+        self.assertEqual(str(ctx.exception),
+                         "block access to remote resource https://xmlschema.test/vehicles.xsd")
+
+        with self.assertRaises(XMLSchemaResourceError) as ctx:
+            XMLResource("https://xmlschema.test/vehicles.xsd", allow='sandbox')
+        self.assertEqual(str(ctx.exception),
+                         "block access to remote resource https://xmlschema.test/vehicles.xsd")
+
+        with self.assertRaises(XMLSchemaResourceError) as ctx:
+            XMLResource("/tmp/vehicles.xsd", base_url=base_url, allow='sandbox')
+        self.assertEqual(str(ctx.exception),
+                         "block access to out of sandbox file /tmp/vehicles.xsd")
+
+        with self.assertRaises(TypeError) as ctx:
+            XMLResource("https://xmlschema.test/vehicles.xsd", allow=None)
+        self.assertEqual(str(ctx.exception),
+                         "invalid type <class 'NoneType'> for the attribute 'allow'")
+
+        with self.assertRaises(ValueError) as ctx:
+            XMLResource("https://xmlschema.test/vehicles.xsd", allow='any')
+        self.assertEqual(str(ctx.exception),
+                         "'allow' attribute: 'any' is not a security mode")
+
     def test_xml_resource_defuse(self):
         resource = XMLResource(self.vh_xml_file, defuse='never')
         self.assertEqual(resource.defuse, 'never')
         self.assertRaises(ValueError, XMLResource, self.vh_xml_file, defuse='all')
-        self.assertRaises(ValueError, XMLResource, self.vh_xml_file, defuse=None)
+        self.assertRaises(TypeError, XMLResource, self.vh_xml_file, defuse=None)
         self.assertIsInstance(resource.root, etree_element)
         resource = XMLResource(self.vh_xml_file, defuse='always')
         self.assertIsInstance(resource.root, py_etree_element)
@@ -476,7 +512,7 @@ class TestResources(unittest.TestCase):
         self.assertTrue(data.startswith('<?xml '))
         xml_file.close()
         resource = XMLResource('<A/>')
-        self.assertRaises(ValueError, resource.open)
+        self.assertRaises(XMLSchemaResourceError, resource.open)
 
         resource = XMLResource(source=open(self.vh_xml_file))
         xml_file = resource.open()
diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
index 6b5c218..1222843 100644
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@@ -26,7 +26,7 @@ import sys
 
 
 from ..compat import add_metaclass
-from ..exceptions import XMLSchemaTypeError, XMLSchemaURLError, XMLSchemaKeyError, \
+from ..exceptions import XMLSchemaTypeError, XMLSchemaKeyError, \
     XMLSchemaValueError, XMLSchemaOSError, XMLSchemaNamespaceError
 from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \
     VC_TYPE_UNAVAILABLE, VC_FACET_AVAILABLE, VC_FACET_UNAVAILABLE, XSD_SCHEMA, \
@@ -166,9 +166,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
     :param base_url: is an optional base URL, used for the normalization of relative paths \
     when the URL of the schema resource can't be obtained from the source argument.
     :type base_url: str or None
-    :param defuse: defines when to defuse XML data. Can be 'always', 'remote' or 'never'. \
-    For default defuse only remote XML data.
-    :type defuse: str or None
+    :param allow: defines the security mode for accessing resource locations. Can be \
+    'all', 'remote', 'local' or 'sandbox'. Default is 'all', which means that all types \
+    of URLs are allowed. With 'remote' only remote resource URLs are allowed. With \
+    'local' only local file paths and non-remote URLs are allowed. With 'sandbox' only \
+    file paths and URLs under the directory path identified by the source or by the \
+    *base_url* argument are allowed.
+    :type allow: str
+    :param defuse: defines when to defuse XML data using a `SafeXMLParser`. Can be \
+    'always', 'remote' or 'never'. For default defuses only remote XML data.
+    :type defuse: str
     :param timeout: the timeout in seconds for fetching resources. Default is `300`.
     :type timeout: int
     :param build: defines whether build the schema maps. Default is `True`.
@@ -273,7 +280,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
     xpath_tokens = None
 
     def __init__(self, source, namespace=None, validation='strict', global_maps=None,
-                 converter=None, locations=None, base_url=None, defuse='remote',
+                 converter=None, locations=None, base_url=None, allow='all', defuse='remote',
                  timeout=300, build=True, use_meta=True, loglevel=None):
         super(XMLSchemaBase, self).__init__(validation)
         ElementPathMixin.__init__(self)
@@ -283,7 +290,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         elif build and global_maps is None:
             logger.setLevel(logging.WARNING)
 
-        self.source = XMLResource(source, base_url, defuse, timeout, lazy=False)
+        self.source = XMLResource(source, base_url, allow, defuse, timeout, lazy=False)
         logger.debug("Read schema from %r", self.source)
 
         self.imports = {}
@@ -500,6 +507,11 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
         """The base URL of the source of the schema."""
         return self.source.base_url
 
+    @property
+    def allow(self):
+        """The resource access security mode: 'all', 'remote', 'local' or 'sandbox'."""
+        return self.source.allow
+
     @property
     def defuse(self):
         """Defines when to defuse XML data, can be 'always', 'remote' or 'never'."""
@@ -605,6 +617,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                       "and will be removed in 1.1 version.", DeprecationWarning)
         return self.identities
 
+    @property
+    def simple_types(self):
+        return [x for x in self.types.values() if x.is_simple()]
+
+    @property
+    def complex_types(self):
+        return [x for x in self.types.values() if x.is_complex()]
+
     @classmethod
     def create_meta_schema(cls, source=None, base_schemas=None, global_maps=None):
         """
@@ -896,7 +916,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                 #   https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-include
                 self.warnings.append("Include schema failed: %s." % str(err))
                 warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3)
-            except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
+            except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
                 msg = 'cannot include schema %r: %s' % (child.attrib['schemaLocation'], err)
                 if isinstance(err, (XMLSchemaParseError, ParseError)):
                     self.parse_error(msg)
@@ -919,7 +939,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                 warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3)
                 if any(e.tag != XSD_ANNOTATION for e in child):
                     self.parse_error(str(err), child)
-            except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
+            except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
                 msg = 'cannot redefine schema %r: %s' % (child.attrib['schemaLocation'], err)
                 if isinstance(err, (XMLSchemaParseError, ParseError)):
                     self.parse_error(msg)
@@ -950,6 +970,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
                 global_maps=self.maps,
                 converter=self.converter,
                 base_url=self.base_url,
+                allow=self.allow,
                 defuse=self.defuse,
                 timeout=self.timeout,
                 build=False,
@@ -1081,6 +1102,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
             global_maps=self.maps,
             converter=self.converter,
             base_url=self.base_url,
+            allow=self.allow,
             defuse=self.defuse,
             timeout=self.timeout,
             build=build,
-- 
2.29.2

openSUSE Build Service is sponsored by