File 0001-Add-XML-resource-access-control.patch of Package python-xmlschema
From 848a5354f1906faa67e5becd04ea9127b8e5e40e Mon Sep 17 00:00:00 2001
From: Davide Brunato <brunato@sissa.it>
Date: Mon, 25 May 2020 23:21:41 +0200
Subject: [PATCH 1/4] Add XML resource access control
- Added allow='all' argument to schema and XMLResource
- Remove XMLSchemaURLError, replaced by XMLSchemaResourceError
- Added complex_types and simple_types properties to schemas
- Fix package and regex test scripts
(cherry picked from commit 6e3ff636e24bc689f54cfb9a11fd025b10c56273)
---
xmlschema/__init__.py | 4 +-
xmlschema/exceptions.py | 4 +-
xmlschema/resources.py | 98 ++++++++++++++++++++++---------
xmlschema/tests/test_package.py | 5 +-
xmlschema/tests/test_regex.py | 26 +++++---
xmlschema/tests/test_resources.py | 44 ++++++++++++--
xmlschema/validators/schema.py | 38 +++++++++---
7 files changed, 165 insertions(+), 54 deletions(-)
diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py
index e56cc62..6b94fbe 100644
--- a/xmlschema/__init__.py
+++ b/xmlschema/__init__.py
@@ -9,8 +9,8 @@
# @author Davide Brunato <brunato@sissa.it>
#
from . import limits
-from .exceptions import XMLSchemaException, XMLSchemaRegexError, XMLSchemaURLError, \
- XMLSchemaNamespaceError
+from .exceptions import XMLSchemaException, XMLSchemaRegexError, \
+ XMLSchemaResourceError, XMLSchemaNamespaceError
from .etree import etree_tostring
from .resources import (
normalize_url, fetch_resource, load_xml_resource, fetch_namespaces,
diff --git a/xmlschema/exceptions.py b/xmlschema/exceptions.py
index 53dd563..83d5f10 100644
--- a/xmlschema/exceptions.py
+++ b/xmlschema/exceptions.py
@@ -46,8 +46,8 @@ class XMLSchemaIndexError(XMLSchemaException, ImportError):
pass
-class XMLSchemaURLError(XMLSchemaException, URLError):
- pass
+class XMLSchemaResourceError(XMLSchemaException, OSError):
+ """Raised when an error is found accessing an XML resource."""
class XMLSchemaRegexError(XMLSchemaException, ValueError):
diff --git a/xmlschema/resources.py b/xmlschema/resources.py
index 7e28c33..52dff6f 100644
--- a/xmlschema/resources.py
+++ b/xmlschema/resources.py
@@ -17,13 +17,13 @@ from .compat import (
PY3, StringIO, BytesIO, string_base_type, urlopen, urlsplit, urljoin, urlunsplit,
pathname2url, URLError, uses_relative
)
-from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError
+from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaResourceError
from .namespaces import get_namespace
from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring, etree_iter_location_hints
-DEFUSE_MODES = ('always', 'remote', 'never')
-
+DEFUSE_MODES = ('never', 'remote', 'always')
+SECURITY_MODES = ('all', 'remote', 'local', 'sandbox')
XML_RESOURCE_XPATH_SYMBOLS = {
'position', 'last', 'not', 'and', 'or', '!=', '<=', '>=', '(', ')', 'text',
@@ -140,7 +140,7 @@ def normalize_url(url, base_url=None, keep_relative=False):
def fetch_resource(location, base_url=None, timeout=30):
"""
Fetch a resource trying to accessing it. If the resource is accessible
- returns the URL, otherwise raises an error (XMLSchemaURLError).
+ returns the URL, otherwise raises an error (XMLSchemaResourceError).
:param location: an URL or a file path.
:param base_url: reference base URL for normalizing local and relative URLs.
@@ -156,10 +156,13 @@ def fetch_resource(location, base_url=None, timeout=30):
except URLError as err:
# fallback joining the path without a base URL
alt_url = normalize_url(location)
+ if url == alt_url:
+ raise XMLSchemaResourceError("cannot access to resource %r: %s" % (url, err.reason))
+
try:
resource = urlopen(alt_url, timeout=timeout)
except URLError:
- raise XMLSchemaURLError("cannot access to resource %r: %s" % (url, err.reason))
+ raise XMLSchemaResourceError("cannot access to resource %r: %s" % (url, err.reason))
else:
resource.close()
return url
@@ -168,7 +171,8 @@ def fetch_resource(location, base_url=None, timeout=30):
return url
-def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote', timeout=30):
+def fetch_schema_locations(source, locations=None, base_url=None,
+ allow='all', defuse='remote', timeout=30):
"""
Fetches schema location hints from an XML data source and a list of location hints.
If an accessible schema location is not found raises a ValueError.
@@ -179,13 +183,14 @@ def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote
instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments.
:param locations: a dictionary or dictionary items with additional schema location hints.
:param base_url: the same argument of the :class:`XMLResource`.
+ :param allow: the same argument of the :class:`XMLResource`.
:param defuse: the same argument of the :class:`XMLResource`.
:param timeout: the same argument of the :class:`XMLResource` but with a reduced default.
:return: A 2-tuple with the URL referring to the first reachable schema resource \
and a list of dictionary items with normalized location hints.
"""
if not isinstance(source, XMLResource):
- resource = XMLResource(source, base_url, defuse, timeout)
+ resource = XMLResource(source, base_url, allow, defuse, timeout)
else:
resource = source
@@ -199,28 +204,28 @@ def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote
for ns, url in sorted(locations, key=lambda x: x[0] != namespace):
try:
return fetch_resource(url, base_url, timeout), locations
- except XMLSchemaURLError:
+ except XMLSchemaResourceError:
pass
raise XMLSchemaValueError("not found a schema for XML data resource {!r}.".format(source))
-def fetch_schema(source, locations=None, base_url=None, defuse='remote', timeout=30):
+def fetch_schema(source, locations=None, base_url=None, allow='all', defuse='remote', timeout=30):
"""
Like :meth:`fetch_schema_locations` but returns only a reachable
location hint for a schema related to the source's namespace.
"""
- return fetch_schema_locations(source, locations, base_url, defuse, timeout)[0]
+ return fetch_schema_locations(source, locations, base_url, allow, defuse, timeout)[0]
-def fetch_namespaces(source, base_url=None, defuse='remote', timeout=30):
+def fetch_namespaces(source, base_url=None, allow='all', defuse='remote', timeout=30):
"""
Fetches namespaces information from the XML data source. The argument *source*
can be a string containing the XML document or file path or an url or a file-like
object or an ElementTree instance or an Element instance. A dictionary with
namespace mappings is returned.
"""
- resource = XMLResource(source, base_url, defuse, timeout)
+ resource = XMLResource(source, base_url, allow, defuse, timeout)
return resource.get_namespaces()
@@ -256,9 +261,15 @@ class XMLResource(object):
:param source: a string containing the XML document or file path or an URL or a \
file like object or an ElementTree or an Element.
:param base_url: is an optional base URL, used for the normalization of relative paths \
- when the URL of the resource can't be obtained from the source argument.
- :param defuse: set the usage of SafeXMLParser for XML data. Can be 'always', 'remote' \
- or 'never'. Default is 'remote' that uses the defusedxml only when loading remote data.
+ when the URL of the resource can't be obtained from the source argument. For security \
+ access to a local file resource is always denied if the *base_url* is a remote URL.
+ :param allow: defines the security mode for accessing resource locations. Can be \
+ 'all', 'remote', 'local' or 'sandbox'. Default is 'all' that means all types of \
+ URLs are allowed. With 'remote' only remote resource URLs are allowed. With 'local' \
+ only file paths and local URLs are allowed. With 'sandbox' only file paths and URLs that \
+ are under the directory path identified by the *base_url* argument are allowed.
+ :param defuse: defines when to defuse XML data using a `SafeXMLParser`. Can be \
+ 'always', 'remote' or 'never'. By default only remote XML data is defused.
:param timeout: the timeout in seconds for the connection attempt in case of remote data.
:param lazy: if a value `False` is provided the XML data is fully loaded into and \
processed from memory. For default only the root element of the source is loaded, \
@@ -266,8 +277,10 @@ class XMLResource(object):
"""
_root = _text = _url = None
- def __init__(self, source, base_url=None, defuse='remote', timeout=300, lazy=True):
+ def __init__(self, source, base_url=None, allow='all',
+ defuse='remote', timeout=300, lazy=True):
self._base_url = base_url
+ self.allow = allow
self.defuse = defuse
self.timeout = timeout
self._lazy = lazy
@@ -300,8 +313,15 @@ class XMLResource(object):
if value is not None and not isinstance(value, string_base_type):
msg = "invalid type {!r} for the attribute 'base_url'"
raise XMLSchemaTypeError(msg.format(type(value)))
+ elif name == 'allow':
+ if not isinstance(value, str):
+ msg = "invalid type {!r} for the attribute 'allow'"
+ raise XMLSchemaTypeError(msg.format(type(value)))
+ elif value not in SECURITY_MODES:
+ msg = "'allow' attribute: {!r} is not a security mode"
+ raise XMLSchemaValueError(msg.format(value))
elif name == 'defuse':
- if value is not None and not isinstance(value, string_base_type):
+ if not isinstance(value, string_base_type):
msg = "invalid type {!r} for the attribute 'defuse'"
raise XMLSchemaTypeError(msg.format(type(value)))
elif value not in DEFUSE_MODES:
@@ -319,6 +339,23 @@ class XMLResource(object):
raise XMLSchemaValueError(msg.format(type(value)))
super(XMLResource, self).__setattr__(name, value)
+ def _access_control(self, url):
+ if self.allow == 'all':
+ return
+ elif self.allow == 'remote':
+ if is_remote_url(url):
+ return
+ raise XMLSchemaResourceError("block access to local resource {}".format(url))
+ elif is_remote_url(url):
+ raise XMLSchemaResourceError("block access to remote resource {}".format(url))
+ elif self.allow == 'local' or self._base_url is None:
+ return
+ else:
+ path = os.path.normpath(os.path.normcase(urlsplit(url).path))
+ base_path = os.path.normpath(os.path.normcase(urlsplit(self._base_url).path))
+ if not path.startswith(base_path):
+ raise XMLSchemaResourceError("block access to out of sandbox file {}".format(path))
+
def _fromsource(self, source):
url = None
if hasattr(source, 'tag') and hasattr(source, 'attrib'):
@@ -358,6 +395,7 @@ class XMLResource(object):
# Save remote urls for open new resources (non seekable)
if is_remote_url(source.url):
url = source.url
+ self._access_control(url)
except AttributeError:
pass
@@ -388,17 +426,17 @@ class XMLResource(object):
"string containing XML data or an URL or a file-like object is required." % type(source)
)
else:
- resource = urlopen(url, timeout=self.timeout)
+ self._access_control(url)
_url, self._url = self._url, url
try:
- if self._lazy:
- for _, root in self.iterparse(resource, events=('start',)):
- return root, None, url
- else:
- return self.parse(resource).getroot(), None, url
+ with urlopen(url, timeout=self.timeout) as resource:
+ if self._lazy:
+ for _, root in self.iterparse(resource, events=('start',)):
+ return root, None, url
+ else:
+ return self.parse(resource).getroot(), None, url
finally:
self._url = _url
- resource.close()
@property
def root(self):
@@ -541,12 +579,14 @@ class XMLResource(object):
if self.seek(0) == 0:
return self.source
elif self._url is None:
- raise XMLSchemaValueError("can't open, the resource has no URL associated.")
+ raise XMLSchemaResourceError("can't open, the resource has no URL associated.")
try:
return urlopen(self._url, timeout=self.timeout)
except URLError as err:
- raise XMLSchemaURLError("cannot access to resource %r: %s" % (self._url, err.reason))
+ raise XMLSchemaResourceError(
+ "cannot access to resource %r: %s" % (self._url, err.reason)
+ )
def seek(self, position):
"""
@@ -600,7 +640,7 @@ class XMLResource(object):
try:
data = resource.read()
except (OSError, IOError) as err:
- raise XMLSchemaOSError("cannot load data from %r: %s" % (self._url, err))
+ raise XMLSchemaResourceError("cannot load data from %r: %s" % (self._url, err))
finally:
# We don't want to close the file obj if it wasn't originally
# opened by `XMLResource`. That is the concern of the code
@@ -823,7 +863,7 @@ class XMLResource(object):
elif isinstance(self._text, string_base_type):
resource = StringIO(self._text)
else:
- if hasattr(self._root, 'nsmap'):
+ try:
# Can extract namespace mapping information only from lxml etree structures
if root_only:
for k, v in self._root.nsmap.items():
@@ -832,6 +872,8 @@ class XMLResource(object):
for elem in self._root.iter():
for k, v in elem.nsmap.items():
update_nsmap(k if k is not None else '', v)
+ except AttributeError:
+ pass
if nsmap.get('') == '':
del nsmap['']
diff --git a/xmlschema/tests/test_package.py b/xmlschema/tests/test_package.py
index b4a03c2..1b636a1 100644
--- a/xmlschema/tests/test_package.py
+++ b/xmlschema/tests/test_package.py
@@ -39,6 +39,7 @@ class TestPackaging(unittest.TestCase):
exclude = {
'regex.py': [240, 241],
'codepoints.py': [543],
+ 'cli.py': [117, 133, 137, 140],
}
message = "\nFound a debug missing statement at line %d or file %r: %r"
@@ -56,7 +57,9 @@ class TestPackaging(unittest.TestCase):
continue
match = self.missing_debug.search(line)
- self.assertIsNone(match, message % (lineno, filename, match.group(0) if match else None))
+ if match is None or filename.endswith('/cli.py') and match.group(0) == 'print(':
+ continue
+ self.assertIsNone(match, message % (lineno, filename, match.group(0)))
def test_version(self):
message = "\nFound a different version at line %d or file %r: %r (may be %r)."
diff --git a/xmlschema/tests/test_regex.py b/xmlschema/tests/test_regex.py
index b5bb6c8..c5d60a0 100644
--- a/xmlschema/tests/test_regex.py
+++ b/xmlschema/tests/test_regex.py
@@ -31,16 +31,19 @@ class TestCodePoints(unittest.TestCase):
def test_iter_code_points(self):
self.assertEqual(list(iter_code_points([10, 20, 11, 12, 25, (9, 21), 21])), [(9, 22), 25])
self.assertEqual(list(iter_code_points([10, 20, 11, 12, 25, (9, 20), 21])), [(9, 22), 25])
- self.assertEqual(list(iter_code_points({2, 120, 121, (150, 260)})), [2, (120, 122), (150, 260)])
+ self.assertEqual(list(iter_code_points({2, 120, 121, (150, 260)})),
+ [2, (120, 122), (150, 260)])
self.assertEqual(
list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0])),
[0, (8, 23), 25]
)
self.assertEqual(
- list(e for e in iter_code_points([10, 20, 11, 12, 25, (9, 21)], reverse=True)), [25, (9, 21)]
+ list(e for e in iter_code_points([10, 20, 11, 12, 25, (9, 21)], reverse=True)),
+ [25, (9, 21)]
)
self.assertEqual(
- list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0], reverse=True)),
+ list(iter_code_points([10, 20, (10, 22), 11, 12, 25, 8, (9, 20), 21, 22, 9, 0],
+ reverse=True)),
[25, (8, 23), 0]
)
@@ -89,9 +92,11 @@ class TestUnicodeSubset(unittest.TestCase):
def test_complement(self):
cds = UnicodeSubset([50, 90, 10, 90])
- self.assertEqual(list(cds.complement()), [(0, 10), (11, 50), (51, 90), (91, sys.maxunicode + 1)])
+ self.assertEqual(list(cds.complement()),
+ [(0, 10), (11, 50), (51, 90), (91, sys.maxunicode + 1)])
cds.add(11)
- self.assertEqual(list(cds.complement()), [(0, 10), (12, 50), (51, 90), (91, sys.maxunicode + 1)])
+ self.assertEqual(list(cds.complement()),
+ [(0, 10), (12, 50), (51, 90), (91, sys.maxunicode + 1)])
cds.add((0, 10))
self.assertEqual(list(cds.complement()), [(12, 50), (51, 90), (91, sys.maxunicode + 1)])
@@ -142,20 +147,22 @@ class TestUnicodeCategories(unittest.TestCase):
"""
def test_build_unicode_categories(self):
categories = build_unicode_categories('not_existing_file.json')
- self.assertEqual(sum(len(v) for k, v in categories.items() if len(k) > 1), sys.maxunicode + 1)
+ self.assertEqual(sum(len(v) for k, v in categories.items() if len(k) > 1),
+ sys.maxunicode + 1)
self.assertEqual(min([min(s) for s in categories.values()]), 0)
self.assertEqual(max([max(s) for s in categories.values()]), sys.maxunicode)
base_sets = [set(v) for k, v in categories.items() if len(k) > 1]
self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t))
def test_unicode_categories(self):
- self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1), sys.maxunicode + 1)
+ self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1),
+ sys.maxunicode + 1)
self.assertEqual(min([min(s) for s in UNICODE_CATEGORIES.values()]), 0)
self.assertEqual(max([max(s) for s in UNICODE_CATEGORIES.values()]), sys.maxunicode)
base_sets = [set(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1]
self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t))
- @unittest.skipIf(not ((3, 7) <= sys.version_info < (3, 8)), "Test only for Python 3.7")
+ @unittest.skipIf(not ((3, 8) <= sys.version_info < (3, 9)), "Test only for Python 3.8")
def test_unicodedata_category(self):
for key in UNICODE_CATEGORIES:
for cp in UNICODE_CATEGORIES[key]:
@@ -205,7 +212,8 @@ class TestPatterns(unittest.TestCase):
pattern = re.compile(regex)
self.assertIsNone(pattern.search('alpha\r'))
self.assertEqual(pattern.search('beta').group(0), 'beta')
- self.assertEqual(pattern.search('beta\n').group(0), 'beta') # $ matches also a \n at last position
+ self.assertEqual(pattern.search('beta\n').group(0),
+ 'beta') # $ matches also a \n at last position
self.assertIsNone(pattern.search('beta\n '))
self.assertIsNone(pattern.search(''))
self.assertIsNone(pattern.search('over the maximum length!'))
diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py
index 98b2745..28fde20 100644
--- a/xmlschema/tests/test_resources.py
+++ b/xmlschema/tests/test_resources.py
@@ -25,7 +25,7 @@ except ImportError:
from xmlschema import (
fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations,
- load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema, XMLSchema10, XMLSchema11
+ load_xml_resource, XMLResource, XMLSchemaResourceError, XMLSchema, XMLSchema10, XMLSchema11
)
from xmlschema.tests import SKIP_REMOTE_TESTS, casepath
from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO
@@ -146,7 +146,7 @@ class TestResources(unittest.TestCase):
def test_fetch_resource(self):
wrong_path = casepath('resources/dummy_file.txt')
- self.assertRaises(XMLSchemaURLError, fetch_resource, wrong_path)
+ self.assertRaises(XMLSchemaResourceError, fetch_resource, wrong_path)
right_path = casepath('resources/dummy file.txt')
self.assertTrue(fetch_resource(right_path).endswith('dummy file.txt'))
@@ -378,11 +378,47 @@ class TestResources(unittest.TestCase):
self.assertEqual(resource.namespace, 'http://example.com/ns/collection')
self.assertEqual(XMLResource('<A/>').namespace, '')
+ def test_xml_resource_access(self):
+ resource = XMLResource(self.vh_xml_file)
+ base_url = resource.base_url
+
+ XMLResource(self.vh_xml_file, allow='local')
+ XMLResource(self.vh_xml_file, allow='sandbox')
+
+ with self.assertRaises(XMLSchemaResourceError) as ctx:
+ XMLResource(self.vh_xml_file, allow='remote')
+ self.assertTrue(str(ctx.exception).startswith("block access to local resource"))
+
+ with self.assertRaises(XMLSchemaResourceError) as ctx:
+ XMLResource("https://xmlschema.test/vehicles.xsd", allow='local')
+ self.assertEqual(str(ctx.exception),
+ "block access to remote resource https://xmlschema.test/vehicles.xsd")
+
+ with self.assertRaises(XMLSchemaResourceError) as ctx:
+ XMLResource("https://xmlschema.test/vehicles.xsd", allow='sandbox')
+ self.assertEqual(str(ctx.exception),
+ "block access to remote resource https://xmlschema.test/vehicles.xsd")
+
+ with self.assertRaises(XMLSchemaResourceError) as ctx:
+ XMLResource("/tmp/vehicles.xsd", base_url=base_url, allow='sandbox')
+ self.assertEqual(str(ctx.exception),
+ "block access to out of sandbox file /tmp/vehicles.xsd")
+
+ with self.assertRaises(TypeError) as ctx:
+ XMLResource("https://xmlschema.test/vehicles.xsd", allow=None)
+ self.assertEqual(str(ctx.exception),
+ "invalid type <class 'NoneType'> for the attribute 'allow'")
+
+ with self.assertRaises(ValueError) as ctx:
+ XMLResource("https://xmlschema.test/vehicles.xsd", allow='any')
+ self.assertEqual(str(ctx.exception),
+ "'allow' attribute: 'any' is not a security mode")
+
def test_xml_resource_defuse(self):
resource = XMLResource(self.vh_xml_file, defuse='never')
self.assertEqual(resource.defuse, 'never')
self.assertRaises(ValueError, XMLResource, self.vh_xml_file, defuse='all')
- self.assertRaises(ValueError, XMLResource, self.vh_xml_file, defuse=None)
+ self.assertRaises(TypeError, XMLResource, self.vh_xml_file, defuse=None)
self.assertIsInstance(resource.root, etree_element)
resource = XMLResource(self.vh_xml_file, defuse='always')
self.assertIsInstance(resource.root, py_etree_element)
@@ -476,7 +512,7 @@ class TestResources(unittest.TestCase):
self.assertTrue(data.startswith('<?xml '))
xml_file.close()
resource = XMLResource('<A/>')
- self.assertRaises(ValueError, resource.open)
+ self.assertRaises(XMLSchemaResourceError, resource.open)
resource = XMLResource(source=open(self.vh_xml_file))
xml_file = resource.open()
diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py
index 6b5c218..1222843 100644
--- a/xmlschema/validators/schema.py
+++ b/xmlschema/validators/schema.py
@@ -26,7 +26,7 @@ import sys
from ..compat import add_metaclass
-from ..exceptions import XMLSchemaTypeError, XMLSchemaURLError, XMLSchemaKeyError, \
+from ..exceptions import XMLSchemaTypeError, XMLSchemaKeyError, \
XMLSchemaValueError, XMLSchemaOSError, XMLSchemaNamespaceError
from ..qnames import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \
VC_TYPE_UNAVAILABLE, VC_FACET_AVAILABLE, VC_FACET_UNAVAILABLE, XSD_SCHEMA, \
@@ -166,9 +166,16 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
:param base_url: is an optional base URL, used for the normalization of relative paths \
when the URL of the schema resource can't be obtained from the source argument.
:type base_url: str or None
- :param defuse: defines when to defuse XML data. Can be 'always', 'remote' or 'never'. \
- For default defuse only remote XML data.
- :type defuse: str or None
+ :param allow: defines the security mode for accessing resource locations. Can be \
+ 'all', 'remote', 'local' or 'sandbox'. Default is 'all' that means all types of \
+ URLs are allowed. With 'remote' only remote resource URLs are allowed. With 'local' \
+ only file paths and local URLs are allowed. With 'sandbox' only file paths and URLs that \
+ are under the directory path identified by source or by the *base_url* argument \
+ are allowed.
+ :type allow: str
+ :param defuse: defines when to defuse XML data using a `SafeXMLParser`. Can be \
+ 'always', 'remote' or 'never'. By default only remote XML data is defused.
+ :type defuse: str
:param timeout: the timeout in seconds for fetching resources. Default is `300`.
:type timeout: int
:param build: defines whether build the schema maps. Default is `True`.
@@ -273,7 +280,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
xpath_tokens = None
def __init__(self, source, namespace=None, validation='strict', global_maps=None,
- converter=None, locations=None, base_url=None, defuse='remote',
+ converter=None, locations=None, base_url=None, allow='all', defuse='remote',
timeout=300, build=True, use_meta=True, loglevel=None):
super(XMLSchemaBase, self).__init__(validation)
ElementPathMixin.__init__(self)
@@ -283,7 +290,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
elif build and global_maps is None:
logger.setLevel(logging.WARNING)
- self.source = XMLResource(source, base_url, defuse, timeout, lazy=False)
+ self.source = XMLResource(source, base_url, allow, defuse, timeout, lazy=False)
logger.debug("Read schema from %r", self.source)
self.imports = {}
@@ -500,6 +507,11 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
"""The base URL of the source of the schema."""
return self.source.base_url
+ @property
+ def allow(self):
+ """The resource access security mode: 'all', 'remote', 'local' or 'sandbox'."""
+ return self.source.allow
+
@property
def defuse(self):
"""Defines when to defuse XML data, can be 'always', 'remote' or 'never'."""
@@ -605,6 +617,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
"and will be removed in 1.1 version.", DeprecationWarning)
return self.identities
+ @property
+ def simple_types(self):
+ return [x for x in self.types.values() if x.is_simple()]
+
+ @property
+ def complex_types(self):
+ return [x for x in self.types.values() if x.is_complex()]
+
@classmethod
def create_meta_schema(cls, source=None, base_schemas=None, global_maps=None):
"""
@@ -896,7 +916,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
# https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-include
self.warnings.append("Include schema failed: %s." % str(err))
warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3)
- except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
+ except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
msg = 'cannot include schema %r: %s' % (child.attrib['schemaLocation'], err)
if isinstance(err, (XMLSchemaParseError, ParseError)):
self.parse_error(msg)
@@ -919,7 +939,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3)
if any(e.tag != XSD_ANNOTATION for e in child):
self.parse_error(str(err), child)
- except (XMLSchemaURLError, XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
+ except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err:
msg = 'cannot redefine schema %r: %s' % (child.attrib['schemaLocation'], err)
if isinstance(err, (XMLSchemaParseError, ParseError)):
self.parse_error(msg)
@@ -950,6 +970,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
global_maps=self.maps,
converter=self.converter,
base_url=self.base_url,
+ allow=self.allow,
defuse=self.defuse,
timeout=self.timeout,
build=False,
@@ -1081,6 +1102,7 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin):
global_maps=self.maps,
converter=self.converter,
base_url=self.base_url,
+ allow=self.allow,
defuse=self.defuse,
timeout=self.timeout,
build=build,
--
2.29.2