diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-11-25 15:36:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-25 15:36:25 (GMT) |
commit | c6a7bdb356835c9d7513b1ea6846683d446fe6c3 (patch) | |
tree | 3e98d7da17cbffbfc4ffd97392c5869caccb5185 /Lib/xml | |
parent | ded8888fbc33011dd39b7b1c86a5adfacc4943f3 (diff) | |
download | cpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.zip cpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.tar.gz cpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.tar.bz2 |
bpo-20928: support base-URL and recursive includes in etree.ElementInclude (#5723)
* bpo-20928: bring elementtree's XInclude support en-par with the implementation in lxml by adding support for recursive includes and a base-URL.
* bpo-20928: Support xincluding the same file multiple times, just not recursively.
* bpo-20928: Add 'max_depth' parameter to xinclude that limits the maximum recursion depth to 6 by default.
* Add news entry for updated ElementInclude support
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/etree/ElementInclude.py | 56 |
1 files changed, 49 insertions, 7 deletions
diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py index 963470e..5303062 100644 --- a/Lib/xml/etree/ElementInclude.py +++ b/Lib/xml/etree/ElementInclude.py @@ -50,18 +50,28 @@ import copy from . import ElementTree +from urllib.parse import urljoin XINCLUDE = "{http://www.w3.org/2001/XInclude}" XINCLUDE_INCLUDE = XINCLUDE + "include" XINCLUDE_FALLBACK = XINCLUDE + "fallback" +# For security reasons, the inclusion depth is limited to this read-only value by default. +DEFAULT_MAX_INCLUSION_DEPTH = 6 + + ## # Fatal include error. class FatalIncludeError(SyntaxError): pass + +class LimitedRecursiveIncludeError(FatalIncludeError): + pass + + ## # Default loader. This loader reads an included resource from disk. # @@ -92,13 +102,33 @@ def default_loader(href, parse, encoding=None): # @param loader Optional resource loader. If omitted, it defaults # to {@link default_loader}. If given, it should be a callable # that implements the same interface as <b>default_loader</b>. +# @param base_url The base URL of the original file, to resolve +# relative include file references. +# @param max_depth The maximum number of recursive inclusions. +# Limited to reduce the risk of malicious content explosion. +# Pass a negative value to disable the limitation. +# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. # @throws FatalIncludeError If the function fails to include a given # resource, or if the tree contains malformed XInclude elements. -# @throws OSError If the function fails to load a given resource. +# @throws IOError If the function fails to load a given resource. +# @returns the node or its replacement if it was an XInclude node -def include(elem, loader=None): +def include(elem, loader=None, base_url=None, + max_depth=DEFAULT_MAX_INCLUSION_DEPTH): + if max_depth is None: + max_depth = -1 + elif max_depth < 0: + raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth) + + if hasattr(elem, 'getroot'): + elem = elem.getroot() if loader is None: loader = default_loader + + _include(elem, loader, base_url, max_depth, set()) + + +def _include(elem, loader, base_url, max_depth, _parent_hrefs): # look for xinclude elements i = 0 while i < len(elem): @@ -106,14 +136,24 @@ def include(elem, loader=None): if e.tag == XINCLUDE_INCLUDE: # process xinclude directive href = e.get("href") + if base_url: + href = urljoin(base_url, href) parse = e.get("parse", "xml") if parse == "xml": + if href in _parent_hrefs: + raise FatalIncludeError("recursive include of %s" % href) + if max_depth == 0: + raise LimitedRecursiveIncludeError( + "maximum xinclude depth reached when including file %s" % href) + _parent_hrefs.add(href) node = loader(href, parse) if node is None: raise FatalIncludeError( "cannot load %r as %r" % (href, parse) ) - node = copy.copy(node) + node = copy.copy(node) # FIXME: this makes little sense with recursive includes + _include(node, loader, href, max_depth - 1, _parent_hrefs) + _parent_hrefs.remove(href) if e.tail: node.tail = (node.tail or "") + e.tail elem[i] = node @@ -123,11 +163,13 @@ def include(elem, loader=None): raise FatalIncludeError( "cannot load %r as %r" % (href, parse) ) + if e.tail: + text += e.tail if i: node = elem[i-1] - node.tail = (node.tail or "") + text + (e.tail or "") + node.tail = (node.tail or "") + text else: - elem.text = (elem.text or "") + text + (e.tail or "") + elem.text = (elem.text or "") + text del elem[i] continue else: @@ -139,5 +181,5 @@ def include(elem, loader=None): "xi:fallback tag must be child of xi:include (%r)" % e.tag ) else: - include(e, loader) - i = i + 1 + _include(e, loader, base_url, max_depth, _parent_hrefs) + i += 1 |