summaryrefslogtreecommitdiffstats
path: root/Lib/xml/etree
diff options
context:
space:
mode:
authorStefan Behnel <stefan_ml@behnel.de>2019-11-25 15:36:25 (GMT)
committerGitHub <noreply@github.com>2019-11-25 15:36:25 (GMT)
commitc6a7bdb356835c9d7513b1ea6846683d446fe6c3 (patch)
tree3e98d7da17cbffbfc4ffd97392c5869caccb5185 /Lib/xml/etree
parentded8888fbc33011dd39b7b1c86a5adfacc4943f3 (diff)
downloadcpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.zip
cpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.tar.gz
cpython-c6a7bdb356835c9d7513b1ea6846683d446fe6c3.tar.bz2
bpo-20928: support base-URL and recursive includes in etree.ElementInclude (#5723)
* bpo-20928: bring elementtree's XInclude support en-par with the implementation in lxml by adding support for recursive includes and a base-URL. * bpo-20928: Support xincluding the same file multiple times, just not recursively. * bpo-20928: Add 'max_depth' parameter to xinclude that limits the maximum recursion depth to 6 by default. * Add news entry for updated ElementInclude support
Diffstat (limited to 'Lib/xml/etree')
-rw-r--r--Lib/xml/etree/ElementInclude.py56
1 files changed, 49 insertions, 7 deletions
diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py
index 963470e..5303062 100644
--- a/Lib/xml/etree/ElementInclude.py
+++ b/Lib/xml/etree/ElementInclude.py
@@ -50,18 +50,28 @@
import copy
from . import ElementTree
+from urllib.parse import urljoin
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
+# For security reasons, the inclusion depth is limited to this read-only value by default.
+DEFAULT_MAX_INCLUSION_DEPTH = 6
+
+
##
# Fatal include error.
class FatalIncludeError(SyntaxError):
pass
+
+class LimitedRecursiveIncludeError(FatalIncludeError):
+ pass
+
+
##
# Default loader. This loader reads an included resource from disk.
#
@@ -92,13 +102,33 @@ def default_loader(href, parse, encoding=None):
# @param loader Optional resource loader. If omitted, it defaults
# to {@link default_loader}. If given, it should be a callable
# that implements the same interface as <b>default_loader</b>.
+# @param base_url The base URL of the original file, to resolve
+# relative include file references.
+# @param max_depth The maximum number of recursive inclusions.
+# Limited to reduce the risk of malicious content explosion.
+# Pass a negative value to disable the limitation.
+# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
# resource, or if the tree contains malformed XInclude elements.
-# @throws OSError If the function fails to load a given resource.
+# @throws IOError If the function fails to load a given resource.
+# @returns the node or its replacement if it was an XInclude node
-def include(elem, loader=None):
+def include(elem, loader=None, base_url=None,
+ max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
+ if max_depth is None:
+ max_depth = -1
+ elif max_depth < 0:
+ raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
+
+ if hasattr(elem, 'getroot'):
+ elem = elem.getroot()
if loader is None:
loader = default_loader
+
+ _include(elem, loader, base_url, max_depth, set())
+
+
+def _include(elem, loader, base_url, max_depth, _parent_hrefs):
# look for xinclude elements
i = 0
while i < len(elem):
@@ -106,14 +136,24 @@ def include(elem, loader=None):
if e.tag == XINCLUDE_INCLUDE:
# process xinclude directive
href = e.get("href")
+ if base_url:
+ href = urljoin(base_url, href)
parse = e.get("parse", "xml")
if parse == "xml":
+ if href in _parent_hrefs:
+ raise FatalIncludeError("recursive include of %s" % href)
+ if max_depth == 0:
+ raise LimitedRecursiveIncludeError(
+ "maximum xinclude depth reached when including file %s" % href)
+ _parent_hrefs.add(href)
node = loader(href, parse)
if node is None:
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
- node = copy.copy(node)
+ node = copy.copy(node) # FIXME: this makes little sense with recursive includes
+ _include(node, loader, href, max_depth - 1, _parent_hrefs)
+ _parent_hrefs.remove(href)
if e.tail:
node.tail = (node.tail or "") + e.tail
elem[i] = node
@@ -123,11 +163,13 @@ def include(elem, loader=None):
raise FatalIncludeError(
"cannot load %r as %r" % (href, parse)
)
+ if e.tail:
+ text += e.tail
if i:
node = elem[i-1]
- node.tail = (node.tail or "") + text + (e.tail or "")
+ node.tail = (node.tail or "") + text
else:
- elem.text = (elem.text or "") + text + (e.tail or "")
+ elem.text = (elem.text or "") + text
del elem[i]
continue
else:
@@ -139,5 +181,5 @@ def include(elem, loader=None):
"xi:fallback tag must be child of xi:include (%r)" % e.tag
)
else:
- include(e, loader)
- i = i + 1
+ _include(e, loader, base_url, max_depth, _parent_hrefs)
+ i += 1