summary | refs | log | tree | commit | diff | stats
path: root/Lib/xml/etree
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de>, 2019-05-03 18:58:16 (GMT)
committer: GitHub <noreply@github.com>, 2019-05-03 18:58:16 (GMT)
commit: 47541689ccea79dfcb055c6be5800b13fcb6bdd2 (patch)
tree: 7580016557a064cc019fe41d1d62e57ac3dcc8c6 /Lib/xml/etree
parent: cf48e55f7f7718482fa712552f0cbc0aea1c826f (diff)
download: cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.zip
cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.gz
cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.bz2
bpo-28238: Implement "{*}tag" and "{ns}*" wildcard tag selection support for ElementPath, and extend the surrounding tests and docs. (GH-12997)
Diffstat (limited to 'Lib/xml/etree')
-rw-r--r-- Lib/xml/etree/ElementPath.py 90
1 files changed, 80 insertions, 10 deletions
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index b670d58..cfe72f2 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -99,13 +99,70 @@ def get_parent_map(context):
parent_map[e] = p
return parent_map
+
+
def _is_wildcard_tag(tag):
    """Return True if *tag* is written with namespace wildcard syntax.

    A tag counts as a wildcard when it starts with '{*}' or ends with '}*'.
    The second test also covers the '{}*' and '{*}*' spellings.
    """
    if tag[:3] == '{*}':
        return True
    return tag[-2:] == '}*'
+
+
def _prepare_tag(tag):
    """Build a selector that filters elements by a namespace wildcard tag.

    *tag* must be one of the wildcard spellings:

    - '{*}*'    -- every item whose tag is a string
    - '{}*'     -- every item whose string tag does not start with '{'
    - '{*}name' -- 'name' with any namespace prefix, or with none
    - '{ns}*'   -- every item whose string tag starts with '{ns}'

    Returns a generator function ``select(context, result)`` that yields
    the matching items of the iterable *result*.  *context* is accepted
    but not used by any of the selectors.

    Raises RuntimeError if *tag* matches none of the forms above.
    """
    # Bound once here so the nested selectors use closure variables
    # (presumably a lookup-speed optimisation for the per-element checks).
    _isinstance, _str = isinstance, str
    if tag == '{*}*':
        # Same as '*', but no comments or processing instructions.
        # It can be a surprise that '*' includes those, but there is no
        # justification for '{*}*' doing the same.
        def select(context, result):
            for elem in result:
                if _isinstance(elem.tag, _str):
                    yield elem
    elif tag == '{}*':
        # Any tag that is not in a namespace.
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                # Slice rather than index: an empty tag string is not in a
                # namespace either, and el_tag[0] would raise IndexError.
                if _isinstance(el_tag, _str) and el_tag[:1] != '{':
                    yield elem
    elif tag[:3] == '{*}':
        # The tag in any (or no) namespace.
        suffix = tag[2:]  # '}name'
        no_ns = slice(-len(suffix), None)
        tag = tag[3:]
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                # Either the bare name (no namespace) or any '{...}name'.
                if el_tag == tag or (
                        _isinstance(el_tag, _str) and el_tag[no_ns] == suffix):
                    yield elem
    elif tag[-2:] == '}*':
        # Any tag in the given namespace.
        ns = tag[:-1]  # '{ns}'
        ns_only = slice(None, len(ns))
        def select(context, result):
            for elem in result:
                el_tag = elem.tag
                if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:
                    yield elem
    else:
        raise RuntimeError(f"internal parser error, got {tag}")
    return select
+
+
def prepare_child(next, token):
    """Build the selector for a child-tag path step.

    token[1] is the tag to match.  *next* is accepted but not used here.
    Returns ``select(context, result)``, which produces the direct
    children of every element in *result* that match the tag.
    """
    tag = token[1]

    if not _is_wildcard_tag(tag):
        # '{}tag' is just another spelling of the un-namespaced 'tag'.
        if tag[:2] == '{}':
            tag = tag[2:]

        def select(context, result):
            for parent in result:
                for child in parent:
                    if child.tag == tag:
                        yield child
        return select

    # Wildcard tags: flatten all children, then let the tag filter decide.
    select_tag = _prepare_tag(tag)

    def iter_children(parents):
        for parent in parents:
            yield from parent

    def select(context, result):
        return select_tag(context, iter_children(result))
    return select
def prepare_star(next, token):
@@ -130,11 +187,24 @@ def prepare_descendant(next, token):
tag = token[1]
else:
raise SyntaxError("invalid descendant")
- def select(context, result):
- for elem in result:
- for e in elem.iter(tag):
- if e is not elem:
- yield e
+
+ if _is_wildcard_tag(tag):
+ select_tag = _prepare_tag(tag)
+ def select(context, result):
+ def select_child(result):
+ for elem in result:
+ for e in elem.iter():
+ if e is not elem:
+ yield e
+ return select_tag(context, select_child(result))
+ else:
+ if tag[:2] == '{}':
+ tag = tag[2:] # '{}tag' == 'tag'
+ def select(context, result):
+ for elem in result:
+ for e in elem.iter(tag):
+ if e is not elem:
+ yield e
return select
def prepare_parent(next, token):