diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-05-03 18:58:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-03 18:58:16 (GMT) |
commit | 47541689ccea79dfcb055c6be5800b13fcb6bdd2 (patch) | |
tree | 7580016557a064cc019fe41d1d62e57ac3dcc8c6 /Lib/xml/etree | |
parent | cf48e55f7f7718482fa712552f0cbc0aea1c826f (diff) | |
download | cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.zip cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.gz cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.bz2 |
bpo-28238: Implement "{*}tag" and "{ns}*" wildcard tag selection support for ElementPath, and extend the surrounding tests and docs. (GH-12997)
Diffstat (limited to 'Lib/xml/etree')
-rw-r--r-- | Lib/xml/etree/ElementPath.py | 90 |
1 files changed, 80 insertions, 10 deletions
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index b670d58..cfe72f2 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -99,13 +99,70 @@ def get_parent_map(context): parent_map[e] = p return parent_map + + +def _is_wildcard_tag(tag): + return tag[:3] == '{*}' or tag[-2:] == '}*' + + +def _prepare_tag(tag): + _isinstance, _str = isinstance, str + if tag == '{*}*': + # Same as '*', but no comments or processing instructions. + # It can be a surprise that '*' includes those, but there is no + # justification for '{*}*' doing the same. + def select(context, result): + for elem in result: + if _isinstance(elem.tag, _str): + yield elem + elif tag == '{}*': + # Any tag that is not in a namespace. + def select(context, result): + for elem in result: + el_tag = elem.tag + if _isinstance(el_tag, _str) and el_tag[0] != '{': + yield elem + elif tag[:3] == '{*}': + # The tag in any (or no) namespace. + suffix = tag[2:] # '}name' + no_ns = slice(-len(suffix), None) + tag = tag[3:] + def select(context, result): + for elem in result: + el_tag = elem.tag + if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix: + yield elem + elif tag[-2:] == '}*': + # Any tag in the given namespace. + ns = tag[:-1] + ns_only = slice(None, len(ns)) + def select(context, result): + for elem in result: + el_tag = elem.tag + if _isinstance(el_tag, _str) and el_tag[ns_only] == ns: + yield elem + else: + raise RuntimeError(f"internal parser error, got {tag}") + return select + + def prepare_child(next, token): tag = token[1] - def select(context, result): - for elem in result: - for e in elem: - if e.tag == tag: - yield e + if _is_wildcard_tag(tag): + select_tag = _prepare_tag(tag) + def select(context, result): + def select_child(result): + for elem in result: + yield from elem + return select_tag(context, select_child(result)) + else: + if tag[:2] == '{}': + tag = tag[2:] # '{}tag' == 'tag' + def select(context, result): + for elem in result: + for e in elem: + if e.tag == tag: + yield e return select def prepare_star(next, token): @@ -130,11 +187,24 @@ def prepare_descendant(next, token): tag = token[1] else: raise SyntaxError("invalid descendant") - def select(context, result): - for elem in result: - for e in elem.iter(tag): - if e is not elem: - yield e + + if _is_wildcard_tag(tag): + select_tag = _prepare_tag(tag) + def select(context, result): + def select_child(result): + for elem in result: + for e in elem.iter(): + if e is not elem: + yield e + return select_tag(context, select_child(result)) + else: + if tag[:2] == '{}': + tag = tag[2:] # '{}tag' == 'tag' + def select(context, result): + for elem in result: + for e in elem.iter(tag): + if e is not elem: + yield e return select def prepare_parent(next, token): |