diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-05-03 18:58:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-03 18:58:16 (GMT) |
commit | 47541689ccea79dfcb055c6be5800b13fcb6bdd2 (patch) | |
tree | 7580016557a064cc019fe41d1d62e57ac3dcc8c6 /Lib | |
parent | cf48e55f7f7718482fa712552f0cbc0aea1c826f (diff) | |
download | cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.zip cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.gz cpython-47541689ccea79dfcb055c6be5800b13fcb6bdd2.tar.bz2 |
bpo-28238: Implement "{*}tag" and "{ns}*" wildcard tag selection support for ElementPath, and extend the surrounding tests and docs. (GH-12997)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_xml_etree.py | 56 | ||||
-rw-r--r-- | Lib/xml/etree/ElementPath.py | 90 |
2 files changed, 134 insertions, 12 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index a59a11f..ca6862c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1137,16 +1137,21 @@ class ElementTreeTest(unittest.TestCase): def test_xpath_tokenizer(self): # Test the XPath tokenizer. from xml.etree import ElementPath - def check(p, expected): + def check(p, expected, namespaces=None): self.assertEqual([op or tag - for op, tag in ElementPath.xpath_tokenizer(p)], + for op, tag in ElementPath.xpath_tokenizer(p, namespaces)], expected) # tests from the xml specification check("*", ['*']) + check("{ns}*", ['{ns}*']) + check("{}*", ['{}*']) + check("{*}tag", ['{*}tag']) + check("{*}*", ['{*}*']) check("text()", ['text', '()']) check("@name", ['@', 'name']) check("@*", ['@', '*']) + check("@{ns}attr", ['@', '{ns}attr']) check("para[1]", ['para', '[', '1', ']']) check("para[last()]", ['para', '[', 'last', '()', ']']) check("*/para", ['*', '/', 'para']) @@ -1158,6 +1163,7 @@ class ElementTreeTest(unittest.TestCase): check("//olist/item", ['//', 'olist', '/', 'item']) check(".", ['.']) check(".//para", ['.', '//', 'para']) + check(".//{*}tag", ['.', '//', '{*}tag']) check("..", ['..']) check("../@lang", ['..', '/', '@', 'lang']) check("chapter[title]", ['chapter', '[', 'title', ']']) @@ -1168,6 +1174,8 @@ class ElementTreeTest(unittest.TestCase): check("{http://spam}egg", ['{http://spam}egg']) check("./spam.egg", ['.', '/', 'spam.egg']) check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) + check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'], + {'xsd': 'http://www.w3.org/2001/XMLSchema'}) def test_processinginstruction(self): # Test ProcessingInstruction directly @@ -2669,6 +2677,50 @@ class ElementFindTest(unittest.TestCase): self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1) + def test_findall_wildcard(self): + root = ET.XML(''' + <a xmlns:x="X" xmlns:y="Y"> + <x:b><c/></x:b> + <b/> + <c><x:b/><b/></c><y:b/> + </a>''') + root.append(ET.Comment('test')) + + self.assertEqual(summarize_list(root.findall("{*}b")), + ['{X}b', 'b', '{Y}b']) + self.assertEqual(summarize_list(root.findall("{*}c")), + ['c']) + self.assertEqual(summarize_list(root.findall("{X}*")), + ['{X}b']) + self.assertEqual(summarize_list(root.findall("{Y}*")), + ['{Y}b']) + self.assertEqual(summarize_list(root.findall("{}*")), + ['b', 'c']) + self.assertEqual(summarize_list(root.findall("{}b")), # only for consistency + ['b']) + self.assertEqual(summarize_list(root.findall("{}b")), + summarize_list(root.findall("b"))) + self.assertEqual(summarize_list(root.findall("{*}*")), + ['{X}b', 'b', 'c', '{Y}b']) + # This is an unfortunate difference, but that's how find('*') works. + self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]), + summarize_list(root.findall("*"))) + + self.assertEqual(summarize_list(root.findall(".//{*}b")), + ['{X}b', 'b', '{X}b', 'b', '{Y}b']) + self.assertEqual(summarize_list(root.findall(".//{*}c")), + ['c', 'c']) + self.assertEqual(summarize_list(root.findall(".//{X}*")), + ['{X}b', '{X}b']) + self.assertEqual(summarize_list(root.findall(".//{Y}*")), + ['{Y}b']) + self.assertEqual(summarize_list(root.findall(".//{}*")), + ['c', 'b', 'c', 'b']) + self.assertEqual(summarize_list(root.findall(".//{}b")), # only for consistency + ['b', 'b']) + self.assertEqual(summarize_list(root.findall(".//{}b")), + summarize_list(root.findall(".//b"))) + def test_bad_find(self): e = ET.XML(SAMPLE_XML) with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'): diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index b670d58..cfe72f2 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -99,13 +99,70 @@ def get_parent_map(context): parent_map[e] = p return parent_map + + +def _is_wildcard_tag(tag): + return tag[:3] == '{*}' or tag[-2:] == '}*' + + +def _prepare_tag(tag): + _isinstance, _str = isinstance, str + if tag == '{*}*': + # Same as '*', but no comments or processing instructions. + # It can be a surprise that '*' includes those, but there is no + # justification for '{*}*' doing the same. + def select(context, result): + for elem in result: + if _isinstance(elem.tag, _str): + yield elem + elif tag == '{}*': + # Any tag that is not in a namespace. + def select(context, result): + for elem in result: + el_tag = elem.tag + if _isinstance(el_tag, _str) and el_tag[0] != '{': + yield elem + elif tag[:3] == '{*}': + # The tag in any (or no) namespace. + suffix = tag[2:] # '}name' + no_ns = slice(-len(suffix), None) + tag = tag[3:] + def select(context, result): + for elem in result: + el_tag = elem.tag + if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix: + yield elem + elif tag[-2:] == '}*': + # Any tag in the given namespace. + ns = tag[:-1] + ns_only = slice(None, len(ns)) + def select(context, result): + for elem in result: + el_tag = elem.tag + if _isinstance(el_tag, _str) and el_tag[ns_only] == ns: + yield elem + else: + raise RuntimeError(f"internal parser error, got {tag}") + return select + + def prepare_child(next, token): tag = token[1] - def select(context, result): - for elem in result: - for e in elem: - if e.tag == tag: - yield e + if _is_wildcard_tag(tag): + select_tag = _prepare_tag(tag) + def select(context, result): + def select_child(result): + for elem in result: + yield from elem + return select_tag(context, select_child(result)) + else: + if tag[:2] == '{}': + tag = tag[2:] # '{}tag' == 'tag' + def select(context, result): + for elem in result: + for e in elem: + if e.tag == tag: + yield e return select def prepare_star(next, token): @@ -130,11 +187,24 @@ def prepare_descendant(next, token): tag = token[1] else: raise SyntaxError("invalid descendant") - def select(context, result): - for elem in result: - for e in elem.iter(tag): - if e is not elem: - yield e + + if _is_wildcard_tag(tag): + select_tag = _prepare_tag(tag) + def select(context, result): + def select_child(result): + for elem in result: + for e in elem.iter(): + if e is not elem: + yield e + return select_tag(context, select_child(result)) + else: + if tag[:2] == '{}': + tag = tag[2:] # '{}tag' == 'tag' + def select(context, result): + for elem in result: + for e in elem.iter(tag): + if e is not elem: + yield e return select def prepare_parent(next, token): |