From 101a5e84acbab9d880e150195f23185dfb5449a9 Mon Sep 17 00:00:00 2001 From: scoder Date: Sat, 30 Sep 2017 15:35:21 +0200 Subject: bpo-31648: Improve ElementPath (#3835) * Allow whitespace inside of ElementPath predicates. * Add ElementPath predicate support for text comparison of the current node, like "[.='text']". --- Doc/library/xml.etree.elementtree.rst | 5 ++++ Doc/whatsnew/3.7.rst | 8 ++++++ Lib/test/test_xml_etree.py | 33 ++++++++++++++++++++++ Lib/xml/etree/ElementPath.py | 23 ++++++++++----- .../2017-09-30-10-45-12.bpo-31648.Cai7ji.rst | 6 ++++ 5 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 7d814ad..6180859 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -437,6 +437,11 @@ Supported XPath syntax | ``[tag]`` | Selects all elements that have a child named | | | ``tag``. Only immediate children are supported. | +-----------------------+------------------------------------------------------+ +| ``[.='text']`` | Selects all elements whose complete text content, | +| | including descendants, equals the given ``text``. | +| | | +| | .. versionadded:: 3.7 | ++-----------------------+------------------------------------------------------+ | ``[tag='text']`` | Selects all elements that have a child named | | | ``tag`` whose complete text content, including | | | descendants, equals the given ``text``. | diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index a474e76..845ed64 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick* keyword argument. When it's true, zeros are represented by ``'`'`` instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.) 
+xml.etree +--------- + +:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find` +methods can now compare text of the current node with ``[. = "text"]``, +not only text in children. Predicates also allow adding spaces for +better readability. (Contributed by Stefan Behnel in :issue:`31648`.) + zipapp ------ diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 661ad8b..02812f3 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2237,6 +2237,39 @@ class ElementFindTest(unittest.TestCase): ['tag'] * 2) self.assertEqual(e.findall('section//'), e.findall('section//*')) + self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")), + ['section']) + self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")), + ['section']) + self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")), + ['section']) + self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), + ['section']) + self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")), + ['section']) + + self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")), + ['tag']) + self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")), + ['tag']) + self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')), + ['tag']) + self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')), + ['tag']) + self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")), + ['tag']) + self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")), + []) + self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")), + []) + + # duplicate section => 2x tag matches + e[1] = e[2] + self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), + ['section', 'section']) + self.assertEqual(summarize_list(e.findall(".//tag[. 
= 'subtext']")), + ['tag', 'tag']) + def test_test_find_with_ns(self): e = ET.XML(SAMPLE_XML_NS) self.assertEqual(summarize_list(e.findall('tag')), []) diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index 361f6d5..c9d6ef3 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -157,6 +157,9 @@ def prepare_predicate(next, token): return if token[0] == "]": break + if token == ('', ''): + # ignore whitespace + continue if token[0] and token[0][:1] in "'\"": token = "'", token[0][1:-1] signature.append(token[0] or "-") @@ -188,16 +191,22 @@ def prepare_predicate(next, token): if elem.find(tag) is not None: yield elem return select - if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]): - # [tag='value'] + if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])): + # [.='value'] or [tag='value'] tag = predicate[0] value = predicate[-1] - def select(context, result): - for elem in result: - for e in elem.findall(tag): - if "".join(e.itertext()) == value: + if tag: + def select(context, result): + for elem in result: + for e in elem.findall(tag): + if "".join(e.itertext()) == value: + yield elem + break + else: + def select(context, result): + for elem in result: + if "".join(elem.itertext()) == value: yield elem - break return select if signature == "-" or signature == "-()" or signature == "-()-": # [index] or [last()] or [last()-index] diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst new file mode 100644 index 0000000..8b39ce9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst @@ -0,0 +1,6 @@ +Improvements to path predicates in ElementTree: + +* Allow whitespace around predicate parts, i.e. "[a = 'text']" instead of requiring the less readable "[a='text']". +* Add support for text comparison of the current node, like "[.='text']". 
+ +Patch by Stefan Behnel. -- cgit v0.12