summary refs log tree commit diff stats
diff options
context:
space:
mode:
author scoder <stefan_ml@behnel.de> 2017-09-30 13:35:21 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2017-09-30 13:35:21 (GMT)
commit 101a5e84acbab9d880e150195f23185dfb5449a9 (patch)
tree 9fd7915a7e4049f33f9d083312d2c1b78d4eeb38
parent 9811e80fd0ed9d74c76a66f1dd4e4b8afa9e8f53 (diff)
download cpython-101a5e84acbab9d880e150195f23185dfb5449a9.zip
cpython-101a5e84acbab9d880e150195f23185dfb5449a9.tar.gz
cpython-101a5e84acbab9d880e150195f23185dfb5449a9.tar.bz2
bpo-31648: Improve ElementPath (#3835)
* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".
-rw-r--r-- Doc/library/xml.etree.elementtree.rst 5
-rw-r--r-- Doc/whatsnew/3.7.rst 8
-rw-r--r-- Lib/test/test_xml_etree.py 33
-rw-r--r-- Lib/xml/etree/ElementPath.py 23
-rw-r--r-- Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst 6
5 files changed, 68 insertions, 7 deletions
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 7d814ad..6180859 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -437,6 +437,11 @@ Supported XPath syntax
| ``[tag]`` | Selects all elements that have a child named |
| | ``tag``. Only immediate children are supported. |
+-----------------------+------------------------------------------------------+
+| ``[.='text']`` | Selects all elements whose complete text content, |
+| | including descendants, equals the given ``text``. |
+| | |
+| | .. versionadded:: 3.7 |
++-----------------------+------------------------------------------------------+
| ``[tag='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including |
| | descendants, equals the given ``text``. |
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index a474e76..845ed64 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
keyword argument. When it's true, zeros are represented by ``'`'``
instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.)
+xml.etree
+---------
+
+:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
+methods can now compare text of the current node with ``[. = "text"]``,
+not only text in children. Predicates also allow adding spaces for
+better readability. (Contributed by Stefan Behnel in :issue:`31648`.)
+
zipapp
------
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 661ad8b..02812f3 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2237,6 +2237,39 @@ class ElementFindTest(unittest.TestCase):
['tag'] * 2)
self.assertEqual(e.findall('section//'), e.findall('section//*'))
+ self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
+ ['section'])
+
+ self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
+ [])
+ self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
+ [])
+
+ # duplicate section => 2x tag matches
+ e[1] = e[2]
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section', 'section'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag', 'tag'])
+
def test_test_find_with_ns(self):
e = ET.XML(SAMPLE_XML_NS)
self.assertEqual(summarize_list(e.findall('tag')), [])
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index 361f6d5..c9d6ef3 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -157,6 +157,9 @@ def prepare_predicate(next, token):
return
if token[0] == "]":
break
+ if token == ('', ''):
+ # ignore whitespace
+ continue
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
@@ -188,16 +191,22 @@ def prepare_predicate(next, token):
if elem.find(tag) is not None:
yield elem
return select
- if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
- # [tag='value']
+ if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
+ # [.='value'] or [tag='value']
tag = predicate[0]
value = predicate[-1]
- def select(context, result):
- for elem in result:
- for e in elem.findall(tag):
- if "".join(e.itertext()) == value:
+ if tag:
+ def select(context, result):
+ for elem in result:
+ for e in elem.findall(tag):
+ if "".join(e.itertext()) == value:
+ yield elem
+ break
+ else:
+ def select(context, result):
+ for elem in result:
+ if "".join(elem.itertext()) == value:
yield elem
- break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
new file mode 100644
index 0000000..8b39ce9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
@@ -0,0 +1,6 @@
+Improvements to path predicates in ElementTree:
+
+* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
+* Add support for text comparison of the current node, like "[.='text']".
+
+Patch by Stefan Behnel.