From 97e8b1eaeaf3aa325c84ff2e13417c30414d0269 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Mon, 9 Nov 2020 02:02:39 -0500 Subject: bpo-40624: Add support for the XPath != operator in xml.etree (GH-22147) --- Doc/library/xml.etree.elementtree.rst | 18 +++++++++++ Lib/test/test_xml_etree.py | 35 ++++++++++++++++++++++ Lib/xml/etree/ElementPath.py | 31 ++++++++++++++----- .../2020-09-08-03-19-04.bpo-40624.0-gYfx.rst | 1 + 4 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-09-08-03-19-04.bpo-40624.0-gYfx.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index f4bccf6..87f4ee3 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -455,6 +455,12 @@ Supported XPath syntax | | has the given value. The value cannot contain | | | quotes. | +-----------------------+------------------------------------------------------+ +| ``[@attrib!='value']``| Selects all elements that have the given attribute | +| | with a value other than the given one. The value | +| | cannot contain quotes. | +| | | +| | .. versionadded:: 3.10 | ++-----------------------+------------------------------------------------------+ | ``[tag]`` | Selects all elements that have a child named | | | ``tag``. Only immediate children are supported. | +-----------------------+------------------------------------------------------+ @@ -463,10 +469,22 @@ Supported XPath syntax | | | | | .. versionadded:: 3.7 | +-----------------------+------------------------------------------------------+ +| ``[.!='text']`` | Selects all elements whose complete text content, | +| | including descendants, does not equal the given | +| | ``text``. | +| | | +| | ..
versionadded:: 3.10 | ++-----------------------+------------------------------------------------------+ | ``[tag='text']`` | Selects all elements that have a child named | | | ``tag`` whose complete text content, including | | | descendants, equals the given ``text``. | +-----------------------+------------------------------------------------------+ +| ``[tag!='text']`` | Selects all elements that have a child named | +| | ``tag`` whose complete text content, including | +| | descendants, does not equal the given ``text``. | +| | | +| | .. versionadded:: 3.10 | ++-----------------------+------------------------------------------------------+ | ``[position]`` | Selects all elements that are located at the given | | | position. The position can be either an integer | | | (1 is the first position), the expression ``last()`` | diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 3f1f378..fd4a385 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2852,8 +2852,12 @@ class ElementFindTest(unittest.TestCase): ['tag'] * 3) self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), ['tag']) + self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')), + ['tag'] * 2) self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), ['tag'] * 2) + self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')), + ['tag']) self.assertEqual(summarize_list(e.findall('.//tag[@id]')), ['tag']) self.assertEqual(summarize_list(e.findall('.//section[tag]')), @@ -2875,6 +2879,19 @@ class ElementFindTest(unittest.TestCase): self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")), ['section']) + # Negations of above tests. They match nothing because the sole section + # tag has subtext. 
+ self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")), + []) + self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")), + []) + self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")), + []) + self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")), + []) + self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")), + []) + self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")), ['tag']) self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")), @@ -2890,6 +2907,24 @@ class ElementFindTest(unittest.TestCase): self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")), []) + # Negations of above tests. + # Matches everything but the tag containing subtext + self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")), + ['tag'] * 3) + self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")), + ['tag'] * 3) + self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')), + ['tag'] * 3) + self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')), + ['tag'] * 3) + self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")), + ['tag'] * 3) + # Matches all tags. + self.assertEqual(summarize_list(e.findall(".//tag[. 
!= 'subtext ']")), + ['tag'] * 4) + self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")), + ['tag'] * 4) + # duplicate section => 2x tag matches e[1] = e[2] self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index d318e65..1cbd839 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -65,8 +65,9 @@ xpath_tokenizer_re = re.compile( r"//?|" r"\.\.|" r"\(\)|" + r"!=|" r"[/.*:\[\]\(\)@=])|" - r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" + r"((?:\{[^}]+\})?[^/\[\]\(\)@!=\s]+)|" r"\s+" ) @@ -253,15 +254,19 @@ def prepare_predicate(next, token): if elem.get(key) is not None: yield elem return select - if signature == "@-='": - # [@attribute='value'] + if signature == "@-='" or signature == "@-!='": + # [@attribute='value'] or [@attribute!='value'] key = predicate[1] value = predicate[-1] def select(context, result): for elem in result: if elem.get(key) == value: yield elem - return select + def select_negated(context, result): + for elem in result: + if (attr_value := elem.get(key)) is not None and attr_value != value: + yield elem + return select_negated if '!=' in signature else select if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): # [tag] tag = predicate[0] @@ -270,8 +275,10 @@ def prepare_predicate(next, token): if elem.find(tag) is not None: yield elem return select - if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])): - # [.='value'] or [tag='value'] + if signature == ".='" or signature == ".!='" or ( + (signature == "-='" or signature == "-!='") + and not re.match(r"\-?\d+$", predicate[0])): + # [.='value'] or [tag='value'] or [.!='value'] or [tag!='value'] tag = predicate[0] value = predicate[-1] if tag: @@ -281,12 +288,22 @@ def prepare_predicate(next, token): if "".join(e.itertext()) == value: yield elem break + def select_negated(context, result): + for elem in result: + for e 
in elem.iterfind(tag): + if "".join(e.itertext()) != value: + yield elem + break else: def select(context, result): for elem in result: if "".join(elem.itertext()) == value: yield elem - return select + def select_negated(context, result): + for elem in result: + if "".join(elem.itertext()) != value: + yield elem + return select_negated if '!=' in signature else select if signature == "-" or signature == "-()" or signature == "-()-": # [index] or [last()] or [last()-index] if signature == "-": diff --git a/Misc/NEWS.d/next/Library/2020-09-08-03-19-04.bpo-40624.0-gYfx.rst b/Misc/NEWS.d/next/Library/2020-09-08-03-19-04.bpo-40624.0-gYfx.rst new file mode 100644 index 0000000..78bad6e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-09-08-03-19-04.bpo-40624.0-gYfx.rst @@ -0,0 +1 @@ +Added support for the XPath ``!=`` operator in xml.etree -- cgit v0.12