diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-05-01 19:49:58 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-01 19:49:58 (GMT) |
commit | dde3eebdaa8d2c51971ca704d53af7cbcda8bb34 (patch) | |
tree | ff16947548ec92506e63f98bbf79d9ad7af296a8 /Lib | |
parent | 43851a202cabce1e6be699e7177735c778b6697e (diff) | |
download | cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.zip cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.tar.gz cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.tar.bz2 |
bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target (GH-12885)
* bpo-36676: Implement namespace prefix aware parsing support for the XMLParser target in ElementTree.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_xml_etree.py | 93 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 30 |
2 files changed, 114 insertions, 9 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 8a228b8..0abc42a 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -14,12 +14,13 @@ import locale import operator import pickle import sys +import textwrap import types import unittest import warnings import weakref -from itertools import product +from itertools import product, islice from test import support from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr @@ -694,12 +695,17 @@ class ElementTreeTest(unittest.TestCase): self.append(("pi", target, data)) def comment(self, data): self.append(("comment", data)) + def start_ns(self, prefix, uri): + self.append(("start-ns", prefix, uri)) + def end_ns(self, prefix): + self.append(("end-ns", prefix)) builder = Builder() parser = ET.XMLParser(target=builder) parser.feed(data) self.assertEqual(builder, [ ('pi', 'pi', 'data'), ('comment', ' comment '), + ('start-ns', '', 'namespace'), ('start', '{namespace}root'), ('start', '{namespace}element'), ('end', '{namespace}element'), @@ -708,8 +714,30 @@ class ElementTreeTest(unittest.TestCase): ('start', '{namespace}empty-element'), ('end', '{namespace}empty-element'), ('end', '{namespace}root'), + ('end-ns', ''), ]) + def test_custom_builder_only_end_ns(self): + class Builder(list): + def end_ns(self, prefix): + self.append(("end-ns", prefix)) + + builder = Builder() + parser = ET.XMLParser(target=builder) + parser.feed(textwrap.dedent("""\ + <?pi data?> + <!-- comment --> + <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'> + <a:element key='value'>text</a:element> + <p:element>text</p:element>tail + <empty-element/> + </root> + """)) + self.assertEqual(builder, [ + ('end-ns', 'a'), + ('end-ns', 'p'), + ('end-ns', ''), + ]) # Element.getchildren() and ElementTree.getiterator() are deprecated. @checkwarnings(("This method will be removed in future versions. " @@ -1194,14 +1222,19 @@ class XMLPullParserTest(unittest.TestCase): for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) - def assert_events(self, parser, expected): + def assert_events(self, parser, expected, max_events=None): self.assertEqual( [(event, (elem.tag, elem.text)) - for event, elem in parser.read_events()], + for event, elem in islice(parser.read_events(), max_events)], expected) - def assert_event_tags(self, parser, expected): - events = parser.read_events() + def assert_event_tuples(self, parser, expected, max_events=None): + self.assertEqual( + list(islice(parser.read_events(), max_events)), + expected) + + def assert_event_tags(self, parser, expected, max_events=None): + events = islice(parser.read_events(), max_events) self.assertEqual([(action, elem.tag) for action, elem in events], expected) @@ -1276,6 +1309,56 @@ class XMLPullParserTest(unittest.TestCase): self.assertEqual(list(parser.read_events()), [('end-ns', None)]) self.assertIsNone(parser.close()) + def test_ns_events_start(self): + parser = ET.XMLPullParser(events=('start-ns', 'start', 'end')) + self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") + self.assert_event_tuples(parser, [ + ('start-ns', ('', 'abc')), + ('start-ns', ('p', 'xyz')), + ], max_events=2) + self.assert_event_tags(parser, [ + ('start', '{abc}tag'), + ], max_events=1) + + self._feed(parser, "<child />\n") + self.assert_event_tags(parser, [ + ('start', '{abc}child'), + ('end', '{abc}child'), + ]) + + self._feed(parser, "</tag>\n") + parser.close() + self.assert_event_tags(parser, [ + ('end', '{abc}tag'), + ]) + + def test_ns_events_start_end(self): + parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns')) + self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") + self.assert_event_tuples(parser, [ + ('start-ns', ('', 'abc')), + ('start-ns', ('p', 'xyz')), + ], max_events=2) + self.assert_event_tags(parser, [ + ('start', '{abc}tag'), + ], max_events=1) + + self._feed(parser, "<child />\n") + self.assert_event_tags(parser, [ + ('start', '{abc}child'), + ('end', '{abc}child'), + ]) + + self._feed(parser, "</tag>\n") + parser.close() + self.assert_event_tags(parser, [ + ('end', '{abc}tag'), + ], max_events=1) + self.assert_event_tuples(parser, [ + ('end-ns', None), + ('end-ns', None), + ]) + def test_events(self): parser = ET.XMLPullParser(events=()) self._feed(parser, "<root/>\n") diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c640048..5b26ac7 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1518,6 +1518,10 @@ class XMLParser: parser.StartElementHandler = self._start if hasattr(target, 'end'): parser.EndElementHandler = self._end + if hasattr(target, 'start_ns'): + parser.StartNamespaceDeclHandler = self._start_ns + if hasattr(target, 'end_ns'): + parser.EndNamespaceDeclHandler = self._end_ns if hasattr(target, 'data'): parser.CharacterDataHandler = target.data # miscellaneous callbacks @@ -1559,12 +1563,24 @@ class XMLParser: append((event, end(tag))) parser.EndElementHandler = handler elif event_name == "start-ns": - def handler(prefix, uri, event=event_name, append=append): - append((event, (prefix or "", uri or ""))) + # TreeBuilder does not implement .start_ns() + if hasattr(self.target, "start_ns"): + def handler(prefix, uri, event=event_name, append=append, + start_ns=self._start_ns): + append((event, start_ns(prefix, uri))) + else: + def handler(prefix, uri, event=event_name, append=append): + append((event, (prefix or '', uri or ''))) parser.StartNamespaceDeclHandler = handler elif event_name == "end-ns": - def handler(prefix, event=event_name, append=append): - append((event, None)) + # TreeBuilder does not implement .end_ns() + if hasattr(self.target, "end_ns"): + def handler(prefix, event=event_name, append=append, + end_ns=self._end_ns): + append((event, end_ns(prefix))) + else: + def handler(prefix, event=event_name, append=append): + append((event, None)) parser.EndNamespaceDeclHandler = handler elif event_name == 'comment': def handler(text, event=event_name, append=append, self=self): @@ -1595,6 +1611,12 @@ class XMLParser: self._names[key] = name return name + def _start_ns(self, prefix, uri): + return self.target.start_ns(prefix or '', uri or '') + + def _end_ns(self, prefix): + return self.target.end_ns(prefix or '') + def _start(self, tag, attr_list): # Handler for expat's StartElementHandler. Since ordered_attributes # is set, the attributes are reported as a list of alternating |