summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-05-01 19:49:58 (GMT)
committer GitHub <noreply@github.com> 2019-05-01 19:49:58 (GMT)
commit dde3eebdaa8d2c51971ca704d53af7cbcda8bb34 (patch)
tree ff16947548ec92506e63f98bbf79d9ad7af296a8 /Lib
parent 43851a202cabce1e6be699e7177735c778b6697e (diff)
download cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.zip
cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.tar.gz
cpython-dde3eebdaa8d2c51971ca704d53af7cbcda8bb34.tar.bz2
bpo-36676: Namespace prefix aware parsing support for the ET.XMLParser target (GH-12885)
* bpo-36676: Implement namespace prefix aware parsing support for the XMLParser target in ElementTree.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_xml_etree.py 93
-rw-r--r-- Lib/xml/etree/ElementTree.py 30
2 files changed, 114 insertions, 9 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 8a228b8..0abc42a 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -14,12 +14,13 @@ import locale
import operator
import pickle
import sys
+import textwrap
import types
import unittest
import warnings
import weakref
-from itertools import product
+from itertools import product, islice
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
@@ -694,12 +695,17 @@ class ElementTreeTest(unittest.TestCase):
self.append(("pi", target, data))
def comment(self, data):
self.append(("comment", data))
+ def start_ns(self, prefix, uri):
+ self.append(("start-ns", prefix, uri))
+ def end_ns(self, prefix):
+ self.append(("end-ns", prefix))
builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(data)
self.assertEqual(builder, [
('pi', 'pi', 'data'),
('comment', ' comment '),
+ ('start-ns', '', 'namespace'),
('start', '{namespace}root'),
('start', '{namespace}element'),
('end', '{namespace}element'),
@@ -708,8 +714,30 @@ class ElementTreeTest(unittest.TestCase):
('start', '{namespace}empty-element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
+ ('end-ns', ''),
])
+ def test_custom_builder_only_end_ns(self):
+ class Builder(list):
+ def end_ns(self, prefix):
+ self.append(("end-ns", prefix))
+
+ builder = Builder()
+ parser = ET.XMLParser(target=builder)
+ parser.feed(textwrap.dedent("""\
+ <?pi data?>
+ <!-- comment -->
+ <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
+ <a:element key='value'>text</a:element>
+ <p:element>text</p:element>tail
+ <empty-element/>
+ </root>
+ """))
+ self.assertEqual(builder, [
+ ('end-ns', 'a'),
+ ('end-ns', 'p'),
+ ('end-ns', ''),
+ ])
# Element.getchildren() and ElementTree.getiterator() are deprecated.
@checkwarnings(("This method will be removed in future versions. "
@@ -1194,14 +1222,19 @@ class XMLPullParserTest(unittest.TestCase):
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
- def assert_events(self, parser, expected):
+ def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
[(event, (elem.tag, elem.text))
- for event, elem in parser.read_events()],
+ for event, elem in islice(parser.read_events(), max_events)],
expected)
- def assert_event_tags(self, parser, expected):
- events = parser.read_events()
+ def assert_event_tuples(self, parser, expected, max_events=None):
+ self.assertEqual(
+ list(islice(parser.read_events(), max_events)),
+ expected)
+
+ def assert_event_tags(self, parser, expected, max_events=None):
+ events = islice(parser.read_events(), max_events)
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
@@ -1276,6 +1309,56 @@ class XMLPullParserTest(unittest.TestCase):
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
self.assertIsNone(parser.close())
+ def test_ns_events_start(self):
+ parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
+ self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
+ self.assert_event_tuples(parser, [
+ ('start-ns', ('', 'abc')),
+ ('start-ns', ('p', 'xyz')),
+ ], max_events=2)
+ self.assert_event_tags(parser, [
+ ('start', '{abc}tag'),
+ ], max_events=1)
+
+ self._feed(parser, "<child />\n")
+ self.assert_event_tags(parser, [
+ ('start', '{abc}child'),
+ ('end', '{abc}child'),
+ ])
+
+ self._feed(parser, "</tag>\n")
+ parser.close()
+ self.assert_event_tags(parser, [
+ ('end', '{abc}tag'),
+ ])
+
+ def test_ns_events_start_end(self):
+ parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
+ self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
+ self.assert_event_tuples(parser, [
+ ('start-ns', ('', 'abc')),
+ ('start-ns', ('p', 'xyz')),
+ ], max_events=2)
+ self.assert_event_tags(parser, [
+ ('start', '{abc}tag'),
+ ], max_events=1)
+
+ self._feed(parser, "<child />\n")
+ self.assert_event_tags(parser, [
+ ('start', '{abc}child'),
+ ('end', '{abc}child'),
+ ])
+
+ self._feed(parser, "</tag>\n")
+ parser.close()
+ self.assert_event_tags(parser, [
+ ('end', '{abc}tag'),
+ ], max_events=1)
+ self.assert_event_tuples(parser, [
+ ('end-ns', None),
+ ('end-ns', None),
+ ])
+
def test_events(self):
parser = ET.XMLPullParser(events=())
self._feed(parser, "<root/>\n")
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c640048..5b26ac7 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1518,6 +1518,10 @@ class XMLParser:
parser.StartElementHandler = self._start
if hasattr(target, 'end'):
parser.EndElementHandler = self._end
+ if hasattr(target, 'start_ns'):
+ parser.StartNamespaceDeclHandler = self._start_ns
+ if hasattr(target, 'end_ns'):
+ parser.EndNamespaceDeclHandler = self._end_ns
if hasattr(target, 'data'):
parser.CharacterDataHandler = target.data
# miscellaneous callbacks
@@ -1559,12 +1563,24 @@ class XMLParser:
append((event, end(tag)))
parser.EndElementHandler = handler
elif event_name == "start-ns":
- def handler(prefix, uri, event=event_name, append=append):
- append((event, (prefix or "", uri or "")))
+ # TreeBuilder does not implement .start_ns()
+ if hasattr(self.target, "start_ns"):
+ def handler(prefix, uri, event=event_name, append=append,
+ start_ns=self._start_ns):
+ append((event, start_ns(prefix, uri)))
+ else:
+ def handler(prefix, uri, event=event_name, append=append):
+ append((event, (prefix or '', uri or '')))
parser.StartNamespaceDeclHandler = handler
elif event_name == "end-ns":
- def handler(prefix, event=event_name, append=append):
- append((event, None))
+ # TreeBuilder does not implement .end_ns()
+ if hasattr(self.target, "end_ns"):
+ def handler(prefix, event=event_name, append=append,
+ end_ns=self._end_ns):
+ append((event, end_ns(prefix)))
+ else:
+ def handler(prefix, event=event_name, append=append):
+ append((event, None))
parser.EndNamespaceDeclHandler = handler
elif event_name == 'comment':
def handler(text, event=event_name, append=append, self=self):
@@ -1595,6 +1611,12 @@ class XMLParser:
self._names[key] = name
return name
+ def _start_ns(self, prefix, uri):
+ return self.target.start_ns(prefix or '', uri or '')
+
+ def _end_ns(self, prefix):
+ return self.target.end_ns(prefix or '')
+
def _start(self, tag, attr_list):
# Handler for expat's StartElementHandler. Since ordered_attributes
# is set, the attributes are reported as a list of alternating