summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2019-05-01 19:20:38 (GMT)
committer: GitHub <noreply@github.com> 2019-05-01 19:20:38 (GMT)
commit: 43851a202cabce1e6be699e7177735c778b6697e (patch)
tree: b517d3f81642bf531c8876522d7fabf55b9155c2 /Lib
parent: 3d37ea25dc97e4cb024045581979570835deb13c (diff)
download: cpython-43851a202cabce1e6be699e7177735c778b6697e.zip
cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.gz
cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.bz2
bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883)
* bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree.
* bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default).
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_xml_etree.py | 90
-rw-r--r-- Lib/xml/etree/ElementTree.py | 67
2 files changed, 150 insertions, 7 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index e0d2cb7..8a228b8 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1194,6 +1194,12 @@ class XMLPullParserTest(unittest.TestCase):
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
+ def assert_events(self, parser, expected):
+ self.assertEqual(
+ [(event, (elem.tag, elem.text))
+ for event, elem in parser.read_events()],
+ expected)
+
def assert_event_tags(self, parser, expected):
events = parser.read_events()
self.assertEqual([(action, elem.tag) for action, elem in events],
@@ -1276,8 +1282,10 @@ class XMLPullParserTest(unittest.TestCase):
self.assert_event_tags(parser, [])
parser = ET.XMLPullParser(events=('start', 'end'))
- self._feed(parser, "<!-- comment -->\n")
- self.assert_event_tags(parser, [])
+ self._feed(parser, "<!-- text here -->\n")
+ self.assert_events(parser, [])
+
+ parser = ET.XMLPullParser(events=('start', 'end'))
self._feed(parser, "<root>\n")
self.assert_event_tags(parser, [('start', 'root')])
self._feed(parser, "<element key='value'>text</element")
@@ -1314,6 +1322,33 @@ class XMLPullParserTest(unittest.TestCase):
self._feed(parser, "</root>")
self.assertIsNone(parser.close())
+ def test_events_comment(self):
+ parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
+ self._feed(parser, "<!-- text here -->\n")
+ self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
+ self._feed(parser, "<!-- more text here -->\n")
+ self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
+ self._feed(parser, "<root-tag>text")
+ self.assert_event_tags(parser, [('start', 'root-tag')])
+ self._feed(parser, "<!-- inner comment-->\n")
+ self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
+ self._feed(parser, "</root-tag>\n")
+ self.assert_event_tags(parser, [('end', 'root-tag')])
+ self._feed(parser, "<!-- outer comment -->\n")
+ self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
+
+ parser = ET.XMLPullParser(events=('comment',))
+ self._feed(parser, "<!-- text here -->\n")
+ self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
+
+ def test_events_pi(self):
+ parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
+ self._feed(parser, "<?pitarget?>\n")
+ self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
+ parser = ET.XMLPullParser(events=('pi',))
+ self._feed(parser, "<?pitarget some text ?>\n")
+ self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
+
def test_events_sequence(self):
# Test that events can be some sequence that's not just a tuple or list
eventset = {'end', 'start'}
@@ -1333,7 +1368,6 @@ class XMLPullParserTest(unittest.TestCase):
self._feed(parser, "<foo>bar</foo>")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
-
def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
@@ -2741,6 +2775,33 @@ class TreeBuilderTest(unittest.TestCase):
parser.feed(self.sample1)
self.assertIsNone(parser.close())
+ def test_treebuilder_comment(self):
+ b = ET.TreeBuilder()
+ self.assertEqual(b.comment('ctext').tag, ET.Comment)
+ self.assertEqual(b.comment('ctext').text, 'ctext')
+
+ b = ET.TreeBuilder(comment_factory=ET.Comment)
+ self.assertEqual(b.comment('ctext').tag, ET.Comment)
+ self.assertEqual(b.comment('ctext').text, 'ctext')
+
+ b = ET.TreeBuilder(comment_factory=len)
+ self.assertEqual(b.comment('ctext'), len('ctext'))
+
+ def test_treebuilder_pi(self):
+ b = ET.TreeBuilder()
+ self.assertEqual(b.pi('target', None).tag, ET.PI)
+ self.assertEqual(b.pi('target', None).text, 'target')
+
+ b = ET.TreeBuilder(pi_factory=ET.PI)
+ self.assertEqual(b.pi('target').tag, ET.PI)
+ self.assertEqual(b.pi('target').text, "target")
+ self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
+ self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ")
+
+ b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
+ self.assertEqual(b.pi('target'), (len('target'), None))
+ self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
+
def test_treebuilder_elementfactory_none(self):
parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
parser.feed(self.sample1)
@@ -2761,6 +2822,21 @@ class TreeBuilderTest(unittest.TestCase):
e = parser.close()
self._check_sample1_element(e)
+ def test_subclass_comment_pi(self):
+ class MyTreeBuilder(ET.TreeBuilder):
+ def foobar(self, x):
+ return x * 2
+
+ tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
+ self.assertEqual(tb.foobar(10), 20)
+
+ parser = ET.XMLParser(target=tb)
+ parser.feed(self.sample1)
+ parser.feed('<!-- a comment--><?and a pi?>')
+
+ e = parser.close()
+ self._check_sample1_element(e)
+
def test_element_factory(self):
lst = []
def myfactory(tag, attrib):
@@ -3418,6 +3494,12 @@ def test_main(module=None):
# Copy the path cache (should be empty)
path_cache = ElementPath._cache
ElementPath._cache = path_cache.copy()
+ # Align the Comment/PI factories.
+ if hasattr(ET, '_set_factories'):
+ old_factories = ET._set_factories(ET.Comment, ET.PI)
+ else:
+ old_factories = None
+
try:
support.run_unittest(*test_classes)
finally:
@@ -3426,6 +3508,8 @@ def test_main(module=None):
nsmap.clear()
nsmap.update(nsmap_copy)
ElementPath._cache = path_cache
+ if old_factories is not None:
+ ET._set_factories(*old_factories)
# don't interfere with subsequent tests
ET = pyET = None
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c9e2f36..c640048 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1374,12 +1374,30 @@ class TreeBuilder:
*element_factory* is an optional element factory which is called
to create new Element instances, as necessary.
+ *comment_factory* is a factory to create comments to be used instead of
+ the standard factory. If *insert_comments* is false (the default),
+ comments will not be inserted into the tree.
+
+ *pi_factory* is a factory to create processing instructions to be used
+ instead of the standard factory. If *insert_pis* is false (the default),
+ processing instructions will not be inserted into the tree.
"""
- def __init__(self, element_factory=None):
+ def __init__(self, element_factory=None, *,
+ comment_factory=None, pi_factory=None,
+ insert_comments=False, insert_pis=False):
self._data = [] # data collector
self._elem = [] # element stack
self._last = None # last element
+ self._root = None # root element
self._tail = None # true if we're after an end tag
+ if comment_factory is None:
+ comment_factory = Comment
+ self._comment_factory = comment_factory
+ self.insert_comments = insert_comments
+ if pi_factory is None:
+ pi_factory = ProcessingInstruction
+ self._pi_factory = pi_factory
+ self.insert_pis = insert_pis
if element_factory is None:
element_factory = Element
self._factory = element_factory
@@ -1387,8 +1405,8 @@ class TreeBuilder:
def close(self):
"""Flush builder buffers and return toplevel document Element."""
assert len(self._elem) == 0, "missing end tags"
- assert self._last is not None, "missing toplevel element"
- return self._last
+ assert self._root is not None, "missing toplevel element"
+ return self._root
def _flush(self):
if self._data:
@@ -1417,6 +1435,8 @@ class TreeBuilder:
self._last = elem = self._factory(tag, attrs)
if self._elem:
self._elem[-1].append(elem)
+ elif self._root is None:
+ self._root = elem
self._elem.append(elem)
self._tail = 0
return elem
@@ -1435,6 +1455,33 @@ class TreeBuilder:
self._tail = 1
return self._last
+ def comment(self, text):
+ """Create a comment using the comment_factory.
+
+ *text* is the text of the comment.
+ """
+ return self._handle_single(
+ self._comment_factory, self.insert_comments, text)
+
+ def pi(self, target, text=None):
+ """Create a processing instruction using the pi_factory.
+
+ *target* is the target name of the processing instruction.
+ *text* is the data of the processing instruction, or ''.
+ """
+ return self._handle_single(
+ self._pi_factory, self.insert_pis, target, text)
+
+ def _handle_single(self, factory, insert, *args):
+ elem = factory(*args)
+ if insert:
+ self._flush()
+ self._last = elem
+ if self._elem:
+ self._elem[-1].append(elem)
+ self._tail = 1
+ return elem
+
# also see ElementTree and TreeBuilder
class XMLParser:
@@ -1519,6 +1566,15 @@ class XMLParser:
def handler(prefix, event=event_name, append=append):
append((event, None))
parser.EndNamespaceDeclHandler = handler
+ elif event_name == 'comment':
+ def handler(text, event=event_name, append=append, self=self):
+ append((event, self.target.comment(text)))
+ parser.CommentHandler = handler
+ elif event_name == 'pi':
+ def handler(pi_target, data, event=event_name, append=append,
+ self=self):
+ append((event, self.target.pi(pi_target, data)))
+ parser.ProcessingInstructionHandler = handler
else:
raise ValueError("unknown event %r" % event_name)
@@ -1640,7 +1696,10 @@ try:
# (see tests)
_Element_Py = Element
- # Element, SubElement, ParseError, TreeBuilder, XMLParser
+ # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
from _elementtree import *
+ from _elementtree import _set_factories
except ImportError:
pass
+else:
+ _set_factories(Comment, ProcessingInstruction)