diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-05-01 19:20:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-01 19:20:38 (GMT) |
commit | 43851a202cabce1e6be699e7177735c778b6697e (patch) | |
tree | b517d3f81642bf531c8876522d7fabf55b9155c2 /Lib | |
parent | 3d37ea25dc97e4cb024045581979570835deb13c (diff) | |
download | cpython-43851a202cabce1e6be699e7177735c778b6697e.zip cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.gz cpython-43851a202cabce1e6be699e7177735c778b6697e.tar.bz2 |
bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883)
* bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree.
* bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default).
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_xml_etree.py | 90 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 67 |
2 files changed, 150 insertions, 7 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index e0d2cb7..8a228b8 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1194,6 +1194,12 @@ class XMLPullParserTest(unittest.TestCase): for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + def assert_events(self, parser, expected): + self.assertEqual( + [(event, (elem.tag, elem.text)) + for event, elem in parser.read_events()], + expected) + def assert_event_tags(self, parser, expected): events = parser.read_events() self.assertEqual([(action, elem.tag) for action, elem in events], @@ -1276,8 +1282,10 @@ class XMLPullParserTest(unittest.TestCase): self.assert_event_tags(parser, []) parser = ET.XMLPullParser(events=('start', 'end')) - self._feed(parser, "<!-- comment -->\n") - self.assert_event_tags(parser, []) + self._feed(parser, "<!-- text here -->\n") + self.assert_events(parser, []) + + parser = ET.XMLPullParser(events=('start', 'end')) self._feed(parser, "<root>\n") self.assert_event_tags(parser, [('start', 'root')]) self._feed(parser, "<element key='value'>text</element") @@ -1314,6 +1322,33 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "</root>") self.assertIsNone(parser.close()) + def test_events_comment(self): + parser = ET.XMLPullParser(events=('start', 'comment', 'end')) + self._feed(parser, "<!-- text here -->\n") + self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) + self._feed(parser, "<!-- more text here -->\n") + self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))]) + self._feed(parser, "<root-tag>text") + self.assert_event_tags(parser, [('start', 'root-tag')]) + self._feed(parser, "<!-- inner comment-->\n") + self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))]) + self._feed(parser, "</root-tag>\n") + self.assert_event_tags(parser, [('end', 'root-tag')]) + self._feed(parser, "<!-- outer comment -->\n") + self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))]) + + parser = ET.XMLPullParser(events=('comment',)) + self._feed(parser, "<!-- text here -->\n") + self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) + + def test_events_pi(self): + parser = ET.XMLPullParser(events=('start', 'pi', 'end')) + self._feed(parser, "<?pitarget?>\n") + self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))]) + parser = ET.XMLPullParser(events=('pi',)) + self._feed(parser, "<?pitarget some text ?>\n") + self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))]) + def test_events_sequence(self): # Test that events can be some sequence that's not just a tuple or list eventset = {'end', 'start'} @@ -1333,7 +1368,6 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "<foo>bar</foo>") self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) - def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) @@ -2741,6 +2775,33 @@ class TreeBuilderTest(unittest.TestCase): parser.feed(self.sample1) self.assertIsNone(parser.close()) + def test_treebuilder_comment(self): + b = ET.TreeBuilder() + self.assertEqual(b.comment('ctext').tag, ET.Comment) + self.assertEqual(b.comment('ctext').text, 'ctext') + + b = ET.TreeBuilder(comment_factory=ET.Comment) + self.assertEqual(b.comment('ctext').tag, ET.Comment) + self.assertEqual(b.comment('ctext').text, 'ctext') + + b = ET.TreeBuilder(comment_factory=len) + self.assertEqual(b.comment('ctext'), len('ctext')) + + def test_treebuilder_pi(self): + b = ET.TreeBuilder() + self.assertEqual(b.pi('target', None).tag, ET.PI) + self.assertEqual(b.pi('target', None).text, 'target') + + b = ET.TreeBuilder(pi_factory=ET.PI) + self.assertEqual(b.pi('target').tag, ET.PI) + self.assertEqual(b.pi('target').text, "target") + self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI) + self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ") + + b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text)) + self.assertEqual(b.pi('target'), (len('target'), None)) + self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text ')) + def test_treebuilder_elementfactory_none(self): parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) parser.feed(self.sample1) @@ -2761,6 +2822,21 @@ class TreeBuilderTest(unittest.TestCase): e = parser.close() self._check_sample1_element(e) + def test_subclass_comment_pi(self): + class MyTreeBuilder(ET.TreeBuilder): + def foobar(self, x): + return x * 2 + + tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI) + self.assertEqual(tb.foobar(10), 20) + + parser = ET.XMLParser(target=tb) + parser.feed(self.sample1) + parser.feed('<!-- a comment--><?and a pi?>') + + e = parser.close() + self._check_sample1_element(e) + def test_element_factory(self): lst = [] def myfactory(tag, attrib): @@ -3418,6 +3494,12 @@ def test_main(module=None): # Copy the path cache (should be empty) path_cache = ElementPath._cache ElementPath._cache = path_cache.copy() + # Align the Comment/PI factories. + if hasattr(ET, '_set_factories'): + old_factories = ET._set_factories(ET.Comment, ET.PI) + else: + old_factories = None + try: support.run_unittest(*test_classes) finally: @@ -3426,6 +3508,8 @@ def test_main(module=None): nsmap.clear() nsmap.update(nsmap_copy) ElementPath._cache = path_cache + if old_factories is not None: + ET._set_factories(*old_factories) # don't interfere with subsequent tests ET = pyET = None diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c9e2f36..c640048 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1374,12 +1374,30 @@ class TreeBuilder: *element_factory* is an optional element factory which is called to create new Element instances, as necessary. + *comment_factory* is a factory to create comments to be used instead of + the standard factory. If *insert_comments* is false (the default), + comments will not be inserted into the tree. + + *pi_factory* is a factory to create processing instructions to be used + instead of the standard factory. If *insert_pis* is false (the default), + processing instructions will not be inserted into the tree. """ - def __init__(self, element_factory=None): + def __init__(self, element_factory=None, *, + comment_factory=None, pi_factory=None, + insert_comments=False, insert_pis=False): self._data = [] # data collector self._elem = [] # element stack self._last = None # last element + self._root = None # root element self._tail = None # true if we're after an end tag + if comment_factory is None: + comment_factory = Comment + self._comment_factory = comment_factory + self.insert_comments = insert_comments + if pi_factory is None: + pi_factory = ProcessingInstruction + self._pi_factory = pi_factory + self.insert_pis = insert_pis if element_factory is None: element_factory = Element self._factory = element_factory @@ -1387,8 +1405,8 @@ class TreeBuilder: def close(self): """Flush builder buffers and return toplevel document Element.""" assert len(self._elem) == 0, "missing end tags" - assert self._last is not None, "missing toplevel element" - return self._last + assert self._root is not None, "missing toplevel element" + return self._root def _flush(self): if self._data: @@ -1417,6 +1435,8 @@ class TreeBuilder: self._last = elem = self._factory(tag, attrs) if self._elem: self._elem[-1].append(elem) + elif self._root is None: + self._root = elem self._elem.append(elem) self._tail = 0 return elem @@ -1435,6 +1455,33 @@ class TreeBuilder: self._tail = 1 return self._last + def comment(self, text): + """Create a comment using the comment_factory. + + *text* is the text of the comment. + """ + return self._handle_single( + self._comment_factory, self.insert_comments, text) + + def pi(self, target, text=None): + """Create a processing instruction using the pi_factory. + + *target* is the target name of the processing instruction. + *text* is the data of the processing instruction, or ''. + """ + return self._handle_single( + self._pi_factory, self.insert_pis, target, text) + + def _handle_single(self, factory, insert, *args): + elem = factory(*args) + if insert: + self._flush() + self._last = elem + if self._elem: + self._elem[-1].append(elem) + self._tail = 1 + return elem + # also see ElementTree and TreeBuilder class XMLParser: @@ -1519,6 +1566,15 @@ class XMLParser: def handler(prefix, event=event_name, append=append): append((event, None)) parser.EndNamespaceDeclHandler = handler + elif event_name == 'comment': + def handler(text, event=event_name, append=append, self=self): + append((event, self.target.comment(text))) + parser.CommentHandler = handler + elif event_name == 'pi': + def handler(pi_target, data, event=event_name, append=append, + self=self): + append((event, self.target.pi(pi_target, data))) + parser.ProcessingInstructionHandler = handler else: raise ValueError("unknown event %r" % event_name) @@ -1640,7 +1696,10 @@ try: # (see tests) _Element_Py = Element - # Element, SubElement, ParseError, TreeBuilder, XMLParser + # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories from _elementtree import * + from _elementtree import _set_factories except ImportError: pass +else: + _set_factories(Comment, ProcessingInstruction) |