From 88efc52d7451ab648c9659e6be47cee34bf0ce43 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 10 Feb 2013 14:29:52 +0200 Subject: Issue #1470548: XMLGenerator now works with binary output streams. --- Lib/test/test_sax.py | 215 ++++++++++++++++++++++++++++++++---------------- Lib/xml/sax/saxutils.py | 67 ++++++++++----- Misc/NEWS | 2 + 3 files changed, 192 insertions(+), 92 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index a96f0a8..218d519 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -13,7 +13,7 @@ from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl -from io import StringIO +from io import BytesIO, StringIO import os.path import shutil from test import support @@ -173,31 +173,29 @@ class SaxutilsTest(unittest.TestCase): # ===== XMLGenerator -start = '\n' - -class XmlgenTest(unittest.TestCase): +class XmlgenTest: def test_xmlgen_basic(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() gen.startElement("doc", {}) gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), self.xml("")) def test_xmlgen_basic_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() gen.startElement("doc", {}) gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), self.xml("")) def test_xmlgen_content(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -206,10 +204,10 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "huhei") + self.assertEqual(result.getvalue(), self.xml("huhei")) def test_xmlgen_content_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -218,10 +216,10 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "huhei") + self.assertEqual(result.getvalue(), self.xml("huhei")) def test_xmlgen_pi(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -230,10 +228,11 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), + self.xml("")) def test_xmlgen_content_escape(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -243,10 +242,10 @@ class XmlgenTest(unittest.TestCase): gen.endDocument() self.assertEqual(result.getvalue(), - start + "<huhei&") + self.xml("<huhei&")) def test_xmlgen_attr_escape(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -260,13 +259,43 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + - ("" - "" - "")) + self.assertEqual(result.getvalue(), self.xml( + "" + "" + "")) + + def test_xmlgen_encoding(self): + encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', + 'utf-16', 'utf-16be', 'utf-16le', + 'utf-32', 'utf-32be', 'utf-32le') + for encoding in encodings: + result = self.ioclass() + gen = XMLGenerator(result, encoding=encoding) + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('\u20ac', encoding=encoding)) + + def test_xmlgen_unencodable(self): + result = self.ioclass() + gen = XMLGenerator(result, encoding='ascii') + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('', encoding='ascii')) def test_xmlgen_ignorable(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -275,10 +304,10 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + " ") + self.assertEqual(result.getvalue(), self.xml(" ")) def test_xmlgen_ignorable_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -287,10 +316,10 @@ class XmlgenTest(unittest.TestCase): gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + " ") + self.assertEqual(result.getvalue(), self.xml(" ")) def test_xmlgen_ns(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -303,12 +332,12 @@ class XmlgenTest(unittest.TestCase): gen.endPrefixMapping("ns1") gen.endDocument() - self.assertEqual(result.getvalue(), start + \ - ('' % + self.assertEqual(result.getvalue(), self.xml( + '' % ns_uri)) def test_xmlgen_ns_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -321,12 +350,12 @@ class XmlgenTest(unittest.TestCase): gen.endPrefixMapping("ns1") gen.endDocument() - self.assertEqual(result.getvalue(), start + \ - ('' % + self.assertEqual(result.getvalue(), self.xml( + '' % ns_uri)) def test_1463026_1(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -334,10 +363,10 @@ class XmlgenTest(unittest.TestCase): gen.endElementNS((None, 'a'), 'a') gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_1_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -345,10 +374,10 @@ class XmlgenTest(unittest.TestCase): gen.endElementNS((None, 'a'), 'a') gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_2(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -358,10 +387,10 @@ class XmlgenTest(unittest.TestCase): gen.endPrefixMapping(None) gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_2_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -371,10 +400,10 @@ class XmlgenTest(unittest.TestCase): gen.endPrefixMapping(None) gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_3(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -385,10 +414,10 @@ class XmlgenTest(unittest.TestCase): gen.endDocument() self.assertEqual(result.getvalue(), - start+'') + self.xml('')) def test_1463026_3_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -399,7 +428,7 @@ class XmlgenTest(unittest.TestCase): gen.endDocument() self.assertEqual(result.getvalue(), - start+'') + self.xml('')) def test_5027_1(self): # The xml prefix (as in xml:lang below) is reserved and bound by @@ -416,13 +445,13 @@ class XmlgenTest(unittest.TestCase): parser = make_parser() parser.setFeature(feature_namespaces, True) - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) parser.setContentHandler(gen) parser.parse(test_xml) self.assertEqual(result.getvalue(), - start + ( + self.xml( '' 'Hello' '')) @@ -435,7 +464,7 @@ class XmlgenTest(unittest.TestCase): # # This test demonstrates the bug by direct manipulation of the # XMLGenerator. - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -450,15 +479,57 @@ class XmlgenTest(unittest.TestCase): gen.endDocument() self.assertEqual(result.getvalue(), - start + ( + self.xml( '' 'Hello' '')) + def test_no_close_file(self): + result = self.ioclass() + def func(out): + gen = XMLGenerator(out) + gen.startDocument() + gen.startElement("doc", {}) + func(result) + self.assertFalse(result.closed) + +class StringXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = StringIO + + def xml(self, doc, encoding='iso-8859-1'): + return '\n%s' % (encoding, doc) + + test_xmlgen_unencodable = None + +class BytesXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = BytesIO + + def xml(self, doc, encoding='iso-8859-1'): + return ('\n%s' % + (encoding, doc)).encode(encoding, 'xmlcharrefreplace') + +class WriterXmlgenTest(BytesXmlgenTest): + class ioclass(list): + write = list.append + closed = False + + def seekable(self): + return True + + def tell(self): + # return 0 at start and not 0 after start + return len(self) + + def getvalue(self): + return b''.join(self) + + +start = b'\n' + class XMLFilterBaseTest(unittest.TestCase): def test_filter_basic(self): - result = StringIO() + result = BytesIO() gen = XMLGenerator(result) filter = XMLFilterBase() filter.setContentHandler(gen) @@ -470,7 +541,7 @@ class XMLFilterBaseTest(unittest.TestCase): filter.endElement("doc") filter.endDocument() - self.assertEqual(result.getvalue(), start + "content ") + self.assertEqual(result.getvalue(), start + b"content ") # =========================================================================== # @@ -478,7 +549,7 @@ class XMLFilterBaseTest(unittest.TestCase): # # =========================================================================== -with open(TEST_XMLFILE_OUT) as f: +with open(TEST_XMLFILE_OUT, 'rb') as f: xml_test_out = f.read() class ExpatReaderTest(XmlTestBase): @@ -487,11 +558,11 @@ class ExpatReaderTest(XmlTestBase): def test_expat_file(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) - with open(TEST_XMLFILE) as f: + with open(TEST_XMLFILE, 'rb') as f: parser.parse(f) self.assertEqual(result.getvalue(), xml_test_out) @@ -503,7 +574,7 @@ class ExpatReaderTest(XmlTestBase): self.addCleanup(support.unlink, fname) parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -547,13 +618,13 @@ class ExpatReaderTest(XmlTestBase): def resolveEntity(self, publicId, systemId): inpsrc = InputSource() - inpsrc.setByteStream(StringIO("")) + inpsrc.setByteStream(BytesIO(b"")) return inpsrc def test_expat_entityresolver(self): parser = create_parser() parser.setEntityResolver(self.TestEntityResolver()) - result = StringIO() + result = BytesIO() parser.setContentHandler(XMLGenerator(result)) parser.feed('") + b"") # ===== Attributes support @@ -632,7 +703,7 @@ class ExpatReaderTest(XmlTestBase): def test_expat_inpsource_filename(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -642,7 +713,7 @@ class ExpatReaderTest(XmlTestBase): def test_expat_inpsource_sysid(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -657,7 +728,7 @@ class ExpatReaderTest(XmlTestBase): self.addCleanup(support.unlink, fname) parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -667,12 +738,12 @@ class ExpatReaderTest(XmlTestBase): def test_expat_inpsource_stream(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) inpsrc = InputSource() - with open(TEST_XMLFILE) as f: + with open(TEST_XMLFILE, 'rb') as f: inpsrc.setByteStream(f) parser.parse(inpsrc) @@ -681,7 +752,7 @@ class ExpatReaderTest(XmlTestBase): # ===== IncrementalParser support def test_expat_incremental(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -690,10 +761,10 @@ class ExpatReaderTest(XmlTestBase): parser.feed("") parser.close() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), start + b"") def test_expat_incremental_reset(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -701,7 +772,7 @@ class ExpatReaderTest(XmlTestBase): parser.feed("") parser.feed("text") - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) parser.reset() @@ -711,12 +782,12 @@ class ExpatReaderTest(XmlTestBase): parser.feed("") parser.close() - self.assertEqual(result.getvalue(), start + "text") + self.assertEqual(result.getvalue(), start + b"text") # ===== Locator support def test_expat_locator_noinfo(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -730,7 +801,7 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(parser.getLineNumber(), 1) def test_expat_locator_withinfo(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -745,7 +816,7 @@ class ExpatReaderTest(XmlTestBase): shutil.copyfile(TEST_XMLFILE, fname) self.addCleanup(support.unlink, fname) - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -766,7 +837,7 @@ class ErrorReportingTest(unittest.TestCase): parser = create_parser() parser.setContentHandler(ContentHandler()) # do nothing source = InputSource() - source.setByteStream(StringIO("")) #ill-formed + source.setByteStream(BytesIO(b"")) #ill-formed name = "a file name" source.setSystemId(name) try: @@ -857,7 +928,9 @@ class XmlReaderTest(XmlTestBase): def test_main(): run_unittest(MakeParserTest, SaxutilsTest, - XmlgenTest, + StringXmlgenTest, + BytesXmlgenTest, + WriterXmlgenTest, ExpatReaderTest, ErrorReportingTest, XmlReaderTest) diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py index 625bc12..a62183a 100644 --- a/Lib/xml/sax/saxutils.py +++ b/Lib/xml/sax/saxutils.py @@ -4,18 +4,10 @@ convenience of application and driver writers. """ import os, urllib.parse, urllib.request +import io from . import handler from . import xmlreader -# See whether the xmlcharrefreplace error handler is -# supported -try: - from codecs import xmlcharrefreplace_errors - _error_handling = "xmlcharrefreplace" - del xmlcharrefreplace_errors -except ImportError: - _error_handling = "strict" - def __dict_replace(s, d): """Replace substrings of a string using a dictionary.""" for key, value in d.items(): @@ -76,14 +68,50 @@ def quoteattr(data, entities={}): return data +def _gettextwriter(out, encoding): + if out is None: + import sys + return sys.stdout + + if isinstance(out, io.TextIOBase): + # use a text writer as is + return out + + # wrap a binary writer with TextIOWrapper + if isinstance(out, io.RawIOBase): + # Keep the original file open when the TextIOWrapper is + # destroyed + class _wrapper: + __class__ = out.__class__ + def __getattr__(self, name): + return getattr(out, name) + buffer = _wrapper() + buffer.close = lambda: None + else: + # This is to handle passed objects that aren't in the + # IOBase hierarchy, but just have a write method + buffer = io.BufferedIOBase() + buffer.writable = lambda: True + buffer.write = out.write + try: + # TextIOWrapper uses this methods to determine + # if BOM (for UTF-16, etc) should be added + buffer.seekable = out.seekable + buffer.tell = out.tell + except AttributeError: + pass + return io.TextIOWrapper(buffer, encoding=encoding, + errors='xmlcharrefreplace', + newline='\n', + write_through=True) + class XMLGenerator(handler.ContentHandler): def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False): - if out is None: - import sys - out = sys.stdout handler.ContentHandler.__init__(self) - self._out = out + out = _gettextwriter(out, encoding) + self._write = out.write + self._flush = out.flush self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] self._undeclared_ns_maps = [] @@ -91,12 +119,6 @@ class XMLGenerator(handler.ContentHandler): self._short_empty_elements = short_empty_elements self._pending_start_element = False - def _write(self, text): - if isinstance(text, str): - self._out.write(text) - else: - self._out.write(text.encode(self._encoding, _error_handling)) - def _qname(self, name): """Builds a qualified name from a (ns_url, localname) pair""" if name[0]: @@ -125,6 +147,9 @@ class XMLGenerator(handler.ContentHandler): self._write('\n' % self._encoding) + def endDocument(self): + self._flush() + def startPrefixMapping(self, prefix, uri): self._ns_contexts.append(self._current_context.copy()) self._current_context[uri] = prefix @@ -157,9 +182,9 @@ class XMLGenerator(handler.ContentHandler): for prefix, uri in self._undeclared_ns_maps: if prefix: - self._out.write(' xmlns:%s="%s"' % (prefix, uri)) + self._write(' xmlns:%s="%s"' % (prefix, uri)) else: - self._out.write(' xmlns="%s"' % uri) + self._write(' xmlns="%s"' % uri) self._undeclared_ns_maps = [] for (name, value) in attrs.items(): diff --git a/Misc/NEWS b/Misc/NEWS index cef6edb..e28509e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -218,6 +218,8 @@ Core and Builtins Library ------- +- Issue #1470548: XMLGenerator now works with binary output streams. + - Issue #6975: os.path.realpath() now correctly resolves multiple nested symlinks on POSIX platforms. -- cgit v0.12