From 8a80502d2c80e4af771136bf7cefb6ecd7c7df4a Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 13 Jul 2012 09:52:39 +0300 Subject: Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me. --- Doc/library/xml.dom.minidom.rst | 7 +------ Lib/test/test_minidom.py | 5 +++++ Lib/xml/dom/minidom.py | 22 ++++++++++++---------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index ae286b0..a5c6fb2 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and the DOM node. With an explicit *encoding* [1]_ argument, the result is a byte - string in the specified encoding. It is recommended that you - always specify an encoding; you may use any encoding you like, but - an argument of "utf-8" is the most common choice, avoiding - :exc:`UnicodeError` exceptions in case of unrepresentable text - data. - + string in the specified encoding. With no *encoding* argument, the result is a Unicode string, and the XML declaration in the resulting string does not specify an encoding. 
Encoding this string in an encoding other than UTF-8 is diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index cc4c95b..0427ba3 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase): b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>') self.assertEqual(doc.toxml('iso-8859-15'), b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>') + self.assertEqual(doc.toxml('us-ascii'), + b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>') + self.assertEqual(doc.toxml('utf-16'), + '<?xml version="1.0" encoding="utf-16"?>' + '<foo>\u20ac</foo>'.encode('utf-16')) # Verify that character decoding errors throw exceptions instead # of crashing diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 275e20c..28e5030 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -14,7 +14,6 @@ Todo: * SAX 2 namespaces """ -import codecs import io import xml.dom @@ -47,19 +46,22 @@ class Node(xml.dom.Node): return self.toprettyxml("", "", encoding) def toprettyxml(self, indent="\t", newl="\n", encoding=None): - # indent = the indentation string to prepend, per level - # newl = the newline string to append - use_encoding = "utf-8" if encoding is None else encoding - writer = codecs.getwriter(use_encoding)(io.BytesIO()) + if encoding is None: + writer = io.StringIO() + else: + writer = io.TextIOWrapper(io.BytesIO(), + encoding=encoding, + errors="xmlcharrefreplace", + newline='\n') if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header self.writexml(writer, "", indent, newl, encoding) else: self.writexml(writer, "", indent, newl) if encoding is None: - return writer.stream.getvalue().decode(use_encoding) + return writer.getvalue() else: - return writer.stream.getvalue() + return writer.detach().getvalue() def hasChildNodes(self): return bool(self.childNodes) @@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS): raise xml.dom.NotSupportedErr("cannot import document type nodes") return _clone_node(node, deep, self) - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): + def
writexml(self, writer, indent="", addindent="", newl="", encoding=None): if encoding is None: writer.write('<?xml version="1.0" ?>'+newl) else: - writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) + writer.write('<?xml version="1.0" encoding="%s"?>%s' % ( + encoding, newl)) for node in self.childNodes: node.writexml(writer, indent, addindent, newl) -- cgit v0.12