diff options
author | Eli Bendersky <eliben@gmail.com> | 2012-07-13 06:52:39 (GMT) |
---|---|---|
committer | Eli Bendersky <eliben@gmail.com> | 2012-07-13 06:52:39 (GMT) |
commit | 8a80502d2c80e4af771136bf7cefb6ecd7c7df4a (patch) | |
tree | ff3789e4a75380a867728f64d9c48fdd3aab945b | |
parent | b674dcf53eb2d17929214b042506c18df85a53d5 (diff) | |
download | cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.zip cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.tar.gz cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.tar.bz2 |
Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.
-rw-r--r-- | Doc/library/xml.dom.minidom.rst | 7 | ||||
-rw-r--r-- | Lib/test/test_minidom.py | 5 | ||||
-rw-r--r-- | Lib/xml/dom/minidom.py | 22 |
3 files changed, 18 insertions, 16 deletions
diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index ae286b0..a5c6fb2 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and the DOM node. With an explicit *encoding* [1]_ argument, the result is a byte - string in the specified encoding. It is recommended that you - always specify an encoding; you may use any encoding you like, but - an argument of "utf-8" is the most common choice, avoiding - :exc:`UnicodeError` exceptions in case of unrepresentable text - data. - + string in the specified encoding. With no *encoding* argument, the result is a Unicode string, and the XML declaration in the resulting string does not specify an encoding. Encoding this string in an encoding other than UTF-8 is diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index cc4c95b..0427ba3 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase): b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>') self.assertEqual(doc.toxml('iso-8859-15'), b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>') + self.assertEqual(doc.toxml('us-ascii'), + b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>') + self.assertEqual(doc.toxml('utf-16'), + '<?xml version="1.0" encoding="utf-16"?>' + '<foo>\u20ac</foo>'.encode('utf-16')) # Verify that character decoding errors throw exceptions instead # of crashing diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 275e20c..28e5030 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -14,7 +14,6 @@ Todo: * SAX 2 namespaces """ -import codecs import io import xml.dom @@ -47,19 +46,22 @@ class Node(xml.dom.Node): return self.toprettyxml("", "", encoding) def toprettyxml(self, indent="\t", newl="\n", encoding=None): - # indent = the indentation string to prepend, per level 
- # newl = the newline string to append - use_encoding = "utf-8" if encoding is None else encoding - writer = codecs.getwriter(use_encoding)(io.BytesIO()) + if encoding is None: + writer = io.StringIO() + else: + writer = io.TextIOWrapper(io.BytesIO(), + encoding=encoding, + errors="xmlcharrefreplace", + newline='\n') if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header self.writexml(writer, "", indent, newl, encoding) else: self.writexml(writer, "", indent, newl) if encoding is None: - return writer.stream.getvalue().decode(use_encoding) + return writer.getvalue() else: - return writer.stream.getvalue() + return writer.detach().getvalue() def hasChildNodes(self): return bool(self.childNodes) @@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS): raise xml.dom.NotSupportedErr("cannot import document type nodes") return _clone_node(node, deep, self) - def writexml(self, writer, indent="", addindent="", newl="", - encoding = None): + def writexml(self, writer, indent="", addindent="", newl="", encoding=None): if encoding is None: writer.write('<?xml version="1.0" ?>'+newl) else: - writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) + writer.write('<?xml version="1.0" encoding="%s"?>%s' % ( + encoding, newl)) for node in self.childNodes: node.writexml(writer, indent, addindent, newl) |