summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eli Bendersky <eliben@gmail.com>, 2012-07-13 06:52:39 (GMT)
committer: Eli Bendersky <eliben@gmail.com>, 2012-07-13 06:52:39 (GMT)
commit: 8a80502d2c80e4af771136bf7cefb6ecd7c7df4a (patch)
tree: ff3789e4a75380a867728f64d9c48fdd3aab945b
parent: b674dcf53eb2d17929214b042506c18df85a53d5 (diff)
download: cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.zip
cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.tar.gz
cpython-8a80502d2c80e4af771136bf7cefb6ecd7c7df4a.tar.bz2
Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.
-rw-r--r-- Doc/library/xml.dom.minidom.rst | 7
-rw-r--r-- Lib/test/test_minidom.py | 5
-rw-r--r-- Lib/xml/dom/minidom.py | 22
3 files changed, 18 insertions, 16 deletions
diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst
index ae286b0..a5c6fb2 100644
--- a/Doc/library/xml.dom.minidom.rst
+++ b/Doc/library/xml.dom.minidom.rst
@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
the DOM node.
With an explicit *encoding* [1]_ argument, the result is a byte
- string in the specified encoding. It is recommended that you
- always specify an encoding; you may use any encoding you like, but
- an argument of "utf-8" is the most common choice, avoiding
- :exc:`UnicodeError` exceptions in case of unrepresentable text
- data.
-
+ string in the specified encoding.
With no *encoding* argument, the result is a Unicode string, and the
XML declaration in the resulting string does not specify an
encoding. Encoding this string in an encoding other than UTF-8 is
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index cc4c95b..0427ba3 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase):
b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
self.assertEqual(doc.toxml('iso-8859-15'),
b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
+ self.assertEqual(doc.toxml('us-ascii'),
+ b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
+ self.assertEqual(doc.toxml('utf-16'),
+ '<?xml version="1.0" encoding="utf-16"?>'
+ '<foo>\u20ac</foo>'.encode('utf-16'))
# Verify that character decoding errors throw exceptions instead
# of crashing
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index 275e20c..28e5030 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,7 +14,6 @@ Todo:
* SAX 2 namespaces
"""
-import codecs
import io
import xml.dom
@@ -47,19 +46,22 @@ class Node(xml.dom.Node):
return self.toprettyxml("", "", encoding)
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
- # indent = the indentation string to prepend, per level
- # newl = the newline string to append
- use_encoding = "utf-8" if encoding is None else encoding
- writer = codecs.getwriter(use_encoding)(io.BytesIO())
+ if encoding is None:
+ writer = io.StringIO()
+ else:
+ writer = io.TextIOWrapper(io.BytesIO(),
+ encoding=encoding,
+ errors="xmlcharrefreplace",
+ newline='\n')
if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding)
else:
self.writexml(writer, "", indent, newl)
if encoding is None:
- return writer.stream.getvalue().decode(use_encoding)
+ return writer.getvalue()
else:
- return writer.stream.getvalue()
+ return writer.detach().getvalue()
def hasChildNodes(self):
return bool(self.childNodes)
@@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS):
raise xml.dom.NotSupportedErr("cannot import document type nodes")
return _clone_node(node, deep, self)
- def writexml(self, writer, indent="", addindent="", newl="",
- encoding = None):
+ def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
if encoding is None:
writer.write('<?xml version="1.0" ?>'+newl)
else:
- writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
+ writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
+ encoding, newl))
for node in self.childNodes:
node.writexml(writer, indent, addindent, newl)