Implement the encoding argument for toxml and toprettyxml.

Document toprettyxml.
author: Martin v. Löwis <martin@v.loewis.de> 2002-06-30 15:05:00 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2002-06-30 15:05:00 (GMT)
commit: 7d650ca83bfdc42e852a4a6af00b80d230ecc54a (patch)
tree: 3f421d109018e27740476facebbc2fef2e129907
parent: 2ebfd09e5818b7c6d555bcb297ecbb7cf863fe2c (diff)
download: cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.zip
cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.gz
cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.bz2
5 files changed, 68 insertions, 9 deletions
diff --git a/Doc/lib/xmldomminidom.tex b/Doc/lib/xmldomminidom.tex
index d743c7d..0d5bfea 100644
--- a/Doc/lib/xmldomminidom.tex
+++ b/Doc/lib/xmldomminidom.tex
@@ -121,10 +121,45 @@ children of that node.
 Write XML to the writer object.  The writer should have a
 \method{write()} method which matches that of the file object
 interface.
+
+\versionchanged[To support pretty output, new keyword parameters indent,
+addindent, and newl have been added]{2.1}
+
+\versionchanged[For the \class{Document} node, an additional keyword
+argument encoding can be used to specify the encoding field of the XML
+header]{2.3}
+
 \end{methoddesc}
 
-\begin{methoddesc}{toxml}{}
+\begin{methoddesc}{toxml}{\optional{encoding}}
 Return the XML that the DOM represents as a string.
+
+\versionadded[the \var{encoding} argument]{2.3}
+
+With no argument, the XML header does not specify an encoding, and the
+result is a Unicode string if the default encoding cannot represent all
+characters in the document. Encoding this string in an encoding other
+than UTF-8 is likely incorrect, since UTF-8 is the default encoding of
+XML.
+
+With an explicit \var{encoding} argument, the result is a byte string
+in the specified encoding. It is recommended that this argument is
+always specified. To avoid UnicodeError exceptions in case of
+unrepresentable text data, the encoding argument should be specified
+as "utf-8".
+
+\end{methoddesc}
+
+\begin{methoddesc}{toprettyxml}{\optional{indent\optional{, newl\optional{, encoding}}}}
+
+Return a pretty-printed version of the document. \var{indent} specifies
+the indentation string and defaults to a tabulator; \var{newl} specifies
+the string emitted at the end of each line and defaults to \code{\e n}.
+
+\versionadded{2.1}
+
+\versionadded[the encoding argument; see \method{toxml}]{2.3}
+
 \end{methoddesc}
 
 The following standard DOM methods have special considerations with
diff --git a/Lib/test/output/test_minidom b/Lib/test/output/test_minidom
index fc1017b..1612f10 100644
--- a/Lib/test/output/test_minidom
+++ b/Lib/test/output/test_minidom
@@ -98,6 +98,9 @@ Passed assertion: len(Node.allnodes) == 0
 Passed Test
 Test Succeeded testElementReprAndStr
 Passed assertion: len(Node.allnodes) == 0
+Passed testEncodings - encoding EURO SIGN
+Test Succeeded testEncodings
+Passed assertion: len(Node.allnodes) == 0
 Test Succeeded testFirstChild
 Passed assertion: len(Node.allnodes) == 0
 Test Succeeded testGetAttrLength
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index 4483fc5..d398d73 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -606,6 +606,14 @@ def testSAX2DOM():
 
     doc.unlink()
 
+def testEncodings():
+    doc = parseString('<foo>&#x20ac;</foo>')
+    confirm(doc.toxml() == u'<?xml version="1.0" ?>\n<foo>\u20ac</foo>'
+            and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?>\n<foo>\xe2\x82\xac</foo>'
+            and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?>\n<foo>\xa4</foo>',
+            "testEncodings - encoding EURO SIGN")
+    doc.unlink()
+
 # --- MAIN PROGRAM
 
 names = globals().keys()
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index cb2c4d2..33ad736 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -65,16 +65,22 @@ class Node(xml.dom.Node):
     def __nonzero__(self):
         return 1
 
-    def toxml(self):
-        writer = _get_StringIO()
-        self.writexml(writer)
-        return writer.getvalue()
+    def toxml(self, encoding = None):
+        return self.toprettyxml("", "", encoding)
 
-    def toprettyxml(self, indent="\t", newl="\n"):
+    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
         # indent = the indentation string to prepend, per level
         # newl = the newline string to append
         writer = _get_StringIO()
-        self.writexml(writer, "", indent, newl)
+        if encoding is not None:
+            import codecs
+            # Can't use codecs.getwriter to preserve 2.0 compatibility
+            writer = codecs.lookup(encoding)[3](writer)
+        if self.nodeType == Node.DOCUMENT_NODE:
+            # Can pass encoding only to document, to put it into XML header
+            self.writexml(writer, "", indent, newl, encoding)
+        else:
+            self.writexml(writer, "", indent, newl)
         return writer.getvalue()
 
     def hasChildNodes(self):
@@ -934,8 +940,12 @@ class Document(Node):
         return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
                                              NodeList())
 
-    def writexml(self, writer, indent="", addindent="", newl=""):
-        writer.write('<?xml version="1.0" ?>\n')
+    def writexml(self, writer, indent="", addindent="", newl="",
+                 encoding = None):
+        if encoding is None:
+            writer.write('<?xml version="1.0" ?>\n')
+        else:
+            writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
         for node in self.childNodes:
             node.writexml(writer, indent, addindent, newl)
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 9fb542d..defbc47 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -170,6 +170,9 @@ Extension modules
 
 Library
 
+- xml.dom.minidom.toxml and toprettyxml now take an optional encoding
+  argument.
+
 - Some fixes in the copy module: when an object is copied through its
   __reduce__ method, there was no check for a __setstate__ method on
   the result [SF patch 565085]; deepcopy should treat instances of
author	Martin v. Löwis <martin@v.loewis.de>	2002-06-30 15:05:00 (GMT)
committer	Martin v. Löwis <martin@v.loewis.de>	2002-06-30 15:05:00 (GMT)
commit	7d650ca83bfdc42e852a4a6af00b80d230ecc54a (patch)
tree	3f421d109018e27740476facebbc2fef2e129907
parent	2ebfd09e5818b7c6d555bcb297ecbb7cf863fe2c (diff)
download	cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.zip cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.gz cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.bz2