diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-06-30 15:05:00 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-06-30 15:05:00 (GMT) |
commit | 7d650ca83bfdc42e852a4a6af00b80d230ecc54a (patch) | |
tree | 3f421d109018e27740476facebbc2fef2e129907 | |
parent | 2ebfd09e5818b7c6d555bcb297ecbb7cf863fe2c (diff) | |
download | cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.zip cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.gz cpython-7d650ca83bfdc42e852a4a6af00b80d230ecc54a.tar.bz2 |
Implement the encoding argument for toxml and toprettyxml.
Document toprettyxml.
-rw-r--r-- | Doc/lib/xmldomminidom.tex | 37 | ||||
-rw-r--r-- | Lib/test/output/test_minidom | 3 | ||||
-rw-r--r-- | Lib/test/test_minidom.py | 8 | ||||
-rw-r--r-- | Lib/xml/dom/minidom.py | 26 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
5 files changed, 68 insertions, 9 deletions
diff --git a/Doc/lib/xmldomminidom.tex b/Doc/lib/xmldomminidom.tex index d743c7d..0d5bfea 100644 --- a/Doc/lib/xmldomminidom.tex +++ b/Doc/lib/xmldomminidom.tex @@ -121,10 +121,45 @@ children of that node. Write XML to the writer object. The writer should have a \method{write()} method which matches that of the file object interface. + +\versionadded[To support pretty output, new keyword parameters indent, +addindent, and newl have been added]{2.1} + +\versionadded[For the \class{Document} node, an additional keyword +argument encoding can be used to specify the encoding field of the XML +header]{2.3} + \end{methoddesc} -\begin{methoddesc}{toxml}{} +\begin{methoddesc}{toxml}{\optional{encoding}} Return the XML that the DOM represents as a string. + +\versionadded[the \var{encoding} argument]{2.3} + +With no argument, the XML header does not specify an encoding, and the +result is Unicode string if the default encoding cannot represent all +characters in the document. Encoding this string in an encoding other +than UTF-8 is likely incorrect, since UTF-8 is the default encoding of +XML. + +With an explicit \var{encoding} argument, the result is a byte string +in the specified encoding. It is recommended that this argument is +always specified. To avoid UnicodeError exceptions in case of +unrepresentable text data, the encoding argument should be specified +as "utf-8". + +\end{methoddesc} + +\begin{methoddesc}{toprettyxml}{\optional{indent\optional{, newl}}} + +Return a pretty-printed version of the document. \var{indent} specifies +the indentation string and defaults to a tabulator; \var{newl} specifies +the string emitted at the end of each line and defaults to \\n. 
+ +\versionadded{2.1} + +\versionadded[the encoding argument; see \method{toxml}]{2.3} + \end{methoddesc} The following standard DOM methods have special considerations with diff --git a/Lib/test/output/test_minidom b/Lib/test/output/test_minidom index fc1017b..1612f10 100644 --- a/Lib/test/output/test_minidom +++ b/Lib/test/output/test_minidom @@ -98,6 +98,9 @@ Passed assertion: len(Node.allnodes) == 0 Passed Test Test Succeeded testElementReprAndStr Passed assertion: len(Node.allnodes) == 0 +Passed testEncodings - encoding EURO SIGN +Test Succeeded testEncodings +Passed assertion: len(Node.allnodes) == 0 Test Succeeded testFirstChild Passed assertion: len(Node.allnodes) == 0 Test Succeeded testGetAttrLength diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 4483fc5..d398d73 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -606,6 +606,14 @@ def testSAX2DOM(): doc.unlink() +def testEncodings(): + doc = parseString('<foo>&#x20ac;</foo>') + confirm(doc.toxml() == u'<?xml version="1.0" ?>\n<foo>\u20ac</foo>' + and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?>\n<foo>\xe2\x82\xac</foo>' + and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?>\n<foo>\xa4</foo>', + "testEncodings - encoding EURO SIGN") + doc.unlink() + # --- MAIN PROGRAM names = globals().keys() diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index cb2c4d2..33ad736 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -65,16 +65,22 @@ class Node(xml.dom.Node): def __nonzero__(self): return 1 - def toxml(self): - writer = _get_StringIO() - self.writexml(writer) - return writer.getvalue() + def toxml(self, encoding = None): + return self.toprettyxml("", "", encoding) - def toprettyxml(self, indent="\t", newl="\n"): + def toprettyxml(self, indent="\t", newl="\n", encoding = None): # indent = the indentation string to prepend, per level # newl = the newline string to append writer = _get_StringIO() - 
self.writexml(writer, "", indent, newl) + if encoding is not None: + import codecs + # Can't use codecs.getwriter to preserve 2.0 compatibility + writer = codecs.lookup(encoding)[3](writer) + if self.nodeType == Node.DOCUMENT_NODE: + # Can pass encoding only to document, to put it into XML header + self.writexml(writer, "", indent, newl, encoding) + else: + self.writexml(writer, "", indent, newl) return writer.getvalue() def hasChildNodes(self): @@ -934,8 +940,12 @@ class Document(Node): return _getElementsByTagNameNSHelper(self, namespaceURI, localName, NodeList()) - def writexml(self, writer, indent="", addindent="", newl=""): - writer.write('<?xml version="1.0" ?>\n') + def writexml(self, writer, indent="", addindent="", newl="", + encoding = None): + if encoding is None: + writer.write('<?xml version="1.0" ?>\n') + else: + writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding) for node in self.childNodes: node.writexml(writer, indent, addindent, newl) @@ -170,6 +170,9 @@ Extension modules Library +- xml.dom.minidom.toxml and toprettyxml now take an optional encoding + argument. + - Some fixes in the copy module: when an object is copied through its __reduce__ method, there was no check for a __setstate__ method on the result [SF patch 565085]; deepcopy should treat instances of |