From dc04a0571e362cd3de040771d7705cb107ae26fc Mon Sep 17 00:00:00 2001 From: Henry Harutyunyan Date: Sat, 29 Feb 2020 12:22:19 +0400 Subject: bpo-37534: Allow adding Standalone Document Declaration when generating XML documents (GH-14912) --- Doc/library/xml.dom.minidom.rst | 19 +++++++++++++--- Lib/test/test_minidom.py | 16 +++++++++++++ Lib/xml/dom/minidom.py | 26 +++++++++++++--------- Misc/ACKS | 1 + .../2019-08-20-00-02-37.bpo-37534.TvjAUi.rst | 2 ++ 5 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 8711242..2c78cd9 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -132,7 +132,8 @@ module documentation. This section lists the differences between the API and ... # Work with dom. -.. method:: Node.writexml(writer, indent="", addindent="", newl="") +.. method:: Node.writexml(writer, indent="", addindent="", newl="", + encoding=None, standalone=None) Write XML to the writer object. The writer receives texts but not bytes as input, it should have a :meth:`write` method which matches that of the file object @@ -144,11 +145,18 @@ module documentation. This section lists the differences between the API and For the :class:`Document` node, an additional keyword argument *encoding* can be used to specify the encoding field of the XML header. + Similarly, explicitly stating the *standalone* argument causes the + standalone document declarations to be added to the prologue of the XML + document. + If the value is set to ``True``, ``standalone="yes"`` is added, + otherwise it is set to ``"no"``. + Not stating the argument will omit the declaration from the document. + .. versionchanged:: 3.8 The :meth:`writexml` method now preserves the attribute order specified by the user. -.. 
method:: Node.toxml(encoding=None, standalone=None) Return a string or byte string containing the XML represented by the DOM node. @@ -160,11 +168,14 @@ module documentation. This section lists the differences between the API and encoding. Encoding this string in an encoding other than UTF-8 is likely incorrect, since UTF-8 is the default encoding of XML. + The *standalone* argument behaves exactly as in :meth:`writexml`. + .. versionchanged:: 3.8 The :meth:`toxml` method now preserves the attribute order specified by the user. -.. method:: Node.toprettyxml(indent="\\t", newl="\\n", encoding=None) +.. method:: Node.toprettyxml(indent="\\t", newl="\\n", encoding=None, + standalone=None) Return a pretty-printed version of the document. *indent* specifies the indentation string and defaults to a tabulator; *newl* specifies the string @@ -173,6 +184,8 @@ module documentation. This section lists the differences between the API and The *encoding* argument behaves like the corresponding argument of :meth:`toxml`. + The *standalone* argument behaves exactly as in :meth:`writexml`. + .. versionchanged:: 3.8 The :meth:`toprettyxml` method now preserves the attribute order specified by the user. 
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 7096585..1663b1f 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1152,6 +1152,22 @@ class MinidomTest(unittest.TestCase): doc.unlink() + def testStandalone(self): + doc = parseString('') + self.assertEqual(doc.toxml(), + '\u20ac') + self.assertEqual(doc.toxml(standalone=None), + '\u20ac') + self.assertEqual(doc.toxml(standalone=True), + '\u20ac') + self.assertEqual(doc.toxml(standalone=False), + '\u20ac') + self.assertEqual(doc.toxml('utf-8', True), + b'' + b'\xe2\x82\xac') + + doc.unlink() + class UserDataHandler: called = 0 def handle(self, operation, key, data, src, dst): diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 464420b..1083b48 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -43,10 +43,11 @@ class Node(xml.dom.Node): def __bool__(self): return True - def toxml(self, encoding=None): - return self.toprettyxml("", "", encoding) + def toxml(self, encoding=None, standalone=None): + return self.toprettyxml("", "", encoding, standalone) - def toprettyxml(self, indent="\t", newl="\n", encoding=None): + def toprettyxml(self, indent="\t", newl="\n", encoding=None, + standalone=None): if encoding is None: writer = io.StringIO() else: @@ -56,7 +57,7 @@ class Node(xml.dom.Node): newline='\n') if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header - self.writexml(writer, "", indent, newl, encoding) + self.writexml(writer, "", indent, newl, encoding, standalone) else: self.writexml(writer, "", indent, newl) if encoding is None: @@ -1787,12 +1788,17 @@ class Document(Node, DocumentLS): raise xml.dom.NotSupportedErr("cannot import document type nodes") return _clone_node(node, deep, self) - def writexml(self, writer, indent="", addindent="", newl="", encoding=None): - if encoding is None: - writer.write(''+newl) - else: - writer.write('%s' % ( - encoding, newl)) + def writexml(self, writer, 
indent="", addindent="", newl="", encoding=None, + standalone=None): + declarations = [] + + if encoding: + declarations.append(f'encoding="{encoding}"') + if standalone is not None: + declarations.append(f'standalone="{"yes" if standalone else "no"}"') + + writer.write(f'{newl}') + for node in self.childNodes: node.writexml(writer, indent, addindent, newl) diff --git a/Misc/ACKS b/Misc/ACKS index fe24a56..1b5febb 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -659,6 +659,7 @@ David Harrigan Brian Harring Jonathan Hartley Travis B. Hartwell +Henrik Harutyunyan Shane Harvey Larry Hastings Tim Hatch diff --git a/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst b/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst new file mode 100644 index 0000000..0c9dd29 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-20-00-02-37.bpo-37534.TvjAUi.rst @@ -0,0 +1,2 @@ +When using minidom module to generate XML documents the ability to add Standalone Document Declaration is added. +All the changes are made to generate a document in compliance with Extensible Markup Language (XML) 1.0 (Fifth Edition) W3C Recommendation (available here: https://www.w3.org/TR/xml/#sec-prolog-dtd). \ No newline at end of file -- cgit v0.12