diff options
-rw-r--r-- | Doc/library/xml.etree.elementtree.rst | 12 | ||||
-rw-r--r-- | Lib/test/test_xml_etree.py | 117 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 53 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst | 2 |
4 files changed, 183 insertions, 1 deletion
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 9f46755..6047e6e 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -572,6 +572,18 @@ Functions .. versionadded:: 3.2 +.. function:: indent(tree, space=" ", level=0) + + Appends whitespace to the subtree to indent the tree visually. + This can be used to generate pretty-printed XML output. + *tree* can be an Element or ElementTree. *space* is the whitespace + string that will be inserted for each indentation level, two space + characters by default. For indenting partial subtrees inside of an + already indented tree, pass the initial indentation level as *level*. + + .. versionadded:: 3.9 + + .. function:: iselement(element) Checks if an object appears to be a valid element object. *element* is an diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b2492cd..db06ace 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase): elem = ET.fromstring("<html><body>text</body></html>") self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') + def test_indent(self): + elem = ET.XML("<root></root>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<root />') + + elem = ET.XML("<html><body>text</body></html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') + + elem = ET.XML("<html> <body>text</body> </html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') + + elem = ET.XML("<html><body>text</body>tail</html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>') + + elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>par</p>\n' + b' <p>text</p>\n' + 
b' <p>\n' + b' <br />\n' + b' </p>\n' + b' </body>\n' + b'</html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' <p>text</p>\n' + b' </body>\n' + b'</html>' + ) + + def test_indent_space(self): + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, space='\t') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b'\t<body>\n' + b'\t\t<p>pre<br />post</p>\n' + b'\t\t<p>text</p>\n' + b'\t</body>\n' + b'</html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, space='') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b'<body>\n' + b'<p>pre<br />post</p>\n' + b'<p>text</p>\n' + b'</body>\n' + b'</html>' + ) + + def test_indent_space_caching(self): + elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>") + ET.indent(elem) + self.assertEqual( + {el.tail for el in elem.iter()}, + {None, "\n", "\n ", "\n "} + ) + self.assertEqual( + {el.text for el in elem.iter()}, + {None, "\n ", "\n ", "\n ", "par", "text"} + ) + self.assertEqual( + len({el.tail for el in elem.iter()}), + len({id(el.tail) for el in elem.iter()}), + ) + + def test_indent_level(self): + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + with self.assertRaises(ValueError): + ET.indent(elem, level=-1) + self.assertEqual( + ET.tostring(elem), + b"<html><body><p>pre<br />post</p><p>text</p></body></html>" + ) + + ET.indent(elem, level=2) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' <p>text</p>\n' + b' </body>\n' + b' </html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, level=1, space=' ') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' 
<p>text</p>\n' + b' </body>\n' + b' </html>' + ) + def test_tostring_default_namespace(self): elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') self.assertEqual( diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 645e999..431ecd0 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -76,7 +76,7 @@ __all__ = [ "dump", "Element", "ElementTree", "fromstring", "fromstringlist", - "iselement", "iterparse", + "indent", "iselement", "iterparse", "parse", "ParseError", "PI", "ProcessingInstruction", "QName", @@ -1185,6 +1185,57 @@ def dump(elem): if not tail or tail[-1] != "\n": sys.stdout.write("\n") + +def indent(tree, space=" ", level=0): + """Indent an XML document by inserting newlines and indentation space + after elements. + + *tree* is the ElementTree or Element to modify. The (root) element + itself will not be changed, but the tail text of all elements in its + subtree will be adapted. + + *space* is the whitespace to insert for each indentation level, two + space characters by default. + + *level* is the initial indentation level. Setting this to a higher + value than 0 can be used for indenting subtrees that are more deeply + nested inside of a document. + """ + if isinstance(tree, ElementTree): + tree = tree.getroot() + if level < 0: + raise ValueError(f"Initial indentation level must be >= 0, got {level}") + if not len(tree): + return + + # Reduce the memory consumption by reusing indentation strings. + indentations = ["\n" + level * space] + + def _indent_children(elem, level): + # Start a new indentation level for the first child. 
+ child_level = level + 1 + try: + child_indentation = indentations[child_level] + except IndexError: + child_indentation = indentations[level] + space + indentations.append(child_indentation) + + if not elem.text or not elem.text.strip(): + elem.text = child_indentation + + for child in elem: + if len(child): + _indent_children(child, child_level) + if not child.tail or not child.tail.strip(): + child.tail = child_indentation + + # Dedent after the last child by overwriting the previous indentation. + if not child.tail.strip(): + child.tail = indentations[level] + + _indent_children(tree, 0) + + # -------------------------------------------------------------------- # parsing diff --git a/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst new file mode 100644 index 0000000..5f8b7a0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst @@ -0,0 +1,2 @@ +Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees. +Contributed by Stefan Behnel. |