summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/xml.etree.elementtree.rst12
-rw-r--r--Lib/test/test_xml_etree.py117
-rw-r--r--Lib/xml/etree/ElementTree.py53
-rw-r--r--Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst2
4 files changed, 183 insertions, 1 deletions
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 9f46755..6047e6e 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -572,6 +572,18 @@ Functions
.. versionadded:: 3.2
+.. function:: indent(tree, space="  ", level=0)
+
+ Appends whitespace to the subtree to indent the tree visually.
+ This can be used to generate pretty-printed XML output.
+ *tree* can be an Element or ElementTree. *space* is the whitespace
+ string that will be inserted for each indentation level, two space
+ characters by default. For indenting partial subtrees inside of an
+ already indented tree, pass the initial indentation level as *level*.
+
+ .. versionadded:: 3.9
+
+
.. function:: iselement(element)
Checks if an object appears to be a valid element object. *element* is an
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index b2492cd..db06ace 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase):
elem = ET.fromstring("<html><body>text</body></html>")
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
def test_indent(self):
    # Exercise ET.indent() with its default indentation of two spaces
    # per nesting level.

    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is put on its own, indented line.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Pre-existing whitespace-only text/tail is replaced, not extended.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A tail with real content must be preserved as is.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Each nesting level adds one more unit of indentation; mixed
    # whitespace ("\n", "\t") between siblings is normalised.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") is not broken up by indentation.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )
+
def test_indent_space(self):
    # The *space* argument replaces the default per-level indentation.
    source = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # One tab character per nesting level.
    tabbed = ET.XML(source)
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # An empty *space* still breaks lines but adds no leading whitespace.
    flat = ET.XML(source)
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)
+
def test_indent_space_caching(self):
    # ET.indent() should reuse one string object per indentation level
    # instead of building a new string for every element.
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)

    # Tails: None for the root, then one string per level (0, 1 and 2)
    # with the default two-space indentation.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: indentation for levels 1 to 3 plus the untouched content.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal tails must also be identical objects: the number of distinct
    # values has to match the number of distinct object ids.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
+
def test_indent_level(self):
    # The *level* argument sets the initial indentation depth.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")

    # A negative level is rejected before the tree is touched.
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space indentation: the closing root
    # tag sits at 2 levels (4 spaces), children at 3 levels (6 spaces)
    # and grandchildren at 4 levels (8 spaces).
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a single-space indentation unit.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )
+
def test_tostring_default_namespace(self):
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
self.assertEqual(
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 645e999..431ecd0 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -76,7 +76,7 @@ __all__ = [
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
- "iselement", "iterparse",
+ "indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
@@ -1185,6 +1185,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")
+
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tree is changed
    in place and nothing is returned.  The tail of the (root) element
    itself is not changed; missing or whitespace-only text and tail values
    inside its subtree are replaced by indentation, while text and tails
    that contain non-whitespace characters are preserved.  An element
    without children is left untouched.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    # Use len() rather than truthiness to test for children.
    if not len(tree):
        return

    # Reduce the memory consumption by reusing indentation strings.
    # indentations[d] is the string used at depth d below the root.
    indentations = ["\n" + level * space]

    def _indent_children(elem, depth):
        # Start a new indentation level for the first child.
        child_depth = depth + 1
        try:
            child_indentation = indentations[child_depth]
        except IndexError:
            # First visit to this depth: build the string once, cache it.
            child_indentation = indentations[depth] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_depth)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # Only called for elements with children, so 'child' is bound here
        # and its tail was set to a string by the loop above.
        if not child.tail.strip():
            child.tail = indentations[depth]

    _indent_children(tree, 0)
+
+
# --------------------------------------------------------------------
# parsing
diff --git a/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst
new file mode 100644
index 0000000..5f8b7a0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst
@@ -0,0 +1,2 @@
+Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
+Contributed by Stefan Behnel.