summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2019-08-23 14:44:25 (GMT)
committer: GitHub <noreply@github.com> 2019-08-23 14:44:25 (GMT)
commit: b5d3ceea48c181b3e2c6c67424317afed606bd39 (patch)
tree: 6ee510b9c8db32251e3aed8a16b186ac08c3f04f /Lib
parent: 81446fd0d4fa60042ac2752350f31004324510f9 (diff)
downloadcpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.zip
cpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.tar.gz
cpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.tar.bz2
bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (GH-15200)
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_xml_etree.py | 117
-rw-r--r-- Lib/xml/etree/ElementTree.py | 53
2 files changed, 169 insertions, 1 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index b2492cd..db06ace 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase):
elem = ET.fromstring("<html><body>text</body></html>")
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
def test_indent(self):
    # An element without children has nothing to indent.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # Children are placed on their own line, indented by the default
    # two spaces per level.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Existing whitespace-only text and tails are replaced, not kept.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A tail with non-whitespace content is left untouched.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Mixed newline/tab separators are normalised at every nesting depth.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") must not gain any whitespace.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )
+
def test_indent_space(self):
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # A tab as the per-level indentation unit.
    tabbed = ET.XML(markup)
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # An empty unit still inserts the line breaks, just no indentation.
    flat = ET.XML(markup)
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)
+
def test_indent_space_caching(self):
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)
    # Tails: the root keeps None; descendants get the indentation of the
    # level they close or continue (0, 1 or 2 levels deep).
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: only elements with children receive indentation (levels 1-3);
    # leaf elements keep their original text (or None).
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal indentation strings must be the very same object, i.e. the
    # per-level strings are reused rather than rebuilt for every element.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
+
def test_indent_level(self):
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    # A negative level is rejected before the tree is touched.
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space unit: the root's closing tag sits
    # at 2 * 2 = 4 spaces and each nesting depth adds two more.
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a one-space unit: 1, 2 and 3 spaces per depth.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )
+
def test_tostring_default_namespace(self):
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
self.assertEqual(
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 645e999..431ecd0 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -76,7 +76,7 @@ __all__ = [
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
- "iselement", "iterparse",
+ "indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
@@ -1185,6 +1185,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")
+
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself is not changed.  The text of every element that has
    children (the root included) and the tail of every element below the
    root are replaced with a newline plus indentation, but only where they
    are empty or consist solely of whitespace; text and tails that carry
    other content are left untouched.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    The tree is modified in place.  Raises ValueError if *level* is
    negative.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # No children: there is nothing to put on separate lines.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # indentations[i] is the string used i levels below the starting element.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            # First visit to this depth: derive it from the parent's string
            # and remember it for all later elements at the same depth.
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # (elem always has at least one child here, so `child` is bound and
        # the loop above has left its tail as a non-empty string.)
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
+
+
# --------------------------------------------------------------------
# parsing