diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-08-23 14:44:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-23 14:44:25 (GMT) |
commit | b5d3ceea48c181b3e2c6c67424317afed606bd39 (patch) | |
tree | 6ee510b9c8db32251e3aed8a16b186ac08c3f04f /Lib | |
parent | 81446fd0d4fa60042ac2752350f31004324510f9 (diff) | |
download | cpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.zip cpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.tar.gz cpython-b5d3ceea48c181b3e2c6c67424317afed606bd39.tar.bz2 |
bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (GH-15200)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_xml_etree.py | 117 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 53 |
2 files changed, 169 insertions, 1 deletion
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b2492cd..db06ace 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase): elem = ET.fromstring("<html><body>text</body></html>") self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') + def test_indent(self): + elem = ET.XML("<root></root>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<root />') + + elem = ET.XML("<html><body>text</body></html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') + + elem = ET.XML("<html> <body>text</body> </html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') + + elem = ET.XML("<html><body>text</body>tail</html>") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>') + + elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>par</p>\n' + b' <p>text</p>\n' + b' <p>\n' + b' <br />\n' + b' </p>\n' + b' </body>\n' + b'</html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' <p>text</p>\n' + b' </body>\n' + b'</html>' + ) + + def test_indent_space(self): + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, space='\t') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b'\t<body>\n' + b'\t\t<p>pre<br />post</p>\n' + b'\t\t<p>text</p>\n' + b'\t</body>\n' + b'</html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, space='') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b'<body>\n' + b'<p>pre<br />post</p>\n' + b'<p>text</p>\n' + 
b'</body>\n' + b'</html>' + ) + + def test_indent_space_caching(self): + elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>") + ET.indent(elem) + self.assertEqual( + {el.tail for el in elem.iter()}, + {None, "\n", "\n ", "\n "} + ) + self.assertEqual( + {el.text for el in elem.iter()}, + {None, "\n ", "\n ", "\n ", "par", "text"} + ) + self.assertEqual( + len({el.tail for el in elem.iter()}), + len({id(el.tail) for el in elem.iter()}), + ) + + def test_indent_level(self): + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + with self.assertRaises(ValueError): + ET.indent(elem, level=-1) + self.assertEqual( + ET.tostring(elem), + b"<html><body><p>pre<br />post</p><p>text</p></body></html>" + ) + + ET.indent(elem, level=2) + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' <p>text</p>\n' + b' </body>\n' + b' </html>' + ) + + elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") + ET.indent(elem, level=1, space=' ') + self.assertEqual( + ET.tostring(elem), + b'<html>\n' + b' <body>\n' + b' <p>pre<br />post</p>\n' + b' <p>text</p>\n' + b' </body>\n' + b' </html>' + ) + def test_tostring_default_namespace(self): elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') self.assertEqual( diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 645e999..431ecd0 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -76,7 +76,7 @@ __all__ = [ "dump", "Element", "ElementTree", "fromstring", "fromstringlist", - "iselement", "iterparse", + "indent", "iselement", "iterparse", "parse", "ParseError", "PI", "ProcessingInstruction", "QName", @@ -1185,6 +1185,57 @@ def dump(elem): if not tail or tail[-1] != "\n": sys.stdout.write("\n") + +def indent(tree, space=" ", level=0): + """Indent an XML document by inserting newlines and indentation space + after elements. 
+ + *tree* is the ElementTree or Element to modify. The (root) element + itself will not be changed, but the tail text of all elements in its + subtree will be adapted. + + *space* is the whitespace to insert for each indentation level, two + space characters by default. + + *level* is the initial indentation level. Setting this to a higher + value than 0 can be used for indenting subtrees that are more deeply + nested inside of a document. + """ + if isinstance(tree, ElementTree): + tree = tree.getroot() + if level < 0: + raise ValueError(f"Initial indentation level must be >= 0, got {level}") + if not len(tree): + return + + # Reduce the memory consumption by reusing indentation strings. + indentations = ["\n" + level * space] + + def _indent_children(elem, level): + # Start a new indentation level for the first child. + child_level = level + 1 + try: + child_indentation = indentations[child_level] + except IndexError: + child_indentation = indentations[level] + space + indentations.append(child_indentation) + + if not elem.text or not elem.text.strip(): + elem.text = child_indentation + + for child in elem: + if len(child): + _indent_children(child, child_level) + if not child.tail or not child.tail.strip(): + child.tail = child_indentation + + # Dedent after the last child by overwriting the previous indentation. + if not child.tail.strip(): + child.tail = indentations[level] + + _indent_children(tree, 0) + + # -------------------------------------------------------------------- # parsing |