From 345572a1a0263076081020524016eae867677cac Mon Sep 17 00:00:00 2001 From: Jannis Vajen Date: Sun, 27 Feb 2022 15:25:54 +0100 Subject: bpo-46786: Make ElementTree write the HTML tags embed, source, track, wbr as empty tags (GH-31406) See https://html.spec.whatwg.org/multipage/syntax.html#void-elements for reference. --- Lib/test/test_xml_etree.py | 5 +++-- Lib/xml/etree/ElementTree.py | 10 +++------- .../next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst | 2 ++ 3 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c5292b5..35d901f 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1350,8 +1350,9 @@ class ElementTreeTest(unittest.TestCase): def test_html_empty_elems_serialization(self): # issue 15970 # from http://www.w3.org/TR/html401/index/elements.html - for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', - 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']: + for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME', + 'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM', + 'SOURCE', 'TRACK', 'WBR']: for elem in [element, element.lower()]: expected = '<%s>' % elem serialized = serialize(ET.XML('<%s />' % elem), method='html') diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index e9409fd..6059e2f 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -918,13 +918,9 @@ def _serialize_xml(write, elem, qnames, namespaces, if elem.tail: write(_escape_cdata(elem.tail)) -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta", "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass +HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param", "source", + "track", "wbr"} def _serialize_html(write, elem, qnames, namespaces, **kwargs): tag = elem.tag diff --git a/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst b/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst new file mode 100644 index 0000000..e0384a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst @@ -0,0 +1,2 @@ +The HTML serialisation in xml.etree.ElementTree now writes ``embed``, +``source``, ``track`` and ``wbr`` as empty tags, as defined in HTML 5. -- cgit v0.12