summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormefistotelis <listom@gmail.com>2020-04-12 12:51:58 (GMT)
committerGitHub <noreply@github.com>2020-04-12 12:51:58 (GMT)
commit5fd8123dfdf6df0a9c29363c8327ccfa0c1d41ac (patch)
treee2e54c6f4857356fec8c334a5d12b01ae3ca7ebd
parent8f87eefe7f0576c05c488874eb9601a7a87c7312 (diff)
downloadcpython-5fd8123dfdf6df0a9c29363c8327ccfa0c1d41ac.zip
cpython-5fd8123dfdf6df0a9c29363c8327ccfa0c1d41ac.tar.gz
cpython-5fd8123dfdf6df0a9c29363c8327ccfa0c1d41ac.tar.bz2
bpo-39011: Preserve line endings within ElementTree attributes (GH-18468)
* bpo-39011: Preserve line endings within attributes Line endings within attributes were previously normalized to "\n" in Py3.7/3.8. This patch removes that normalization, as line endings which were replaced by entity numbers should be preserved in original form.
-rw-r--r--Doc/whatsnew/3.9.rst9
-rw-r--r--Lib/test/test_xml_etree.py5
-rw-r--r--Lib/xml/etree/ElementTree.py14
-rw-r--r--Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst3
4 files changed, 22 insertions, 9 deletions
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index 3beb721..6cd80ce 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -412,6 +412,15 @@ customization consistently by always using the value specified by
case), and one used ``__VENV_NAME__`` instead.
(Contributed by Brett Cannon in :issue:`37663`.)
+xml
+---
+
+White space characters within attributes are now preserved when serializing
+:mod:`xml.etree.ElementTree` to an XML file. End-of-line characters are no
+longer normalized to "\n". This is the result of a discussion about how to
+interpret section 2.11 of the XML spec.
+(Contributed by Mefistotelis in :issue:`39011`.)
+
Optimizations
=============
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 785edb7..d01649d 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -430,13 +430,14 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(ET.tostring(elem),
b'<test testa="testval" testb="test1" testc="test2">aa</test>')
+ # Test preserving white space chars in attributes
elem = ET.Element('test')
elem.set('a', '\r')
elem.set('b', '\r\n')
elem.set('c', '\t\n\r ')
- elem.set('d', '\n\n')
+ elem.set('d', '\n\n\r\r\t\t ')
self.assertEqual(ET.tostring(elem),
- b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
+ b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09; " />')
def test_makeelement(self):
# Test makeelement handling.
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index c8d898f..da2bcad 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1057,15 +1057,15 @@ def _escape_attrib(text):
text = text.replace(">", "&gt;")
if "\"" in text:
text = text.replace("\"", "&quot;")
- # The following business with carriage returns is to satisfy
- # Section 2.11 of the XML specification, stating that
- # CR or CR LN should be replaced with just LN
+ # Although section 2.11 of the XML specification states that CR or
+ # CR LF should be replaced with just LF, it applies only to line endings
+ # which take part in organizing the file into lines. Within attributes,
+ # we are replacing these with entity numbers, so they do not count.
# http://www.w3.org/TR/REC-xml/#sec-line-ends
- if "\r\n" in text:
- text = text.replace("\r\n", "\n")
+ # The current solution, contained in the following six lines, was
+ # discussed in issues 17582 and 39011.
if "\r" in text:
- text = text.replace("\r", "\n")
- #The following four lines are issue 17582
+ text = text.replace("\r", "&#13;")
if "\n" in text:
text = text.replace("\n", "&#10;")
if "\t" in text:
diff --git a/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst
new file mode 100644
index 0000000..43962f0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst
@@ -0,0 +1,3 @@
+Normalization of line endings in ElementTree attributes was removed, as line
+endings which were replaced by entity numbers should be preserved in
+original form.