summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2010-02-09 16:53:09 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2010-02-09 16:53:09 (GMT)
commit54319287c9b3df17c0f6ce15b14619a9814ae292 (patch)
tree24718ad101ee49d9c2fddfe86d0bb61668b005c0 /Lib
parent0f36573f10fb1125042ada81b9feaee0c5b8a9ff (diff)
downloadcpython-54319287c9b3df17c0f6ce15b14619a9814ae292.zip
cpython-54319287c9b3df17c0f6ce15b14619a9814ae292.tar.gz
cpython-54319287c9b3df17c0f6ce15b14619a9814ae292.tar.bz2
Merged revisions 78123 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r78123 | antoine.pitrou | 2010-02-09 17:51:16 +0100 (mar., 09 févr. 2010) | 5 lines Issue #6233: ElementTree failed converting unicode characters to XML entities when they couldn't be represented in the requested output encoding. Patch by Jerry Chen. ........
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/test_xml_etree.py11
-rw-r--r--Lib/xml/etree/ElementTree.py15
2 files changed, 20 insertions, 6 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 895902f..a7ad48b 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -210,6 +210,17 @@ def check_encoding(ET, encoding):
"""
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
+def check_issue6233():
+ """
+ >>> from xml.etree import ElementTree as ET
+
+ >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
+ >>> ET.tostring(e, 'ascii')
+ b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+ >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
+ >>> ET.tostring(e, 'ascii')
+ b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+ """
#
# xinclude tests (samples from appendix C of the xinclude specification)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index cfac4f7..c47573e 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -662,9 +662,9 @@ class ElementTree:
# write XML to file
tag = node.tag
if tag is Comment:
- file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
+ file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
elif tag is ProcessingInstruction:
- file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
+ file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
else:
items = list(node.items())
xmlns_items = [] # new namespaces in this scope
@@ -696,7 +696,7 @@ class ElementTree:
if node.text or len(node):
file.write(_encode(">", encoding))
if node.text:
- file.write(_encode(_escape_cdata(node.text), encoding))
+ file.write(_encode_cdata(node.text, encoding))
for n in node:
self._write(file, n, encoding, namespaces)
file.write(_encode("</" + tag + ">", encoding))
@@ -705,7 +705,7 @@ class ElementTree:
for k, v in xmlns_items:
del namespaces[v]
if node.tail:
- file.write(_encode(_escape_cdata(node.tail), encoding))
+ file.write(_encode_cdata(node.tail, encoding))
# --------------------------------------------------------------------
# helpers
@@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape):
# the following functions assume an ascii-compatible encoding
# (or "utf-16")
-def _escape_cdata(text):
+def _encode_cdata(text, encoding):
# escape character data
try:
text = text.replace("&", "&amp;")
text = text.replace("<", "&lt;")
text = text.replace(">", "&gt;")
- return text
+ if encoding:
+ return text.encode(encoding, "xmlcharrefreplace")
+ else:
+ return text
except (TypeError, AttributeError):
_raise_serialization_error(text)