From c77dd32be4557ad5d2e5c9a710ebeaf52d5092d1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 9 Feb 2010 16:51:16 +0000 Subject: Issue #6233: ElementTree failed converting unicode characters to XML entities when they could't be represented in the requested output encoding. Patch by Jerry Chen. --- Lib/test/test_xml_etree.py | 11 +++++++++++ Lib/xml/etree/ElementTree.py | 15 +++++++++------ Misc/ACKS | 1 + Misc/NEWS | 4 ++++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 895902f..a7ad48b 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -210,6 +210,17 @@ def check_encoding(ET, encoding): """ ET.XML("" % encoding) +def check_issue6233(): + """ + >>> from xml.etree import ElementTree as ET + + >>> e = ET.XML("t\xe3g") + >>> ET.tostring(e, 'ascii') + b"\\ntãg" + >>> e = ET.XML("t\xe3g".encode('iso-8859-1')) # create byte string with the right encoding + >>> ET.tostring(e, 'ascii') + b"\\ntãg" + """ # # xinclude tests (samples from appendix C of the xinclude specification) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index cfac4f7..c47573e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -662,9 +662,9 @@ class ElementTree: # write XML to file tag = node.tag if tag is Comment: - file.write(_encode("" % _escape_cdata(node.text), encoding)) + file.write(b"") elif tag is ProcessingInstruction: - file.write(_encode("" % _escape_cdata(node.text), encoding)) + file.write(b"") else: items = list(node.items()) xmlns_items = [] # new namespaces in this scope @@ -696,7 +696,7 @@ class ElementTree: if node.text or len(node): file.write(_encode(">", encoding)) if node.text: - file.write(_encode(_escape_cdata(node.text), encoding)) + file.write(_encode_cdata(node.text, encoding)) for n in node: self._write(file, n, encoding, namespaces) file.write(_encode("", encoding)) @@ -705,7 +705,7 @@ class ElementTree: for k, v in xmlns_items: del namespaces[v] if node.tail: - file.write(_encode(_escape_cdata(node.tail), encoding)) + file.write(_encode_cdata(node.tail, encoding)) # -------------------------------------------------------------------- # helpers @@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape): # the following functions assume an ascii-compatible encoding # (or "utf-16") -def _escape_cdata(text): +def _encode_cdata(text, encoding): # escape character data try: text = text.replace("&", "&") text = text.replace("<", "<") text = text.replace(">", ">") - return text + if encoding: + return text.encode(encoding, "xmlcharrefreplace") + else: + return text except (TypeError, AttributeError): _raise_serialization_error(text) diff --git a/Misc/ACKS b/Misc/ACKS index f349b1e..b5dccde 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -131,6 +131,7 @@ Greg Chapman Brad Chapman David Chaum Nicolas Chauvat +Jerry Chen Michael Chermside Albert Chin-A-Young Adal Chiriliuc diff --git a/Misc/NEWS b/Misc/NEWS index 124ea0f..9d2b7cf 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -242,6 +242,10 @@ C-API Library ------- +- Issue #6233: ElementTree failed converting unicode characters to XML + entities when they could't be represented in the requested output + encoding. Patch by Jerry Chen. + - Issue #6003: add an argument to ``zipfile.Zipfile.writestr`` to specify the compression type. -- cgit v0.12