diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2010-02-09 16:51:16 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-02-09 16:51:16 (GMT) |
commit | c77dd32be4557ad5d2e5c9a710ebeaf52d5092d1 (patch) | |
tree | f77e45403d9f7dc9ad863caacb9928d6fd90fc90 | |
parent | 28a817e3bacd1de1fbca2d0cd8f2f7dd3cc72b61 (diff) | |
download | cpython-c77dd32be4557ad5d2e5c9a710ebeaf52d5092d1.zip cpython-c77dd32be4557ad5d2e5c9a710ebeaf52d5092d1.tar.gz cpython-c77dd32be4557ad5d2e5c9a710ebeaf52d5092d1.tar.bz2 |
Issue #6233: ElementTree failed converting unicode characters to XML
entities when they couldn't be represented in the requested output
encoding. Patch by Jerry Chen.
-rw-r--r-- | Lib/test/test_xml_etree.py | 11 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 15 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
4 files changed, 25 insertions, 6 deletions
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 895902f..a7ad48b 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -210,6 +210,17 @@ def check_encoding(ET, encoding): """ ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding) +def check_issue6233(): + """ + >>> from xml.etree import ElementTree as ET + + >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>") + >>> ET.tostring(e, 'ascii') + b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>" + >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding + >>> ET.tostring(e, 'ascii') + b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>" + """ # # xinclude tests (samples from appendix C of the xinclude specification) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index cfac4f7..c47573e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -662,9 +662,9 @@ class ElementTree: # write XML to file tag = node.tag if tag is Comment: - file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding)) + file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->") elif tag is ProcessingInstruction: - file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding)) + file.write(b"<?" 
+ _encode_cdata(node.text, encoding) + b"?>") else: items = list(node.items()) xmlns_items = [] # new namespaces in this scope @@ -696,7 +696,7 @@ class ElementTree: if node.text or len(node): file.write(_encode(">", encoding)) if node.text: - file.write(_encode(_escape_cdata(node.text), encoding)) + file.write(_encode_cdata(node.text, encoding)) for n in node: self._write(file, n, encoding, namespaces) file.write(_encode("</" + tag + ">", encoding)) @@ -705,7 +705,7 @@ class ElementTree: for k, v in xmlns_items: del namespaces[v] if node.tail: - file.write(_encode(_escape_cdata(node.tail), encoding)) + file.write(_encode_cdata(node.tail, encoding)) # -------------------------------------------------------------------- # helpers @@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape): # the following functions assume an ascii-compatible encoding # (or "utf-16") -def _escape_cdata(text): +def _encode_cdata(text, encoding): # escape character data try: text = text.replace("&", "&amp;") text = text.replace("<", "&lt;") text = text.replace(">", "&gt;") - return text + if encoding: + return text.encode(encoding, "xmlcharrefreplace") + else: + return text except (TypeError, AttributeError): _raise_serialization_error(text) @@ -131,6 +131,7 @@ Greg Chapman Brad Chapman David Chaum Nicolas Chauvat +Jerry Chen Michael Chermside Albert Chin-A-Young Adal Chiriliuc @@ -242,6 +242,10 @@ C-API Library ------- +- Issue #6233: ElementTree failed converting unicode characters to XML + entities when they couldn't be represented in the requested output + encoding. Patch by Jerry Chen. + - Issue #6003: add an argument to ``zipfile.Zipfile.writestr`` to specify the compression type. |