diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-05-11 06:31:07 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-11 06:31:07 (GMT) |
commit | 707839b0fe02ba2c891a40f40e7a869d84c2c9c5 (patch) | |
tree | 862f7d27088ea23ecaa6da2a53b028b388ea1d9f /Lib/xml | |
parent | 75e463430efcb5b20efa93f9a5d98ccd03d83a3d (diff) | |
download | cpython-707839b0fe02ba2c891a40f40e7a869d84c2c9c5.zip cpython-707839b0fe02ba2c891a40f40e7a869d84c2c9c5.tar.gz cpython-707839b0fe02ba2c891a40f40e7a869d84c2c9c5.tar.bz2 |
gh-91810: ElementTree: Use text file's encoding by default in XML declaration (GH-91903)
ElementTree method write() and function tostring() now use the text file's
encoding ("UTF-8" if not available) instead of locale encoding in XML
declaration when encoding="unicode" is specified.
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 23 |
1 file changed, 9 insertions, 14 deletions
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 5249c7a..a5cc65e 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -728,16 +728,10 @@ class ElementTree:
                 encoding = "utf-8"
             else:
                 encoding = "us-ascii"
-        enc_lower = encoding.lower()
-        with _get_writer(file_or_filename, enc_lower) as write:
+        with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
             if method == "xml" and (xml_declaration or
                     (xml_declaration is None and
-                     enc_lower not in ("utf-8", "us-ascii", "unicode"))):
-                declared_encoding = encoding
-                if enc_lower == "unicode":
-                    # Retrieve the default encoding for the xml declaration
-                    import locale
-                    declared_encoding = locale.getpreferredencoding()
+                     declared_encoding.lower() not in ("utf-8", "us-ascii"))):
                 write("<?xml version='1.0' encoding='%s'?>\n" % (
                     declared_encoding,))
             if method == "text":
@@ -762,19 +756,20 @@ def _get_writer(file_or_filename, encoding):
         write = file_or_filename.write
     except AttributeError:
         # file_or_filename is a file name
-        if encoding == "unicode":
-            file = open(file_or_filename, "w")
+        if encoding.lower() == "unicode":
+            file = open(file_or_filename, "w",
+                        errors="xmlcharrefreplace")
         else:
             file = open(file_or_filename, "w", encoding=encoding,
                         errors="xmlcharrefreplace")
         with file:
-            yield file.write
+            yield file.write, file.encoding
     else:
         # file_or_filename is a file-like object
         # encoding determines if it is a text or binary writer
-        if encoding == "unicode":
+        if encoding.lower() == "unicode":
             # use a text writer as is
-            yield write
+            yield write, getattr(file_or_filename, "encoding", None) or "utf-8"
         else:
             # wrap a binary writer with TextIOWrapper
             with contextlib.ExitStack() as stack:
@@ -805,7 +800,7 @@ def _get_writer(file_or_filename, encoding):
                 # Keep the original file open when the TextIOWrapper is
                 # destroyed
                 stack.callback(file.detach)
-                yield file.write
+                yield file.write, encoding

 def _namespaces(elem, default_namespace=None):
     # identify namespaces used in this tree