summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2020-06-29 12:07:41 (GMT)
committer: GitHub <noreply@github.com> 2020-06-29 12:07:41 (GMT)
commit: 7731139b7af655b9f5df6d1b5493f8dfdf41d569 (patch)
tree: 54720f127940f2f0c02bef2fe86b36159a74e8ba
parent: 12bb0b69ec237a4d6d666bb385d87eb61dbb2bf5 (diff)
download: cpython-7731139b7af655b9f5df6d1b5493f8dfdf41d569.zip
cpython-7731139b7af655b9f5df6d1b5493f8dfdf41d569.tar.gz
cpython-7731139b7af655b9f5df6d1b5493f8dfdf41d569.tar.bz2
bpo-41048: mimetypes should read the rule file using UTF-8, not the locale encoding (GH-20998)
(cherry picked from commit 7f569c9bc0079906012b3034d30fe8abc742e7fc)

Co-authored-by: Srinivas Reddy Thatiparthy (శ్రీనివాస్ రెడ్డి తాటిపర్తి) <thatiparthysreenivas@gmail.com>
-rw-r--r-- Lib/mimetypes.py | 2
-rw-r--r-- Lib/test/test_mimetypes.py | 12
-rw-r--r-- Misc/ACKS | 1
-rw-r--r-- Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst | 2
4 files changed, 16 insertions, 1 deletion
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 434f5b3..954bb0a 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -372,7 +372,7 @@ def init(files=None):
def read_mime_types(file):
try:
- f = open(file)
+ f = open(file, encoding='utf-8')
except OSError:
return None
with f:
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index 9cac6ce..683d393 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -67,6 +67,18 @@ class MimeTypesTestCase(unittest.TestCase):
mime_dict = mimetypes.read_mime_types(file)
eq(mime_dict[".pyunit"], "x-application/x-unittest")
+ # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
+ # Not with locale encoding. _bootlocale has been imported because io.open(...)
+ # uses it.
+ with support.temp_dir() as directory:
+ data = "application/no-mans-land Fran\u00E7ais"
+ file = pathlib.Path(directory, "sample.mimetype")
+ file.write_text(data, encoding='utf-8')
+ import _bootlocale
+ with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
+ mime_dict = mimetypes.read_mime_types(file)
+ eq(mime_dict[".Français"], "application/no-mans-land")
+
def test_non_standard_types(self):
eq = self.assertEqual
# First try strict
diff --git a/Misc/ACKS b/Misc/ACKS
index 8098637..a08e917 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1669,6 +1669,7 @@ Mikhail Terekhov
Victor Terrón
Pablo Galindo
Richard M. Tew
+Srinivas Reddy Thatiparthy
Tobias Thelen
Christian Theune
Févry Thibault
diff --git a/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst
new file mode 100644
index 0000000..2595900
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst
@@ -0,0 +1,2 @@
+:func:`mimetypes.read_mime_types` function reads the rule file using UTF-8 encoding, not the locale encoding.
+Patch by Srinivas Reddy Thatiparthy.
\ No newline at end of file