From 7f569c9bc0079906012b3034d30fe8abc742e7fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=B6=E0=B1=8D?= =?UTF-8?q?=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF=E0=B0=B5=E0=B0=BE=E0=B0=B8?= =?UTF-8?q?=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86=E0=B0=A1=E0=B1=8D=E0=B0=A1?= =?UTF-8?q?=E0=B0=BF=20=E0=B0=A4=E0=B0=BE=E0=B0=9F=E0=B0=BF=E0=B0=AA?= =?UTF-8?q?=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF=29?= Date: Mon, 29 Jun 2020 14:06:48 +0530 Subject: bpo-41048: mimetypes should read the rule file using UTF-8, not the locale encoding (GH-20998) --- Lib/mimetypes.py | 2 +- Lib/test/test_mimetypes.py | 12 ++++++++++++ Misc/ACKS | 1 + .../next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 61bfff1..f3343c8 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -372,7 +372,7 @@ def init(files=None): def read_mime_types(file): try: - f = open(file) + f = open(file, encoding='utf-8') except OSError: return None with f: diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 9cac6ce..683d393 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -67,6 +67,18 @@ class MimeTypesTestCase(unittest.TestCase): mime_dict = mimetypes.read_mime_types(file) eq(mime_dict[".pyunit"], "x-application/x-unittest") + # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding. + # Not with locale encoding. _bootlocale has been imported because io.open(...) + # uses it. + with support.temp_dir() as directory: + data = "application/no-mans-land Fran\u00E7ais" + file = pathlib.Path(directory, "sample.mimetype") + file.write_text(data, encoding='utf-8') + import _bootlocale + with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): + mime_dict = mimetypes.read_mime_types(file) + eq(mime_dict[".Français"], "application/no-mans-land") + def test_non_standard_types(self): eq = self.assertEqual # First try strict diff --git a/Misc/ACKS b/Misc/ACKS index 87f0ded..641ef0c 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1706,6 +1706,7 @@ Mikhail Terekhov Victor Terrón Pablo Galindo Richard M. Tew +Srinivas Reddy Thatiparthy Tobias Thelen Christian Theune Févry Thibault diff --git a/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst new file mode 100644 index 0000000..2595900 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst @@ -0,0 +1,2 @@ +:func:`mimetypes.read_mime_types` function reads the rule file using UTF-8 encoding, not the locale encoding. +Patch by Srinivas Reddy Thatiparthy. \ No newline at end of file -- cgit v0.12