summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/locale.py4
-rw-r--r--Lib/test/test_locale.py36
-rw-r--r--Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst1
3 files changed, 41 insertions, 0 deletions
diff --git a/Lib/locale.py b/Lib/locale.py
index f3d3973..dd8a085 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -492,6 +492,10 @@ def _parse_localename(localename):
return tuple(code.split('.')[:2])
elif code == 'C':
return None, None
+ elif code == 'UTF-8':
+ # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
+ # for getting UTF-8 handling for text.
+ return None, 'UTF-8'
raise ValueError('unknown locale: %s' % localename)
def _build_localename(localetuple):
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index 792a15c..c5d8e26 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -493,6 +493,42 @@ class NormalizeTest(unittest.TestCase):
class TestMiscellaneous(unittest.TestCase):
+ def test_defaults_UTF8(self):
+ # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
+ # valid. Furthermore, LC_CTYPE=UTF-8 is used by the UTF-8 locale coercion
+ # during interpreter startup (on macOS).
+ import _locale
+ import os
+
+ self.assertEqual(locale._parse_localename('UTF-8'), (None, 'UTF-8'))
+
+ if hasattr(_locale, '_getdefaultlocale'):
+ orig_getlocale = _locale._getdefaultlocale
+ del _locale._getdefaultlocale
+ else:
+ orig_getlocale = None
+
+ orig_env = {}
+ try:
+ for key in ('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'):
+ if key in os.environ:
+ orig_env[key] = os.environ[key]
+ del os.environ[key]
+
+ os.environ['LC_CTYPE'] = 'UTF-8'
+
+ self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8'))
+
+ finally:
+ for k in orig_env:
+ os.environ[k] = orig_env[k]
+
+ if 'LC_CTYPE' not in orig_env:
+ del os.environ['LC_CTYPE']
+
+ if orig_getlocale is not None:
+ _locale._getdefaultlocale = orig_getlocale
+
def test_getpreferredencoding(self):
# Invoke getpreferredencoding to make sure it does not cause exceptions.
enc = locale.getpreferredencoding()
diff --git a/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
new file mode 100644
index 0000000..6dda8ab
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
@@ -0,0 +1 @@
+Recognize "UTF-8" as a valid value for LC_CTYPE in locale._parse_localename.