diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-29 17:32:47 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-29 17:32:47 (GMT) |
commit | c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2 (patch) | |
tree | 9bd325d65e1cee9696fb98998db0bdb2a2e21b41 | |
parent | 70fead25e503a742ad4c919b151b9b2b5facee36 (diff) | |
download | cpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.zip cpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.tar.gz cpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.tar.bz2 |
bpo-34523: Py_DecodeLocale() uses UTF-8 on Windows (GH-8998)
Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on
Windows if Py_LegacyWindowsFSEncodingFlag is zero.
pymain_read_conf() now sets Py_LegacyWindowsFSEncodingFlag in its
loop, but restores its value at exit.
-rw-r--r-- | Doc/c-api/sys.rst | 15 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 39 | ||||
-rw-r--r-- | Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst | 2 | ||||
-rw-r--r-- | Modules/main.c | 13 | ||||
-rw-r--r-- | Python/fileutils.c | 16 |
5 files changed, 55 insertions, 30 deletions
diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index 994509a..0eee35a 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -109,6 +109,7 @@ Operating System Utilities Encoding, highest priority to lowest priority: * ``UTF-8`` on macOS and Android; + * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; * ``UTF-8`` if the Python UTF-8 mode is enabled; * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``, ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias), @@ -140,6 +141,10 @@ Operating System Utilities .. versionchanged:: 3.7 The function now uses the UTF-8 encoding in the UTF-8 mode. + .. versionchanged:: 3.8 + The function now uses the UTF-8 encoding on Windows if + :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; + .. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) @@ -150,6 +155,7 @@ Operating System Utilities Encoding, highest priority to lowest priority: * ``UTF-8`` on macOS and Android; + * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; * ``UTF-8`` if the Python UTF-8 mode is enabled; * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``, ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias), @@ -169,9 +175,6 @@ Operating System Utilities Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back to a wide character string. - .. versionchanged:: 3.7 - The function now uses the UTF-8 encoding in the UTF-8 mode. - .. seealso:: The :c:func:`PyUnicode_EncodeFSDefault` and @@ -180,7 +183,11 @@ Operating System Utilities .. versionadded:: 3.5 .. versionchanged:: 3.7 - The function now supports the UTF-8 mode. + The function now uses the UTF-8 encoding in the UTF-8 mode. + + .. versionchanged:: 3.8 + The function now uses the UTF-8 encoding on Windows if + :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; .. 
_systemfunctions: diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index b6311e4..9155c40 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -268,10 +268,10 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'dump_refs': 0, 'malloc_stats': 0, - # None means that the default encoding is read at runtime: - # see get_locale_encoding(). + # None means that the value is get by get_locale_encoding() 'filesystem_encoding': None, - 'filesystem_errors': sys.getfilesystemencodeerrors(), + 'filesystem_errors': None, + 'utf8_mode': 0, 'coerce_c_locale': 0, 'coerce_c_locale_warn': 0, @@ -294,7 +294,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'quiet': 0, 'user_site_directory': 1, 'buffered_stdio': 1, - # None means that check_config() gets the expected encoding at runtime + + # None means that the value is get by get_stdio_encoding() 'stdio_encoding': None, 'stdio_errors': None, @@ -303,7 +304,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): '_frozen': 0, } - def get_stdio_encoding(self, env): code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)' args = (sys.executable, '-c', code) @@ -315,18 +315,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): out = proc.stdout.rstrip() return out.split() - def get_locale_encoding(self, isolated): - if sys.platform in ('win32', 'darwin') or support.is_android: - # Windows, macOS and Android use UTF-8 - return "utf-8" - - code = ('import codecs, locale, sys', - 'locale.setlocale(locale.LC_CTYPE, "")', - 'enc = locale.nl_langinfo(locale.CODESET)', - 'enc = codecs.lookup(enc).name', - 'print(enc)') - args = (sys.executable, '-c', '; '.join(code)) - env = dict(os.environ) + def get_filesystem_encoding(self, isolated, env): + code = ('import codecs, locale, sys; ' + 'print(sys.getfilesystemencoding(), ' + 'sys.getfilesystemencodeerrors())') + args = (sys.executable, '-c', code) + env = dict(env) if not isolated: 
env['PYTHONCOERCECLOCALE'] = '0' env['PYTHONUTF8'] = '0' @@ -336,7 +330,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): if proc.returncode: raise Exception(f"failed to get the locale encoding: " f"stdout={proc.stdout!r} stderr={proc.stderr!r}") - return proc.stdout.rstrip() + out = proc.stdout.rstrip() + return out.split() def check_config(self, testname, expected): expected = dict(self.DEFAULT_CONFIG, **expected) @@ -356,8 +351,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): expected['stdio_encoding'] = res[0] if expected['stdio_errors'] is None: expected['stdio_errors'] = res[1] - if expected['filesystem_encoding'] is None: - expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated']) + if expected['filesystem_encoding'] is None or expected['filesystem_errors'] is None: + res = self.get_filesystem_encoding(expected['isolated'], env) + if expected['filesystem_encoding'] is None: + expected['filesystem_encoding'] = res[0] + if expected['filesystem_errors'] is None: + expected['filesystem_errors'] = res[1] for key, value in expected.items(): expected[key] = str(value) diff --git a/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst b/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst new file mode 100644 index 0000000..95368f1 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst @@ -0,0 +1,2 @@ +Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on +Windows if Py_LegacyWindowsFSEncodingFlag is zero. 
diff --git a/Modules/main.c b/Modules/main.c index 2e9e23b..bf7290a 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -1287,6 +1287,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config, _PyCmdline *cmdline) { int init_utf8_mode = Py_UTF8Mode; +#ifdef MS_WINDOWS + int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag; +#endif _PyCoreConfig save_config = _PyCoreConfig_INIT; int res = -1; @@ -1313,9 +1316,12 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config, goto done; } - /* bpo-34207: Py_DecodeLocale(), Py_EncodeLocale() and similar - functions depend on Py_UTF8Mode. */ + /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend + on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */ Py_UTF8Mode = config->utf8_mode; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding; +#endif if (pymain_init_cmdline_argv(pymain, config, cmdline) < 0) { goto done; @@ -1380,6 +1386,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config, done: _PyCoreConfig_Clear(&save_config); Py_UTF8Mode = init_utf8_mode ; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding; +#endif return res; } diff --git a/Python/fileutils.c b/Python/fileutils.c index e756c26..9a3c334 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -499,9 +499,13 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, surrogateescape); #else - if (Py_UTF8Mode == 1) { - return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, - surrogateescape); + int use_utf8 = (Py_UTF8Mode == 1); +#ifdef MS_WINDOWS + use_utf8 |= !Py_LegacyWindowsFSEncodingFlag; +#endif + if (use_utf8) { + return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, + reason, surrogateescape); } #ifdef USE_FORCE_ASCII @@ -661,7 +665,11 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, return _Py_EncodeUTF8Ex(text, str, error_pos, reason, raw_malloc, surrogateescape); 
#else /* __APPLE__ */ - if (Py_UTF8Mode == 1) { + int use_utf8 = (Py_UTF8Mode == 1); +#ifdef MS_WINDOWS + use_utf8 |= !Py_LegacyWindowsFSEncodingFlag; +#endif + if (use_utf8) { return _Py_EncodeUTF8Ex(text, str, error_pos, reason, raw_malloc, surrogateescape); } |