summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@redhat.com>2018-08-29 17:32:47 (GMT)
committerGitHub <noreply@github.com>2018-08-29 17:32:47 (GMT)
commitc5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2 (patch)
tree9bd325d65e1cee9696fb98998db0bdb2a2e21b41
parent70fead25e503a742ad4c919b151b9b2b5facee36 (diff)
downloadcpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.zip
cpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.tar.gz
cpython-c5989cd87659acbfd4d19dc00dbe99c3a0fc9bd2.tar.bz2
bpo-34523: Py_DecodeLocale() use UTF-8 on Windows (GH-8998)
Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on Windows if Py_LegacyWindowsFSEncodingFlag is zero. pymain_read_conf() now sets Py_LegacyWindowsFSEncodingFlag in its loop, but restores its value at exit.
-rw-r--r--Doc/c-api/sys.rst15
-rw-r--r--Lib/test/test_embed.py39
-rw-r--r--Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst2
-rw-r--r--Modules/main.c13
-rw-r--r--Python/fileutils.c16
5 files changed, 55 insertions, 30 deletions
diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst
index 994509a..0eee35a 100644
--- a/Doc/c-api/sys.rst
+++ b/Doc/c-api/sys.rst
@@ -109,6 +109,7 @@ Operating System Utilities
Encoding, highest priority to lowest priority:
* ``UTF-8`` on macOS and Android;
+ * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
* ``UTF-8`` if the Python UTF-8 mode is enabled;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
@@ -140,6 +141,10 @@ Operating System Utilities
.. versionchanged:: 3.7
The function now uses the UTF-8 encoding in the UTF-8 mode.
+ .. versionchanged:: 3.8
+ The function now uses the UTF-8 encoding on Windows if
+ :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.
+
.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
@@ -150,6 +155,7 @@ Operating System Utilities
Encoding, highest priority to lowest priority:
* ``UTF-8`` on macOS and Android;
+ * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
* ``UTF-8`` if the Python UTF-8 mode is enabled;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
@@ -169,9 +175,6 @@ Operating System Utilities
Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back
to a wide character string.
- .. versionchanged:: 3.7
- The function now uses the UTF-8 encoding in the UTF-8 mode.
-
.. seealso::
The :c:func:`PyUnicode_EncodeFSDefault` and
@@ -180,7 +183,11 @@ Operating System Utilities
.. versionadded:: 3.5
.. versionchanged:: 3.7
- The function now supports the UTF-8 mode.
+ The function now uses the UTF-8 encoding in the UTF-8 mode.
+
+ .. versionchanged:: 3.8
+ The function now uses the UTF-8 encoding on Windows if
+ :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.
.. _systemfunctions:
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index b6311e4..9155c40 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -268,10 +268,10 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'dump_refs': 0,
'malloc_stats': 0,
- # None means that the default encoding is read at runtime:
- # see get_locale_encoding().
+ # None means that the value is obtained from get_filesystem_encoding()
'filesystem_encoding': None,
- 'filesystem_errors': sys.getfilesystemencodeerrors(),
+ 'filesystem_errors': None,
+
'utf8_mode': 0,
'coerce_c_locale': 0,
'coerce_c_locale_warn': 0,
@@ -294,7 +294,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'quiet': 0,
'user_site_directory': 1,
'buffered_stdio': 1,
- # None means that check_config() gets the expected encoding at runtime
+
+ # None means that the value is obtained from get_stdio_encoding()
'stdio_encoding': None,
'stdio_errors': None,
@@ -303,7 +304,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'_frozen': 0,
}
-
def get_stdio_encoding(self, env):
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
args = (sys.executable, '-c', code)
@@ -315,18 +315,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
out = proc.stdout.rstrip()
return out.split()
- def get_locale_encoding(self, isolated):
- if sys.platform in ('win32', 'darwin') or support.is_android:
- # Windows, macOS and Android use UTF-8
- return "utf-8"
-
- code = ('import codecs, locale, sys',
- 'locale.setlocale(locale.LC_CTYPE, "")',
- 'enc = locale.nl_langinfo(locale.CODESET)',
- 'enc = codecs.lookup(enc).name',
- 'print(enc)')
- args = (sys.executable, '-c', '; '.join(code))
- env = dict(os.environ)
+ def get_filesystem_encoding(self, isolated, env):
+ code = ('import codecs, locale, sys; '
+ 'print(sys.getfilesystemencoding(), '
+ 'sys.getfilesystemencodeerrors())')
+ args = (sys.executable, '-c', code)
+ env = dict(env)
if not isolated:
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
@@ -336,7 +330,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
if proc.returncode:
raise Exception(f"failed to get the locale encoding: "
f"stdout={proc.stdout!r} stderr={proc.stderr!r}")
- return proc.stdout.rstrip()
+ out = proc.stdout.rstrip()
+ return out.split()
def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected)
@@ -356,8 +351,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1]
- if expected['filesystem_encoding'] is None:
- expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
+ if expected['filesystem_encoding'] is None or expected['filesystem_errors'] is None:
+ res = self.get_filesystem_encoding(expected['isolated'], env)
+ if expected['filesystem_encoding'] is None:
+ expected['filesystem_encoding'] = res[0]
+ if expected['filesystem_errors'] is None:
+ expected['filesystem_errors'] = res[1]
for key, value in expected.items():
expected[key] = str(value)
diff --git a/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst b/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst
new file mode 100644
index 0000000..95368f1
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2018-08-29-18-48-47.bpo-34523.lLQ8rh.rst
@@ -0,0 +1,2 @@
+Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on
+Windows if Py_LegacyWindowsFSEncodingFlag is zero.
diff --git a/Modules/main.c b/Modules/main.c
index 2e9e23b..bf7290a 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -1287,6 +1287,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
_PyCmdline *cmdline)
{
int init_utf8_mode = Py_UTF8Mode;
+#ifdef MS_WINDOWS
+ int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
+#endif
_PyCoreConfig save_config = _PyCoreConfig_INIT;
int res = -1;
@@ -1313,9 +1316,12 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
goto done;
}
- /* bpo-34207: Py_DecodeLocale(), Py_EncodeLocale() and similar
- functions depend on Py_UTF8Mode. */
+ /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
+ on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
Py_UTF8Mode = config->utf8_mode;
+#ifdef MS_WINDOWS
+ Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
+#endif
if (pymain_init_cmdline_argv(pymain, config, cmdline) < 0) {
goto done;
@@ -1380,6 +1386,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
done:
_PyCoreConfig_Clear(&save_config);
Py_UTF8Mode = init_utf8_mode ;
+#ifdef MS_WINDOWS
+ Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
+#endif
return res;
}
diff --git a/Python/fileutils.c b/Python/fileutils.c
index e756c26..9a3c334 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -499,9 +499,13 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
surrogateescape);
#else
- if (Py_UTF8Mode == 1) {
- return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
- surrogateescape);
+ int use_utf8 = (Py_UTF8Mode == 1);
+#ifdef MS_WINDOWS
+ use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
+#endif
+ if (use_utf8) {
+ return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen,
+ reason, surrogateescape);
}
#ifdef USE_FORCE_ASCII
@@ -661,7 +665,11 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, surrogateescape);
#else /* __APPLE__ */
- if (Py_UTF8Mode == 1) {
+ int use_utf8 = (Py_UTF8Mode == 1);
+#ifdef MS_WINDOWS
+ use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
+#endif
+ if (use_utf8) {
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, surrogateescape);
}