diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-28 21:26:33 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-28 21:26:33 (GMT) |
commit | 9e4994d410970fb4e75168401d159ba47a8f7108 (patch) | |
tree | 31eb67e89ade21902bfe925d353f650ae95fb5d8 /Python | |
parent | d500e5307aec9c5d535f66d567fadb9c587a9a36 (diff) | |
download | cpython-9e4994d410970fb4e75168401d159ba47a8f7108.zip cpython-9e4994d410970fb4e75168401d159ba47a8f7108.tar.gz cpython-9e4994d410970fb4e75168401d159ba47a8f7108.tar.bz2 |
bpo-34485: Enhance init_sys_streams() (GH-8978)
Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing
_bootlocale and _locale modules at startup by default.
When the PYTHONIOENCODING environment variable only contains an encoding,
the error handler is now set explicitly to "strict".
Also rename get_default_standard_stream_error_handler() to
get_stdio_errors().
Reduce the size of the buffer used to format the "cpXXX" string (Windows locale encoding).
Diffstat (limited to 'Python')
-rw-r--r-- | Python/pylifecycle.c | 86 |
1 files changed, 63 insertions, 23 deletions
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index cc64cf9..29711df 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -244,22 +244,26 @@ error: return NULL; } -static char* -get_locale_encoding(void) +static _PyInitError +get_locale_encoding(char **locale_encoding) { -#if defined(HAVE_LANGINFO_H) && defined(CODESET) - char* codeset = nl_langinfo(CODESET); - if (!codeset || codeset[0] == '\0') { - PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty"); - return NULL; - } - return get_codec_name(codeset); +#ifdef MS_WINDOWS + char encoding[20]; + PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP()); #elif defined(__ANDROID__) - return get_codec_name("UTF-8"); + const char *encoding = "UTF-8"; #else - PyErr_SetNone(PyExc_NotImplementedError); - return NULL; + const char *encoding = nl_langinfo(CODESET); + if (!encoding || encoding[0] == '\0') { + return _Py_INIT_USER_ERR("failed to get the locale encoding: " + "nl_langinfo(CODESET) failed"); + } #endif + *locale_encoding = _PyMem_RawStrdup(encoding); + if (*locale_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + return _Py_INIT_OK(); } static _PyInitError @@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = { }; static const char * -get_default_standard_stream_error_handler(void) +get_stdio_errors(void) { const char *ctype_loc = setlocale(LC_CTYPE, NULL); if (ctype_loc != NULL) { @@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void) #endif } - /* Otherwise return NULL to request the typical default error handler */ - return NULL; + return "strict"; } #ifdef PY_COERCE_C_LOCALE @@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp) Py_HasFileSystemDefaultEncoding = 1; } else { - Py_FileSystemDefaultEncoding = get_locale_encoding(); + char *locale_encoding; + _PyInitError err = get_locale_encoding(&locale_encoding); + if (_Py_INIT_FAILED(err)) { + return err; + } + + Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding); 
+ PyMem_RawFree(locale_encoding); if (Py_FileSystemDefaultEncoding == NULL) { - return _Py_INIT_ERR("Unable to get the locale encoding"); + return _Py_INIT_ERR("failed to get the Python codec " + "of the locale encoding"); } Py_HasFileSystemDefaultEncoding = 0; @@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp) PyObject * encoding_attr; char *pythonioencoding = NULL; const char *encoding, *errors; + char *locale_encoding = NULL; + char *codec_name = NULL; _PyInitError res = _Py_INIT_OK(); /* Hack to avoid a nasty recursion issue when Python is invoked @@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp) errors = err; } } - if (*pythonioencoding && !encoding) { + if (!encoding && *pythonioencoding) { encoding = pythonioencoding; + if (!errors) { + errors = "strict"; + } } } - else if (interp->core_config.utf8_mode) { - encoding = "utf-8"; - errors = "surrogateescape"; + + if (interp->core_config.utf8_mode) { + if (!encoding) { + encoding = "utf-8"; + } + if (!errors) { + errors = "surrogateescape"; + } } - if (!errors && !pythonioencoding) { + if (!errors) { /* Choose the default error handler based on the current locale */ - errors = get_default_standard_stream_error_handler(); + errors = get_stdio_errors(); } } + if (encoding == NULL) { + _PyInitError err = get_locale_encoding(&locale_encoding); + if (_Py_INIT_FAILED(err)) { + return err; + } + encoding = locale_encoding; + } + + codec_name = get_codec_name(encoding); + if (codec_name == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "failed to get the Python codec name " + "of stdio encoding"); + goto error; + } + encoding = codec_name; + /* Set sys.stdin */ fd = fileno(stdin); /* Under some conditions stdin, stdout and stderr may not be connected @@ -1928,6 +1966,8 @@ done: PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); + PyMem_RawFree(locale_encoding); + PyMem_RawFree(codec_name); PyMem_Free(pythonioencoding); Py_XDECREF(bimod); Py_XDECREF(iomod); |