diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-29 09:47:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-29 09:47:29 (GMT) |
commit | dfe0dc74536dfb6f331131d9b2b49557675bb6b7 (patch) | |
tree | 485dd3b5ddce8e6c2d7ebbd6d113e1c5ee6f3707 | |
parent | 177d921c8c03d30daa32994362023f777624b10d (diff) | |
download | cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.zip cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.tar.gz cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.tar.bz2 |
bpo-34485: Add _PyCoreConfig.stdio_encoding (GH-8881)
* Add stdio_encoding and stdio_errors fields to _PyCoreConfig.
* Add unit tests for stdio_encoding and stdio_errors.
-rw-r--r-- | Include/coreconfig.h | 12 | ||||
-rw-r--r-- | Include/pylifecycle.h | 3 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 36 | ||||
-rw-r--r-- | Programs/_testembed.c | 8 | ||||
-rw-r--r-- | Python/coreconfig.c | 181 | ||||
-rw-r--r-- | Python/pylifecycle.c | 160 |
6 files changed, 265 insertions, 135 deletions
diff --git a/Include/coreconfig.h b/Include/coreconfig.h index b279907..ffba306 100644 --- a/Include/coreconfig.h +++ b/Include/coreconfig.h @@ -203,6 +203,18 @@ typedef struct { If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */ int buffered_stdio; + /* Encoding of sys.stdin, sys.stdout and sys.stderr. + Value set from PYTHONIOENCODING environment variable and + Py_SetStandardStreamEncoding() function. + See also 'stdio_errors' attribute. */ + char *stdio_encoding; + + /* Error handler of sys.stdin and sys.stdout. + Value set from PYTHONIOENCODING environment variable and + Py_SetStandardStreamEncoding() function. + See also 'stdio_encoding' attribute. */ + char *stdio_errors; + #ifdef MS_WINDOWS /* If greater than 1, use the "mbcs" encoding instead of the UTF-8 encoding for the filesystem encoding. diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index 2029827..b96db1e 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -179,6 +179,9 @@ PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config); PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void); PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category); #endif +#ifdef Py_BUILD_CORE +PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc); +#endif #ifdef __cplusplus } diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 3922447..2ec9cf3 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -288,13 +288,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'quiet': 0, 'user_site_directory': 1, 'buffered_stdio': 1, + # None means that check_config() gets the expected encoding at runtime + 'stdio_encoding': None, + 'stdio_errors': None, '_install_importlib': 1, '_check_hash_pycs_mode': 'default', '_frozen': 0, } + def get_stdio_encoding(self, env): + code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)' + args = (sys.executable, '-c', code) + proc = subprocess.run(args, env=env, text=True, + 
stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + if proc.returncode: + raise Exception(f"failed to get the stdio encoding: stdout={proc.stdout!r}") + out = proc.stdout.rstrip() + return out.split() + def check_config(self, testname, expected): + expected = dict(self.DEFAULT_CONFIG, **expected) + env = dict(os.environ) for key in list(env): if key.startswith('PYTHON'): @@ -303,13 +319,19 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): # on the current locale env['PYTHONCOERCECLOCALE'] = '0' env['PYTHONUTF8'] = '0' - out, err = self.run_embedded_interpreter(testname, env=env) - # Ignore err - expected = dict(self.DEFAULT_CONFIG, **expected) + if expected['stdio_encoding'] is None or expected['stdio_errors'] is None: + res = self.get_stdio_encoding(env) + if expected['stdio_encoding'] is None: + expected['stdio_encoding'] = res[0] + if expected['stdio_errors'] is None: + expected['stdio_errors'] = res[1] for key, value in expected.items(): expected[key] = str(value) + out, err = self.run_embedded_interpreter(testname, env=env) + # Ignore err + config = {} for line in out.splitlines(): key, value = line.split(' = ', 1) @@ -331,7 +353,11 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'verbose': 1, 'quiet': 1, 'buffered_stdio': 0, + 'utf8_mode': 1, + 'stdio_encoding': 'utf-8', + 'stdio_errors': 'surrogateescape', + 'user_site_directory': 0, '_frozen': 1, } @@ -350,6 +376,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'malloc_stats': 1, 'utf8_mode': 1, + 'stdio_encoding': 'iso8859-1', + 'stdio_errors': 'replace', 'pycache_prefix': 'conf_pycache_prefix', 'program_name': './conf_program_name', @@ -387,6 +415,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'write_bytecode': 0, 'verbose': 1, 'buffered_stdio': 0, + 'stdio_encoding': 'iso8859-1', + 'stdio_errors': 'replace', 'user_site_directory': 0, 'faulthandler': 1, 'dev_mode': 1, diff --git a/Programs/_testembed.c b/Programs/_testembed.c 
index d0c00cf..d569417 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -374,6 +374,8 @@ dump_config(void) printf("user_site_directory = %i\n", config->user_site_directory); printf("buffered_stdio = %i\n", config->buffered_stdio); ASSERT_EQUAL(config->buffered_stdio, !Py_UnbufferedStdioFlag); + printf("stdio_encoding = %s\n", config->stdio_encoding); + printf("stdio_errors = %s\n", config->stdio_errors); /* FIXME: test legacy_windows_fs_encoding */ /* FIXME: test legacy_windows_stdio */ @@ -532,6 +534,11 @@ static int test_init_from_config(void) Py_UnbufferedStdioFlag = 0; config.buffered_stdio = 0; + putenv("PYTHONIOENCODING=cp424"); + Py_SetStandardStreamEncoding("ascii", "ignore"); + config.stdio_encoding = "iso8859-1"; + config.stdio_errors = "replace"; + putenv("PYTHONNOUSERSITE="); Py_NoUserSiteDirectory = 0; config.user_site_directory = 0; @@ -569,6 +576,7 @@ static void test_init_env_putenvs(void) putenv("PYTHONNOUSERSITE=1"); putenv("PYTHONFAULTHANDLER=1"); putenv("PYTHONDEVMODE=1"); + putenv("PYTHONIOENCODING=iso8859-1:replace"); /* FIXME: test PYTHONWARNINGS */ /* FIXME: test PYTHONEXECUTABLE */ /* FIXME: test PYTHONHOME */ diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 99d703c..00037d9 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -1,6 +1,9 @@ #include "Python.h" #include "internal/pystate.h" #include <locale.h> +#ifdef HAVE_LANGINFO_H +# include <langinfo.h> +#endif #define DECODE_LOCALE_ERR(NAME, LEN) \ @@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list) * mechanism that attempts to figure out an appropriate IO encoding */ -char *_Py_StandardStreamEncoding = NULL; -char *_Py_StandardStreamErrors = NULL; +static char *_Py_StandardStreamEncoding = NULL; +static char *_Py_StandardStreamErrors = NULL; int Py_SetStandardStreamEncoding(const char *encoding, const char *errors) @@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config) CLEAR(config->dll_path); #endif CLEAR(config->base_exec_prefix); 
+ + CLEAR(config->stdio_encoding); + CLEAR(config->stdio_errors); #undef CLEAR #undef CLEAR_WSTRLIST } @@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) _PyCoreConfig_Clear(config); #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR +#define COPY_STR_ATTR(ATTR) \ + do { \ + if (config2->ATTR != NULL) { \ + config->ATTR = _PyMem_RawStrdup(config2->ATTR); \ + if (config->ATTR == NULL) { \ + return -1; \ + } \ + } \ + } while (0) #define COPY_WSTR_ATTR(ATTR) \ do { \ if (config2->ATTR != NULL) { \ @@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(quiet); COPY_ATTR(user_site_directory); COPY_ATTR(buffered_stdio); + COPY_STR_ATTR(stdio_encoding); + COPY_STR_ATTR(stdio_errors); #ifdef MS_WINDOWS COPY_ATTR(legacy_windows_fs_encoding); COPY_ATTR(legacy_windows_stdio); @@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config) } +static const char * +get_stdio_errors(const _PyCoreConfig *config) +{ +#ifndef MS_WINDOWS + const char *loc = setlocale(LC_CTYPE, NULL); + if (loc != NULL) { + /* surrogateescape is the default in the legacy C and POSIX locales */ + if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) { + return "surrogateescape"; + } + +#ifdef PY_COERCE_C_LOCALE + /* surrogateescape is the default in locale coercion target locales */ + if (_Py_IsLocaleCoercionTarget(loc)) { + return "surrogateescape"; + } +#endif + } + + return "strict"; +#else + /* On Windows, always use surrogateescape by default */ + return "surrogateescape"; +#endif +} + + +_PyInitError +_Py_get_locale_encoding(char **locale_encoding) +{ +#ifdef MS_WINDOWS + char encoding[20]; + PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP()); +#elif defined(__ANDROID__) + const char *encoding = "UTF-8"; +#else + const char *encoding = nl_langinfo(CODESET); + if (!encoding || encoding[0] == '\0') { + return _Py_INIT_USER_ERR("failed to get the locale encoding: " + "nl_langinfo(CODESET) failed"); 
+ } +#endif + *locale_encoding = _PyMem_RawStrdup(encoding); + if (*locale_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + return _Py_INIT_OK(); +} + + +static _PyInitError +config_init_stdio_encoding(_PyCoreConfig *config) +{ + /* If Py_SetStandardStreamEncoding() have been called, use these + parameters. */ + if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) { + config->stdio_encoding = _PyMem_RawStrdup(_Py_StandardStreamEncoding); + if (config->stdio_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + + if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) { + config->stdio_errors = _PyMem_RawStrdup(_Py_StandardStreamErrors); + if (config->stdio_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + + if (config->stdio_encoding != NULL && config->stdio_errors != NULL) { + return _Py_INIT_OK(); + } + + /* PYTHONIOENCODING environment variable */ + const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING"); + if (opt) { + char *pythonioencoding = _PyMem_RawStrdup(opt); + if (pythonioencoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + + char *err = strchr(pythonioencoding, ':'); + if (err) { + *err = '\0'; + err++; + if (!err[0]) { + err = NULL; + } + } + + /* Does PYTHONIOENCODING contain an encoding? */ + if (pythonioencoding[0]) { + if (config->stdio_encoding == NULL) { + config->stdio_encoding = _PyMem_RawStrdup(pythonioencoding); + if (config->stdio_encoding == NULL) { + PyMem_RawFree(pythonioencoding); + return _Py_INIT_NO_MEMORY(); + } + } + + /* If the encoding is set but not the error handler, + use "strict" error handler by default. + PYTHONIOENCODING=latin1 behaves as + PYTHONIOENCODING=latin1:strict. 
*/ + if (!err) { + err = "strict"; + } + } + + if (config->stdio_errors == NULL && err != NULL) { + config->stdio_errors = _PyMem_RawStrdup(err); + if (config->stdio_errors == NULL) { + PyMem_RawFree(pythonioencoding); + return _Py_INIT_NO_MEMORY(); + } + } + + PyMem_RawFree(pythonioencoding); + } + + /* UTF-8 Mode uses UTF-8/surrogateescape */ + if (config->utf8_mode) { + if (config->stdio_encoding == NULL) { + config->stdio_encoding = _PyMem_RawStrdup("utf-8"); + if (config->stdio_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + if (config->stdio_errors == NULL) { + config->stdio_errors = _PyMem_RawStrdup("surrogateescape"); + if (config->stdio_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + } + + /* Choose the default error handler based on the current locale. */ + if (config->stdio_encoding == NULL) { + _PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding); + if (_Py_INIT_FAILED(err)) { + return err; + } + } + if (config->stdio_errors == NULL) { + const char *errors = get_stdio_errors(config); + config->stdio_errors = _PyMem_RawStrdup(errors); + if (config->stdio_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + + return _Py_INIT_OK(); +} + + /* Read configuration settings from standard locations * * This function doesn't make any changes to the interpreter state - it @@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config) config->argc = 0; } + err = config_init_stdio_encoding(config); + if (_Py_INIT_FAILED(err)) { + return err; + } + assert(config->coerce_c_locale >= 0); assert(config->use_environment >= 0); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 88403f4..9f6757f 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -184,27 +184,6 @@ error: return NULL; } -static _PyInitError -get_locale_encoding(char **locale_encoding) -{ -#ifdef MS_WINDOWS - char encoding[20]; - PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP()); -#elif defined(__ANDROID__) - const char 
*encoding = "UTF-8"; -#else - const char *encoding = nl_langinfo(CODESET); - if (!encoding || encoding[0] == '\0') { - return _Py_INIT_USER_ERR("failed to get the locale encoding: " - "nl_langinfo(CODESET) failed"); - } -#endif - *locale_encoding = _PyMem_RawStrdup(encoding); - if (*locale_encoding == NULL) { - return _Py_INIT_NO_MEMORY(); - } - return _Py_INIT_OK(); -} static _PyInitError initimport(PyInterpreterState *interp, PyObject *sysmod) @@ -340,35 +319,20 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = { {NULL} }; -static const char * -get_stdio_errors(void) -{ -#ifndef MS_WINDOWS - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL) { - /* surrogateescape is the default in the legacy C and POSIX locales */ - if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) { - return "surrogateescape"; - } -#ifdef PY_COERCE_C_LOCALE - /* surrogateescape is the default in locale coercion target locales */ - const _LocaleCoercionTarget *target = NULL; - for (target = _TARGET_LOCALES; target->locale_name; target++) { - if (strcmp(ctype_loc, target->locale_name) == 0) { - return "surrogateescape"; - } +int +_Py_IsLocaleCoercionTarget(const char *ctype_loc) +{ + const _LocaleCoercionTarget *target = NULL; + for (target = _TARGET_LOCALES; target->locale_name; target++) { + if (strcmp(ctype_loc, target->locale_name) == 0) { + return 1; } -#endif } - - return "strict"; -#else - /* On Windows, always use surrogateescape by default */ - return "surrogateescape"; -#endif + return 0; } + #ifdef PY_COERCE_C_LOCALE static const char C_LOCALE_COERCION_WARNING[] = "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale " @@ -1533,8 +1497,10 @@ initfsencoding(PyInterpreterState *interp) Py_HasFileSystemDefaultEncoding = 1; } else { + extern _PyInitError _Py_get_locale_encoding(char **locale_encoding); + char *locale_encoding; - _PyInitError err = get_locale_encoding(&locale_encoding); + _PyInitError err = 
_Py_get_locale_encoding(&locale_encoding); if (_Py_INIT_FAILED(err)) { return err; } @@ -1740,13 +1706,16 @@ init_sys_streams(PyInterpreterState *interp) PyObject *std = NULL; int fd; PyObject * encoding_attr; - char *pythonioencoding = NULL; - const char *encoding, *errors; - char *locale_encoding = NULL; - char *codec_name = NULL; _PyInitError res = _Py_INIT_OK(); - extern char *_Py_StandardStreamEncoding; - extern char *_Py_StandardStreamErrors; + _PyCoreConfig *config = &interp->core_config; + + char *codec_name = get_codec_name(config->stdio_encoding); + if (codec_name == NULL) { + return _Py_INIT_ERR("failed to get the Python codec name " + "of the stdio encoding"); + } + PyMem_RawFree(config->stdio_encoding); + config->stdio_encoding = codec_name; /* Hack to avoid a nasty recursion issue when Python is invoked in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ @@ -1778,85 +1747,15 @@ init_sys_streams(PyInterpreterState *interp) } Py_DECREF(wrapper); - encoding = _Py_StandardStreamEncoding; - errors = _Py_StandardStreamErrors; - if (!encoding || !errors) { - char *opt = Py_GETENV("PYTHONIOENCODING"); - if (opt && opt[0] != '\0') { - char *err; - pythonioencoding = _PyMem_Strdup(opt); - if (pythonioencoding == NULL) { - PyErr_NoMemory(); - goto error; - } - err = strchr(pythonioencoding, ':'); - if (err) { - *err = '\0'; - err++; - if (!err[0]) { - err = NULL; - } - } - - /* Does PYTHONIOENCODING contain an encoding? */ - if (pythonioencoding[0]) { - if (!encoding) { - encoding = pythonioencoding; - } - - /* If the encoding is set but not the error handler, - use "strict" error handler by default. - PYTHONIOENCODING=latin1 behaves as - PYTHONIOENCODING=latin1:strict. 
*/ - if (!err) { - err = "strict"; - } - } - - if (!errors && err != NULL) { - errors = err; - } - } - - if (interp->core_config.utf8_mode) { - if (!encoding) { - encoding = "utf-8"; - } - if (!errors) { - errors = "surrogateescape"; - } - } - - if (!errors) { - /* Choose the default error handler based on the current locale */ - errors = get_stdio_errors(); - } - } - - if (encoding == NULL) { - _PyInitError err = get_locale_encoding(&locale_encoding); - if (_Py_INIT_FAILED(err)) { - return err; - } - encoding = locale_encoding; - } - - codec_name = get_codec_name(encoding); - if (codec_name == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "failed to get the Python codec name " - "of stdio encoding"); - goto error; - } - encoding = codec_name; - /* Set sys.stdin */ fd = fileno(stdin); /* Under some conditions stdin, stdout and stderr may not be connected * and fileno() may point to an invalid file descriptor. For example * GUI apps don't have valid standard streams by default. */ - std = create_stdio(iomod, fd, 0, "<stdin>", encoding, errors); + std = create_stdio(iomod, fd, 0, "<stdin>", + config->stdio_encoding, + config->stdio_errors); if (std == NULL) goto error; PySys_SetObject("__stdin__", std); @@ -1865,7 +1764,9 @@ init_sys_streams(PyInterpreterState *interp) /* Set sys.stdout */ fd = fileno(stdout); - std = create_stdio(iomod, fd, 1, "<stdout>", encoding, errors); + std = create_stdio(iomod, fd, 1, "<stdout>", + config->stdio_encoding, + config->stdio_errors); if (std == NULL) goto error; PySys_SetObject("__stdout__", std); @@ -1875,7 +1776,9 @@ init_sys_streams(PyInterpreterState *interp) #if 1 /* Disable this if you have trouble debugging bootstrap stuff */ /* Set sys.stderr, replaces the preliminary stderr */ fd = fileno(stderr); - std = create_stdio(iomod, fd, 1, "<stderr>", encoding, "backslashreplace"); + std = create_stdio(iomod, fd, 1, "<stderr>", + config->stdio_encoding, + "backslashreplace"); if (std == NULL) goto error; @@ -1911,9 +1814,6 @@ 
error: done: _Py_ClearStandardStreamEncoding(); - PyMem_RawFree(locale_encoding); - PyMem_RawFree(codec_name); - PyMem_Free(pythonioencoding); Py_XDECREF(bimod); Py_XDECREF(iomod); return res; |