summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Victor Stinner <vstinner@redhat.com> 2018-08-29 09:47:29 (GMT)
committer GitHub <noreply@github.com> 2018-08-29 09:47:29 (GMT)
commit dfe0dc74536dfb6f331131d9b2b49557675bb6b7 (patch)
tree 485dd3b5ddce8e6c2d7ebbd6d113e1c5ee6f3707
parent 177d921c8c03d30daa32994362023f777624b10d (diff)
download cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.zip
cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.tar.gz
cpython-dfe0dc74536dfb6f331131d9b2b49557675bb6b7.tar.bz2
bpo-34485: Add _PyCoreConfig.stdio_encoding (GH-8881)
* Add stdio_encoding and stdio_errors fields to _PyCoreConfig.
* Add unit tests on stdio_encoding and stdio_errors.
-rw-r--r-- Include/coreconfig.h 12
-rw-r--r-- Include/pylifecycle.h 3
-rw-r--r-- Lib/test/test_embed.py 36
-rw-r--r-- Programs/_testembed.c 8
-rw-r--r-- Python/coreconfig.c 181
-rw-r--r-- Python/pylifecycle.c 160
6 files changed, 265 insertions, 135 deletions
diff --git a/Include/coreconfig.h b/Include/coreconfig.h
index b279907..ffba306 100644
--- a/Include/coreconfig.h
+++ b/Include/coreconfig.h
@@ -203,6 +203,18 @@ typedef struct {
If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */
int buffered_stdio;
+ /* Encoding of sys.stdin, sys.stdout and sys.stderr.
+ Value set from PYTHONIOENCODING environment variable and
+ Py_SetStandardStreamEncoding() function.
+ See also 'stdio_errors' attribute. */
+ char *stdio_encoding;
+
+ /* Error handler of sys.stdin and sys.stdout (sys.stderr always uses
+ the "backslashreplace" error handler).
+ Value set from PYTHONIOENCODING environment variable and
+ Py_SetStandardStreamEncoding() function.
+ See also 'stdio_encoding' attribute. */
+ char *stdio_errors;
+
#ifdef MS_WINDOWS
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
encoding for the filesystem encoding.
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index 2029827..b96db1e 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -179,6 +179,9 @@ PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
+#ifdef Py_BUILD_CORE
+PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
+#endif
#ifdef __cplusplus
}
diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py
index 3922447..2ec9cf3 100644
--- a/Lib/test/test_embed.py
+++ b/Lib/test/test_embed.py
@@ -288,13 +288,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'quiet': 0,
'user_site_directory': 1,
'buffered_stdio': 1,
+ # None means that check_config() gets the expected encoding at runtime
+ 'stdio_encoding': None,
+ 'stdio_errors': None,
'_install_importlib': 1,
'_check_hash_pycs_mode': 'default',
'_frozen': 0,
}
+ def get_stdio_encoding(self, env):
+ code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
+ args = (sys.executable, '-c', code)
+ proc = subprocess.run(args, env=env, text=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ if proc.returncode:
+ raise Exception(f"failed to get the stdio encoding: stdout={proc.stdout!r}")
+ out = proc.stdout.rstrip()
+ return out.split()
+
def check_config(self, testname, expected):
+ expected = dict(self.DEFAULT_CONFIG, **expected)
+
env = dict(os.environ)
for key in list(env):
if key.startswith('PYTHON'):
@@ -303,13 +319,19 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
# on the current locale
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
- out, err = self.run_embedded_interpreter(testname, env=env)
- # Ignore err
- expected = dict(self.DEFAULT_CONFIG, **expected)
+ if expected['stdio_encoding'] is None or expected['stdio_errors'] is None:
+ res = self.get_stdio_encoding(env)
+ if expected['stdio_encoding'] is None:
+ expected['stdio_encoding'] = res[0]
+ if expected['stdio_errors'] is None:
+ expected['stdio_errors'] = res[1]
for key, value in expected.items():
expected[key] = str(value)
+ out, err = self.run_embedded_interpreter(testname, env=env)
+ # Ignore err
+
config = {}
for line in out.splitlines():
key, value = line.split(' = ', 1)
@@ -331,7 +353,11 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'verbose': 1,
'quiet': 1,
'buffered_stdio': 0,
+
'utf8_mode': 1,
+ 'stdio_encoding': 'utf-8',
+ 'stdio_errors': 'surrogateescape',
+
'user_site_directory': 0,
'_frozen': 1,
}
@@ -350,6 +376,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'malloc_stats': 1,
'utf8_mode': 1,
+ 'stdio_encoding': 'iso8859-1',
+ 'stdio_errors': 'replace',
'pycache_prefix': 'conf_pycache_prefix',
'program_name': './conf_program_name',
@@ -387,6 +415,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'write_bytecode': 0,
'verbose': 1,
'buffered_stdio': 0,
+ 'stdio_encoding': 'iso8859-1',
+ 'stdio_errors': 'replace',
'user_site_directory': 0,
'faulthandler': 1,
'dev_mode': 1,
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index d0c00cf..d569417 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -374,6 +374,8 @@ dump_config(void)
printf("user_site_directory = %i\n", config->user_site_directory);
printf("buffered_stdio = %i\n", config->buffered_stdio);
ASSERT_EQUAL(config->buffered_stdio, !Py_UnbufferedStdioFlag);
+ printf("stdio_encoding = %s\n", config->stdio_encoding);
+ printf("stdio_errors = %s\n", config->stdio_errors);
/* FIXME: test legacy_windows_fs_encoding */
/* FIXME: test legacy_windows_stdio */
@@ -532,6 +534,11 @@ static int test_init_from_config(void)
Py_UnbufferedStdioFlag = 0;
config.buffered_stdio = 0;
+ putenv("PYTHONIOENCODING=cp424");
+ Py_SetStandardStreamEncoding("ascii", "ignore");
+ config.stdio_encoding = "iso8859-1";
+ config.stdio_errors = "replace";
+
putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;
config.user_site_directory = 0;
@@ -569,6 +576,7 @@ static void test_init_env_putenvs(void)
putenv("PYTHONNOUSERSITE=1");
putenv("PYTHONFAULTHANDLER=1");
putenv("PYTHONDEVMODE=1");
+ putenv("PYTHONIOENCODING=iso8859-1:replace");
/* FIXME: test PYTHONWARNINGS */
/* FIXME: test PYTHONEXECUTABLE */
/* FIXME: test PYTHONHOME */
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index 99d703c..00037d9 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -1,6 +1,9 @@
#include "Python.h"
#include "internal/pystate.h"
#include <locale.h>
+#ifdef HAVE_LANGINFO_H
+# include <langinfo.h>
+#endif
#define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list)
* mechanism that attempts to figure out an appropriate IO encoding
*/
-char *_Py_StandardStreamEncoding = NULL;
-char *_Py_StandardStreamErrors = NULL;
+static char *_Py_StandardStreamEncoding = NULL;
+static char *_Py_StandardStreamErrors = NULL;
int
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
@@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
CLEAR(config->dll_path);
#endif
CLEAR(config->base_exec_prefix);
+
+ CLEAR(config->stdio_encoding);
+ CLEAR(config->stdio_errors);
#undef CLEAR
#undef CLEAR_WSTRLIST
}
@@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
_PyCoreConfig_Clear(config);
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
+#define COPY_STR_ATTR(ATTR) \
+ do { \
+ if (config2->ATTR != NULL) { \
+ config->ATTR = _PyMem_RawStrdup(config2->ATTR); \
+ if (config->ATTR == NULL) { \
+ return -1; \
+ } \
+ } \
+ } while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
if (config2->ATTR != NULL) { \
@@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
+ COPY_STR_ATTR(stdio_encoding);
+ COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_fs_encoding);
COPY_ATTR(legacy_windows_stdio);
@@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config)
}
+static const char *
+get_stdio_errors(const _PyCoreConfig *config)
+{
+#ifndef MS_WINDOWS
+ const char *loc = setlocale(LC_CTYPE, NULL); /* query only: the locale is not modified */
+ if (loc != NULL) {
+ /* surrogateescape is the default error handler when LC_CTYPE is
+ one of the legacy C and POSIX locales */
+ if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
+ return "surrogateescape";
+ }
+
+#ifdef PY_COERCE_C_LOCALE
+ /* surrogateescape is also the default error handler when LC_CTYPE
+ is one of the locale coercion target locales */
+ if (_Py_IsLocaleCoercionTarget(loc)) {
+ return "surrogateescape";
+ }
+#endif
+ }
+
+ return "strict"; /* any other locale, or setlocale() returned NULL */
+#else
+ /* On Windows, the locale is not consulted: always use surrogateescape
+ by default */
+ return "surrogateescape";
+#endif
+}
+
+/* Get the name of the locale encoding as a newly allocated string.
+
+   The name is "cp" followed by the value of GetACP() on Windows, "UTF-8"
+   on Android, and the result of nl_langinfo(CODESET) on other platforms.
+
+   On success, store a _PyMem_RawStrdup() copy of the name into
+   *locale_encoding and return _Py_INIT_OK(). Return a user error if
+   nl_langinfo(CODESET) returns NULL or an empty string (*locale_encoding
+   is left untouched in this case), or _Py_INIT_NO_MEMORY() if the copy
+   cannot be allocated. */
+_PyInitError
+_Py_get_locale_encoding(char **locale_encoding)
+{
+#ifdef MS_WINDOWS
+ char encoding[20];
+ PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
+#elif defined(__ANDROID__)
+ const char *encoding = "UTF-8";
+#else
+ const char *encoding = nl_langinfo(CODESET);
+ if (!encoding || encoding[0] == '\0') {
+ return _Py_INIT_USER_ERR("failed to get the locale encoding: "
+ "nl_langinfo(CODESET) failed");
+ }
+#endif
+ *locale_encoding = _PyMem_RawStrdup(encoding);
+ if (*locale_encoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ return _Py_INIT_OK();
+}
+
+
+static _PyInitError
+config_init_stdio_encoding(_PyCoreConfig *config)
+{
+ /* Fill config->stdio_encoding and config->stdio_errors. A field which
+ is already set (not NULL) is never overwritten: each source below only
+ fills the fields which are still NULL. Return _Py_INIT_OK() on success,
+ or an error if a string copy cannot be allocated or if the locale
+ encoding cannot be retrieved.
+
+ If Py_SetStandardStreamEncoding() has been called, use these
+ parameters. */
+ if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
+ config->stdio_encoding = _PyMem_RawStrdup(_Py_StandardStreamEncoding);
+ if (config->stdio_encoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+
+ if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
+ config->stdio_errors = _PyMem_RawStrdup(_Py_StandardStreamErrors);
+ if (config->stdio_errors == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+
+ if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
+ return _Py_INIT_OK(); /* both fields are set: nothing else to do */
+ }
+
+ /* PYTHONIOENCODING environment variable, parsed as "encoding:errors":
+ the value is split at the first colon, and an empty encoding or an
+ empty error handler is treated as not specified. */
+ const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
+ if (opt) {
+ char *pythonioencoding = _PyMem_RawStrdup(opt);
+ if (pythonioencoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+
+ char *err = strchr(pythonioencoding, ':');
+ if (err) {
+ *err = '\0';
+ err++;
+ if (!err[0]) {
+ err = NULL;
+ }
+ }
+
+ /* Does PYTHONIOENCODING contain an encoding? */
+ if (pythonioencoding[0]) {
+ if (config->stdio_encoding == NULL) {
+ config->stdio_encoding = _PyMem_RawStrdup(pythonioencoding);
+ if (config->stdio_encoding == NULL) {
+ PyMem_RawFree(pythonioencoding);
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+
+ /* If the encoding is set but not the error handler,
+ use "strict" error handler by default.
+ PYTHONIOENCODING=latin1 behaves as
+ PYTHONIOENCODING=latin1:strict. */
+ if (!err) {
+ err = "strict";
+ }
+ }
+
+ if (config->stdio_errors == NULL && err != NULL) {
+ config->stdio_errors = _PyMem_RawStrdup(err);
+ if (config->stdio_errors == NULL) {
+ PyMem_RawFree(pythonioencoding);
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+
+ PyMem_RawFree(pythonioencoding);
+ }
+
+ /* UTF-8 Mode uses UTF-8/surrogateescape for the fields which are
+ still not set */
+ if (config->utf8_mode) {
+ if (config->stdio_encoding == NULL) {
+ config->stdio_encoding = _PyMem_RawStrdup("utf-8");
+ if (config->stdio_encoding == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+ if (config->stdio_errors == NULL) {
+ config->stdio_errors = _PyMem_RawStrdup("surrogateescape");
+ if (config->stdio_errors == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+ }
+
+ /* Last resort: use the locale encoding, and choose the default error
+ handler based on the current locale. */
+ if (config->stdio_encoding == NULL) {
+ _PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+ }
+ if (config->stdio_errors == NULL) {
+ const char *errors = get_stdio_errors(config);
+ config->stdio_errors = _PyMem_RawStrdup(errors);
+ if (config->stdio_errors == NULL) {
+ return _Py_INIT_NO_MEMORY();
+ }
+ }
+
+ return _Py_INIT_OK();
+}
+
+
/* Read configuration settings from standard locations
*
* This function doesn't make any changes to the interpreter state - it
@@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
config->argc = 0;
}
+ err = config_init_stdio_encoding(config);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+
assert(config->coerce_c_locale >= 0);
assert(config->use_environment >= 0);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 88403f4..9f6757f 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -184,27 +184,6 @@ error:
return NULL;
}
-static _PyInitError
-get_locale_encoding(char **locale_encoding)
-{
-#ifdef MS_WINDOWS
- char encoding[20];
- PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
-#elif defined(__ANDROID__)
- const char *encoding = "UTF-8";
-#else
- const char *encoding = nl_langinfo(CODESET);
- if (!encoding || encoding[0] == '\0') {
- return _Py_INIT_USER_ERR("failed to get the locale encoding: "
- "nl_langinfo(CODESET) failed");
- }
-#endif
- *locale_encoding = _PyMem_RawStrdup(encoding);
- if (*locale_encoding == NULL) {
- return _Py_INIT_NO_MEMORY();
- }
- return _Py_INIT_OK();
-}
static _PyInitError
initimport(PyInterpreterState *interp, PyObject *sysmod)
@@ -340,35 +319,20 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
{NULL}
};
-static const char *
-get_stdio_errors(void)
-{
-#ifndef MS_WINDOWS
- const char *ctype_loc = setlocale(LC_CTYPE, NULL);
- if (ctype_loc != NULL) {
- /* surrogateescape is the default in the legacy C and POSIX locales */
- if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
- return "surrogateescape";
- }
-#ifdef PY_COERCE_C_LOCALE
- /* surrogateescape is the default in locale coercion target locales */
- const _LocaleCoercionTarget *target = NULL;
- for (target = _TARGET_LOCALES; target->locale_name; target++) {
- if (strcmp(ctype_loc, target->locale_name) == 0) {
- return "surrogateescape";
- }
+int
+_Py_IsLocaleCoercionTarget(const char *ctype_loc)
+{
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ if (strcmp(ctype_loc, target->locale_name) == 0) {
+ return 1;
}
-#endif
}
-
- return "strict";
-#else
- /* On Windows, always use surrogateescape by default */
- return "surrogateescape";
-#endif
+ return 0;
}
+
#ifdef PY_COERCE_C_LOCALE
static const char C_LOCALE_COERCION_WARNING[] =
"Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
@@ -1533,8 +1497,10 @@ initfsencoding(PyInterpreterState *interp)
Py_HasFileSystemDefaultEncoding = 1;
}
else {
+ extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
+
char *locale_encoding;
- _PyInitError err = get_locale_encoding(&locale_encoding);
+ _PyInitError err = _Py_get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
@@ -1740,13 +1706,16 @@ init_sys_streams(PyInterpreterState *interp)
PyObject *std = NULL;
int fd;
PyObject * encoding_attr;
- char *pythonioencoding = NULL;
- const char *encoding, *errors;
- char *locale_encoding = NULL;
- char *codec_name = NULL;
_PyInitError res = _Py_INIT_OK();
- extern char *_Py_StandardStreamEncoding;
- extern char *_Py_StandardStreamErrors;
+ _PyCoreConfig *config = &interp->core_config;
+
+ char *codec_name = get_codec_name(config->stdio_encoding);
+ if (codec_name == NULL) {
+ return _Py_INIT_ERR("failed to get the Python codec name "
+ "of the stdio encoding");
+ }
+ PyMem_RawFree(config->stdio_encoding);
+ config->stdio_encoding = codec_name;
/* Hack to avoid a nasty recursion issue when Python is invoked
in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
@@ -1778,85 +1747,15 @@ init_sys_streams(PyInterpreterState *interp)
}
Py_DECREF(wrapper);
- encoding = _Py_StandardStreamEncoding;
- errors = _Py_StandardStreamErrors;
- if (!encoding || !errors) {
- char *opt = Py_GETENV("PYTHONIOENCODING");
- if (opt && opt[0] != '\0') {
- char *err;
- pythonioencoding = _PyMem_Strdup(opt);
- if (pythonioencoding == NULL) {
- PyErr_NoMemory();
- goto error;
- }
- err = strchr(pythonioencoding, ':');
- if (err) {
- *err = '\0';
- err++;
- if (!err[0]) {
- err = NULL;
- }
- }
-
- /* Does PYTHONIOENCODING contain an encoding? */
- if (pythonioencoding[0]) {
- if (!encoding) {
- encoding = pythonioencoding;
- }
-
- /* If the encoding is set but not the error handler,
- use "strict" error handler by default.
- PYTHONIOENCODING=latin1 behaves as
- PYTHONIOENCODING=latin1:strict. */
- if (!err) {
- err = "strict";
- }
- }
-
- if (!errors && err != NULL) {
- errors = err;
- }
- }
-
- if (interp->core_config.utf8_mode) {
- if (!encoding) {
- encoding = "utf-8";
- }
- if (!errors) {
- errors = "surrogateescape";
- }
- }
-
- if (!errors) {
- /* Choose the default error handler based on the current locale */
- errors = get_stdio_errors();
- }
- }
-
- if (encoding == NULL) {
- _PyInitError err = get_locale_encoding(&locale_encoding);
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- encoding = locale_encoding;
- }
-
- codec_name = get_codec_name(encoding);
- if (codec_name == NULL) {
- PyErr_SetString(PyExc_RuntimeError,
- "failed to get the Python codec name "
- "of stdio encoding");
- goto error;
- }
- encoding = codec_name;
-
/* Set sys.stdin */
fd = fileno(stdin);
/* Under some conditions stdin, stdout and stderr may not be connected
* and fileno() may point to an invalid file descriptor. For example
* GUI apps don't have valid standard streams by default.
*/
- std = create_stdio(iomod, fd, 0, "<stdin>", encoding, errors);
+ std = create_stdio(iomod, fd, 0, "<stdin>",
+ config->stdio_encoding,
+ config->stdio_errors);
if (std == NULL)
goto error;
PySys_SetObject("__stdin__", std);
@@ -1865,7 +1764,9 @@ init_sys_streams(PyInterpreterState *interp)
/* Set sys.stdout */
fd = fileno(stdout);
- std = create_stdio(iomod, fd, 1, "<stdout>", encoding, errors);
+ std = create_stdio(iomod, fd, 1, "<stdout>",
+ config->stdio_encoding,
+ config->stdio_errors);
if (std == NULL)
goto error;
PySys_SetObject("__stdout__", std);
@@ -1875,7 +1776,9 @@ init_sys_streams(PyInterpreterState *interp)
#if 1 /* Disable this if you have trouble debugging bootstrap stuff */
/* Set sys.stderr, replaces the preliminary stderr */
fd = fileno(stderr);
- std = create_stdio(iomod, fd, 1, "<stderr>", encoding, "backslashreplace");
+ std = create_stdio(iomod, fd, 1, "<stderr>",
+ config->stdio_encoding,
+ "backslashreplace");
if (std == NULL)
goto error;
@@ -1911,9 +1814,6 @@ error:
done:
_Py_ClearStandardStreamEncoding();
- PyMem_RawFree(locale_encoding);
- PyMem_RawFree(codec_name);
- PyMem_Free(pythonioencoding);
Py_XDECREF(bimod);
Py_XDECREF(iomod);
return res;