diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-29 11:25:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-29 11:25:36 (GMT) |
commit | b2457efc78b74a1d6d1b77d11a939e886b8a4e2c (patch) | |
tree | b715b8061d730f07584d13e4475660d61fd261f5 /Python | |
parent | dfe0dc74536dfb6f331131d9b2b49557675bb6b7 (diff) | |
download | cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.zip cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.tar.gz cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.tar.bz2 |
bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)
_PyCoreConfig_Read() is now responsible for choosing the filesystem
encoding and error handler. Using Py_Main(), the encoding is now
chosen even before calling Py_Initialize().
_PyCoreConfig.filesystem_encoding is now the reference, instead of
Py_FileSystemDefaultEncoding, for the Python filesystem encoding.
Changes:
* Add filesystem_encoding and filesystem_errors to _PyCoreConfig
* _PyCoreConfig_Read() now reads the locale encoding for the
filesystem encoding.
* PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize()
now use the interpreter configuration rather than
Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
global configuration variables.
* Add _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding()
private functions so that Py_FileSystemDefaultEncoding and
Py_FileSystemDefaultEncodeErrors are only modified in coreconfig.c.
* _Py_CoerceLegacyLocale() now takes an int "warn" flag rather than a
_PyCoreConfig to decide whether to emit the warning.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/coreconfig.c | 145 | ||||
-rw-r--r-- | Python/pylifecycle.c | 80 | ||||
-rw-r--r-- | Python/sysmodule.c | 42 |
3 files changed, 196 insertions, 71 deletions
diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 00037d9..0ec4640 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -5,6 +5,11 @@ # include <langinfo.h> #endif +#include <locale.h> /* setlocale() */ +#ifdef HAVE_LANGINFO_H +#include <langinfo.h> /* nl_langinfo(CODESET) */ +#endif + #define DECODE_LOCALE_ERR(NAME, LEN) \ (((LEN) == -2) \ @@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ int Py_HasFileSystemDefaultEncoding = 0; #endif const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; +static int _Py_HasFileSystemDefaultEncodeErrors = 1; + /* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change stdin and stdout error handler to "surrogateescape". It is equal to -1 by default: unknown, will be set by Py_Main() */ @@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list) } +void +_Py_ClearFileSystemEncoding(void) +{ + if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) { + PyMem_RawFree((char*)Py_FileSystemDefaultEncoding); + Py_FileSystemDefaultEncoding = NULL; + } + if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) { + PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors); + Py_FileSystemDefaultEncodeErrors = NULL; + } +} + + +/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors + global configuration variables. 
*/ +int +_Py_SetFileSystemEncoding(const char *encoding, const char *errors) +{ + char *encoding2 = _PyMem_RawStrdup(encoding); + if (encoding2 == NULL) { + return -1; + } + + char *errors2 = _PyMem_RawStrdup(errors); + if (errors2 == NULL) { + PyMem_RawFree(encoding2); + return -1; + } + + _Py_ClearFileSystemEncoding(); + + Py_FileSystemDefaultEncoding = encoding2; + Py_HasFileSystemDefaultEncoding = 0; + + Py_FileSystemDefaultEncodeErrors = errors2; + _Py_HasFileSystemDefaultEncodeErrors = 0; + return 0; +} + + /* Helper to allow an embedding application to override the normal * mechanism that attempts to figure out an appropriate IO encoding */ @@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config) #endif CLEAR(config->base_exec_prefix); + CLEAR(config->filesystem_encoding); + CLEAR(config->filesystem_errors); CLEAR(config->stdio_encoding); CLEAR(config->stdio_errors); #undef CLEAR @@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(quiet); COPY_ATTR(user_site_directory); COPY_ATTR(buffered_stdio); + COPY_STR_ATTR(filesystem_encoding); + COPY_STR_ATTR(filesystem_errors); COPY_STR_ATTR(stdio_encoding); COPY_STR_ATTR(stdio_errors); #ifdef MS_WINDOWS @@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(_frozen); #undef COPY_ATTR +#undef COPY_STR_ATTR #undef COPY_WSTR_ATTR #undef COPY_WSTRLIST return 0; @@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config) } -_PyInitError -_Py_get_locale_encoding(char **locale_encoding) +static _PyInitError +get_locale_encoding(char **locale_encoding) { #ifdef MS_WINDOWS char encoding[20]; @@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config) /* Choose the default error handler based on the current locale. 
*/ if (config->stdio_encoding == NULL) { - _PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding); + _PyInitError err = get_locale_encoding(&config->stdio_encoding); if (_Py_INIT_FAILED(err)) { return err; } @@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config) } +static _PyInitError +config_init_fs_encoding(_PyCoreConfig *config) +{ +#ifdef MS_WINDOWS + if (config->legacy_windows_fs_encoding) { + /* Legacy Windows filesystem encoding: mbcs/replace */ + if (config->filesystem_encoding == NULL) { + config->filesystem_encoding = _PyMem_RawStrdup("mbcs"); + if (config->filesystem_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + if (config->filesystem_errors == NULL) { + config->filesystem_errors = _PyMem_RawStrdup("replace"); + if (config->filesystem_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + } + + /* Windows defaults to utf-8/surrogatepass (PEP 529) */ + if (config->filesystem_encoding == NULL) { + config->filesystem_encoding = _PyMem_RawStrdup("utf-8"); + if (config->filesystem_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + if (config->filesystem_errors == NULL) { + config->filesystem_errors = _PyMem_RawStrdup("surrogatepass"); + if (config->filesystem_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } +#else + if (config->utf8_mode) { + /* UTF-8 Mode use: utf-8/surrogateescape */ + if (config->filesystem_encoding == NULL) { + config->filesystem_encoding = _PyMem_RawStrdup("utf-8"); + if (config->filesystem_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + /* errors defaults to surrogateescape above */ + } + + if (config->filesystem_encoding == NULL) { + /* macOS and Android use UTF-8, other platforms use + the locale encoding. 
*/ + char *locale_encoding; +#if defined(__APPLE__) || defined(__ANDROID__) + locale_encoding = "UTF-8"; +#else + _PyInitError err = get_locale_encoding(&locale_encoding); + if (_Py_INIT_FAILED(err)) { + return err; + } +#endif + config->filesystem_encoding = _PyMem_RawStrdup(locale_encoding); + if (config->filesystem_encoding == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } + + if (config->filesystem_errors == NULL) { + /* by default, use the "surrogateescape" error handler */ + config->filesystem_errors = _PyMem_RawStrdup("surrogateescape"); + if (config->filesystem_errors == NULL) { + return _Py_INIT_NO_MEMORY(); + } + } +#endif + return _Py_INIT_OK(); +} + + /* Read configuration settings from standard locations * * This function doesn't make any changes to the interpreter state - it @@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config) config->argc = 0; } + if (config->filesystem_encoding == NULL && config->filesystem_errors == NULL) { + err = config_init_fs_encoding(config); + if (_Py_INIT_FAILED(err)) { + return err; + } + } + err = config_init_stdio_encoding(config); if (_Py_INIT_FAILED(err)) { return err; @@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config) assert(config->coerce_c_locale >= 0); assert(config->use_environment >= 0); + assert(config->filesystem_encoding != NULL); + assert(config->filesystem_errors != NULL); + assert(config->stdio_encoding != NULL); + assert(config->stdio_errors != NULL); return _Py_INIT_OK(); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 9f6757f..6d97f2f 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] = "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n"; static void -_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target) +_coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target) { const char *newloc = target->locale_name; 
@@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci "Error setting LC_CTYPE, skipping C locale coercion\n"); return; } - if (config->coerce_c_locale_warn) { + if (warn) { fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc); } @@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci #endif void -_Py_CoerceLegacyLocale(const _PyCoreConfig *config) +_Py_CoerceLegacyLocale(int warn) { #ifdef PY_COERCE_C_LOCALE const char *locale_override = getenv("LC_ALL"); @@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET) } #endif /* Successfully configured locale, so make it the default */ - _coerce_default_locale_settings(config, target); + _coerce_default_locale_settings(warn, target); return; } } @@ -1162,11 +1162,7 @@ Py_FinalizeEx(void) /* Cleanup Unicode implementation */ _PyUnicode_Fini(); - /* reset file system default encoding */ - if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) { - PyMem_RawFree((char*)Py_FileSystemDefaultEncoding); - Py_FileSystemDefaultEncoding = NULL; - } + _Py_ClearFileSystemEncoding(); /* XXX Still allocated: - various static ad-hoc pointers to interned strings @@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp) static _PyInitError initfsencoding(PyInterpreterState *interp) { - PyObject *codec; + _PyCoreConfig *config = &interp->core_config; -#ifdef MS_WINDOWS - if (Py_LegacyWindowsFSEncodingFlag) { - Py_FileSystemDefaultEncoding = "mbcs"; - Py_FileSystemDefaultEncodeErrors = "replace"; - } - else { - Py_FileSystemDefaultEncoding = "utf-8"; - Py_FileSystemDefaultEncodeErrors = "surrogatepass"; + char *encoding = get_codec_name(config->filesystem_encoding); + if (encoding == NULL) { + /* Such error can only occurs in critical situations: no more + memory, import a module of the standard library failed, etc. 
*/ + return _Py_INIT_ERR("failed to get the Python codec " + "of the filesystem encoding"); } -#else - if (Py_FileSystemDefaultEncoding == NULL) { - if (interp->core_config.utf8_mode) { - Py_FileSystemDefaultEncoding = "utf-8"; - Py_HasFileSystemDefaultEncoding = 1; - } - else if (_Py_GetForceASCII()) { - Py_FileSystemDefaultEncoding = "ascii"; - Py_HasFileSystemDefaultEncoding = 1; - } - else { - extern _PyInitError _Py_get_locale_encoding(char **locale_encoding); - char *locale_encoding; - _PyInitError err = _Py_get_locale_encoding(&locale_encoding); - if (_Py_INIT_FAILED(err)) { - return err; - } - - Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding); - PyMem_RawFree(locale_encoding); - if (Py_FileSystemDefaultEncoding == NULL) { - return _Py_INIT_ERR("failed to get the Python codec " - "of the locale encoding"); - } + /* Update the filesystem encoding to the normalized Python codec name. + For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii" + (Python codec name). */ + PyMem_RawFree(config->filesystem_encoding); + config->filesystem_encoding = encoding; - Py_HasFileSystemDefaultEncoding = 0; - interp->fscodec_initialized = 1; - return _Py_INIT_OK(); - } + /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors + global configuration variables. */ + if (_Py_SetFileSystemEncoding(config->filesystem_encoding, + config->filesystem_errors) < 0) { + return _Py_INIT_NO_MEMORY(); } -#endif - /* the encoding is mbcs, utf-8 or ascii */ - codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); - if (!codec) { - /* Such error can only occurs in critical situations: no more - * memory, import a module of the standard library failed, - * etc. 
*/ - return _Py_INIT_ERR("unable to load the file system codec"); - } - Py_DECREF(codec); + /* PyUnicode can now use the Python codec rather than C implementation + for the filesystem encoding */ interp->fscodec_initialized = 1; return _Py_INIT_OK(); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 177b830..91df4b0 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -389,11 +389,9 @@ implementation." static PyObject * sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (Py_FileSystemDefaultEncoding) - return PyUnicode_FromString(Py_FileSystemDefaultEncoding); - PyErr_SetString(PyExc_RuntimeError, - "filesystem encoding is not initialized"); - return NULL; + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); + const _PyCoreConfig *config = &interp->core_config; + return PyUnicode_FromString(config->filesystem_encoding); } PyDoc_STRVAR(getfilesystemencoding_doc, @@ -406,11 +404,9 @@ operating system filenames." static PyObject * sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored)) { - if (Py_FileSystemDefaultEncodeErrors) - return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors); - PyErr_SetString(PyExc_RuntimeError, - "filesystem encoding is not initialized"); - return NULL; + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); + const _PyCoreConfig *config = &interp->core_config; + return PyUnicode_FromString(config->filesystem_errors); } PyDoc_STRVAR(getfilesystemencodeerrors_doc, @@ -1150,8 +1146,30 @@ environment variable before launching Python." 
static PyObject * sys_enablelegacywindowsfsencoding(PyObject *self) { - Py_FileSystemDefaultEncoding = "mbcs"; - Py_FileSystemDefaultEncodeErrors = "replace"; + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); + _PyCoreConfig *config = &interp->core_config; + + /* Set the filesystem encoding to mbcs/replace (PEP 529) */ + char *encoding = _PyMem_RawStrdup("mbcs"); + char *errors = _PyMem_RawStrdup("replace"); + if (encoding == NULL || errors == NULL) { + PyMem_Free(encoding); + PyMem_Free(errors); + PyErr_NoMemory(); + return NULL; + } + + PyMem_RawFree(config->filesystem_encoding); + config->filesystem_encoding = encoding; + PyMem_RawFree(config->filesystem_errors); + config->filesystem_errors = errors; + + if (_Py_SetFileSystemEncoding(config->filesystem_encoding, + config->filesystem_errors) < 0) { + PyErr_NoMemory(); + return NULL; + } + Py_RETURN_NONE; } |