diff options
author | Victor Stinner <vstinner@redhat.com> | 2019-05-02 18:56:30 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-02 18:56:30 (GMT) |
commit | 709d23dee69e700b87d5a4cb59e149d0e1af7993 (patch) | |
tree | b06aafe79f83137a3c85649bcebf1fbfd2ea1240 /Python | |
parent | 6ae2bbbdfcb8969d1d362b17c2fbd5a684fa4f9d (diff) | |
download | cpython-709d23dee69e700b87d5a4cb59e149d0e1af7993.zip cpython-709d23dee69e700b87d5a4cb59e149d0e1af7993.tar.gz cpython-709d23dee69e700b87d5a4cb59e149d0e1af7993.tar.bz2 |
bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)
_PyCoreConfig: Change the type of the filesystem_encoding, filesystem_errors,
stdio_encoding and stdio_errors fields from char* to wchar_t*.
Changes:
* PyInterpreterState: replace fscodec_initialized (int) with the fs_codec
structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
functions.
* Add error_handler parameter to unicode_encode_locale()
and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
to _PyCoreConfig_DecodeLocale().
Diffstat (limited to 'Python')
-rw-r--r-- | Python/coreconfig.c | 169 | ||||
-rw-r--r-- | Python/preconfig.c | 5 | ||||
-rw-r--r-- | Python/pylifecycle.c | 25 | ||||
-rw-r--r-- | Python/sysmodule.c | 27 |
4 files changed, 93 insertions, 133 deletions
diff --git a/Python/coreconfig.c b/Python/coreconfig.c index c40c1f8..15643be 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -523,27 +523,7 @@ _PyCoreConfig_Clear(_PyCoreConfig *config) /* Copy str into *config_str (duplicate the string) */ _PyInitError -_PyCoreConfig_SetString(char **config_str, const char *str) -{ - char *str2; - if (str != NULL) { - str2 = _PyMem_RawStrdup(str); - if (str2 == NULL) { - return _Py_INIT_NO_MEMORY(); - } - } - else { - str2 = NULL; - } - PyMem_RawFree(*config_str); - *config_str = str2; - return _Py_INIT_OK(); -} - - -/* Copy str into *config_str (duplicate the string) */ -_PyInitError -_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str) +_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str) { wchar_t *str2; if (str != NULL) { @@ -563,8 +543,8 @@ _PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str) /* Decode str using Py_DecodeLocale() and set the result into *config_str */ static _PyInitError -_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str, - const char *decode_err_msg) +_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str, + const char *decode_err_msg) { wchar_t *str2; if (str != NULL) { @@ -588,19 +568,17 @@ _PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str, } +#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \ + _PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME) + + _PyInitError -_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str) +_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str) { - return _PyCoreConfig_SetWideStringFromStringErr( - config_str, str, "cannot decode string"); + return CONFIG_DECODE_LOCALE(config_str, str, "string"); } -#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \ - _PyCoreConfig_SetWideStringFromStringErr(config_str, str, \ - "cannot decode " NAME) - - _PyInitError _PyCoreConfig_Copy(_PyCoreConfig 
*config, const _PyCoreConfig *config2) { @@ -608,16 +586,9 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) _PyCoreConfig_Clear(config); #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR -#define COPY_STR_ATTR(ATTR) \ - do { \ - err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \ - if (_Py_INIT_FAILED(err)) { \ - return err; \ - } \ - } while (0) #define COPY_WSTR_ATTR(ATTR) \ do { \ - err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \ + err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \ if (_Py_INIT_FAILED(err)) { \ return err; \ } \ @@ -676,10 +647,10 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(quiet); COPY_ATTR(user_site_directory); COPY_ATTR(buffered_stdio); - COPY_STR_ATTR(filesystem_encoding); - COPY_STR_ATTR(filesystem_errors); - COPY_STR_ATTR(stdio_encoding); - COPY_STR_ATTR(stdio_errors); + COPY_WSTR_ATTR(filesystem_encoding); + COPY_WSTR_ATTR(filesystem_errors); + COPY_WSTR_ATTR(stdio_encoding); + COPY_WSTR_ATTR(stdio_errors); #ifdef MS_WINDOWS COPY_ATTR(legacy_windows_stdio); #endif @@ -692,7 +663,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(_init_main); #undef COPY_ATTR -#undef COPY_STR_ATTR #undef COPY_WSTR_ATTR #undef COPY_WSTRLIST return _Py_INIT_OK(); @@ -721,16 +691,10 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) goto fail; \ } \ } while (0) -#define FROM_STRING(STR) \ - ((STR != NULL) ? \ - PyUnicode_FromString(STR) \ - : (Py_INCREF(Py_None), Py_None)) #define SET_ITEM_INT(ATTR) \ SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR)) #define SET_ITEM_UINT(ATTR) \ SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR)) -#define SET_ITEM_STR(ATTR) \ - SET_ITEM(#ATTR, FROM_STRING(config->ATTR)) #define FROM_WSTRING(STR) \ ((STR != NULL) ? 
\ PyUnicode_FromWideChar(STR, -1) \ @@ -753,8 +717,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) SET_ITEM_INT(show_alloc_count); SET_ITEM_INT(dump_refs); SET_ITEM_INT(malloc_stats); - SET_ITEM_STR(filesystem_encoding); - SET_ITEM_STR(filesystem_errors); + SET_ITEM_WSTR(filesystem_encoding); + SET_ITEM_WSTR(filesystem_errors); SET_ITEM_WSTR(pycache_prefix); SET_ITEM_WSTR(program_name); SET_ITEM_WSTRLIST(argv); @@ -783,8 +747,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) SET_ITEM_INT(quiet); SET_ITEM_INT(user_site_directory); SET_ITEM_INT(buffered_stdio); - SET_ITEM_STR(stdio_encoding); - SET_ITEM_STR(stdio_errors); + SET_ITEM_WSTR(stdio_encoding); + SET_ITEM_WSTR(stdio_errors); #ifdef MS_WINDOWS SET_ITEM_INT(legacy_windows_stdio); #endif @@ -803,12 +767,10 @@ fail: Py_DECREF(dict); return NULL; -#undef FROM_STRING #undef FROM_WSTRING #undef SET_ITEM #undef SET_ITEM_INT #undef SET_ITEM_UINT -#undef SET_ITEM_STR #undef SET_ITEM_WSTR #undef SET_ITEM_WSTRLIST } @@ -845,7 +807,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config, return _Py_INIT_OK(); } - return _PyCoreConfig_SetWideString(dest, var); + return _PyCoreConfig_SetString(dest, var); #else const char *var = getenv(name); if (!var || var[0] == '\0') { @@ -853,7 +815,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config, return _Py_INIT_OK(); } - return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg); + return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg); #endif } @@ -996,8 +958,7 @@ config_init_program_name(_PyCoreConfig *config) /* Use argv[0] by default, if available */ if (config->program != NULL) { - err = _PyCoreConfig_SetWideString(&config->program_name, - config->program); + err = _PyCoreConfig_SetString(&config->program_name, config->program); if (_Py_INIT_FAILED(err)) { return err; } @@ -1010,7 +971,7 @@ config_init_program_name(_PyCoreConfig *config) #else const wchar_t *default_program_name = L"python3"; #endif - err = 
_PyCoreConfig_SetWideString(&config->program_name, default_program_name); + err = _PyCoreConfig_SetString(&config->program_name, default_program_name); if (_Py_INIT_FAILED(err)) { return err; } @@ -1025,8 +986,8 @@ config_init_executable(_PyCoreConfig *config) /* If Py_SetProgramFullPath() was called, use its value */ const wchar_t *program_full_path = _Py_path_config.program_full_path; if (program_full_path != NULL) { - _PyInitError err = _PyCoreConfig_SetWideString(&config->executable, - program_full_path); + _PyInitError err = _PyCoreConfig_SetString(&config->executable, + program_full_path); if (_Py_INIT_FAILED(err)) { return err; } @@ -1051,7 +1012,7 @@ config_init_home(_PyCoreConfig *config) /* If Py_SetPythonHome() was called, use its value */ wchar_t *home = _Py_path_config.home; if (home) { - _PyInitError err = _PyCoreConfig_SetWideString(&config->home, home); + _PyInitError err = _PyCoreConfig_SetString(&config->home, home); if (_Py_INIT_FAILED(err)) { return err; } @@ -1280,7 +1241,7 @@ config_read_complex_options(_PyCoreConfig *config) } -static const char * +static const wchar_t * config_get_stdio_errors(const _PyCoreConfig *config) { #ifndef MS_WINDOWS @@ -1288,43 +1249,44 @@ config_get_stdio_errors(const _PyCoreConfig *config) if (loc != NULL) { /* surrogateescape is the default in the legacy C and POSIX locales */ if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) { - return "surrogateescape"; + return L"surrogateescape"; } #ifdef PY_COERCE_C_LOCALE /* surrogateescape is the default in locale coercion target locales */ if (_Py_IsLocaleCoercionTarget(loc)) { - return "surrogateescape"; + return L"surrogateescape"; } #endif } - return "strict"; + return L"strict"; #else /* On Windows, always use surrogateescape by default */ - return "surrogateescape"; + return L"surrogateescape"; #endif } static _PyInitError -config_get_locale_encoding(char **locale_encoding) +config_get_locale_encoding(wchar_t **locale_encoding) { #ifdef MS_WINDOWS char 
encoding[20]; PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP()); + return _PyCoreConfig_DecodeLocale(locale_encoding, encoding); #elif defined(_Py_FORCE_UTF8_LOCALE) - const char *encoding = "UTF-8"; + return _PyCoreConfig_SetString(locale_encoding, L"utf-8"); #else const char *encoding = nl_langinfo(CODESET); if (!encoding || encoding[0] == '\0') { return _Py_INIT_ERR("failed to get the locale encoding: " "nl_langinfo(CODESET) failed"); } + /* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */ + return CONFIG_DECODE_LOCALE(locale_encoding, encoding, + "nl_langinfo(CODESET)"); #endif - - assert(*locale_encoding == NULL); - return _PyCoreConfig_SetString(locale_encoding, encoding); } @@ -1337,16 +1299,18 @@ config_init_stdio_encoding(_PyCoreConfig *config, /* If Py_SetStandardStreamEncoding() have been called, use these parameters. */ if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) { - err = _PyCoreConfig_SetString(&config->stdio_encoding, - _Py_StandardStreamEncoding); + err = CONFIG_DECODE_LOCALE(&config->stdio_encoding, + _Py_StandardStreamEncoding, + "_Py_StandardStreamEncoding"); if (_Py_INIT_FAILED(err)) { return err; } } if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) { - err = _PyCoreConfig_SetString(&config->stdio_errors, - _Py_StandardStreamErrors); + err = CONFIG_DECODE_LOCALE(&config->stdio_errors, + _Py_StandardStreamErrors, + "_Py_StandardStreamErrors"); if (_Py_INIT_FAILED(err)) { return err; } @@ -1359,11 +1323,9 @@ config_init_stdio_encoding(_PyCoreConfig *config, /* PYTHONIOENCODING environment variable */ const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING"); if (opt) { - /* _PyCoreConfig_SetString() requires dest to be initialized to NULL */ - char *pythonioencoding = NULL; - err = _PyCoreConfig_SetString(&pythonioencoding, opt); - if (_Py_INIT_FAILED(err)) { - return err; + char *pythonioencoding = _PyMem_RawStrdup(opt); + if (pythonioencoding == NULL) { + return 
_Py_INIT_NO_MEMORY(); } char *errors = strchr(pythonioencoding, ':'); @@ -1378,8 +1340,9 @@ config_init_stdio_encoding(_PyCoreConfig *config, /* Does PYTHONIOENCODING contain an encoding? */ if (pythonioencoding[0]) { if (config->stdio_encoding == NULL) { - err = _PyCoreConfig_SetString(&config->stdio_encoding, - pythonioencoding); + err = CONFIG_DECODE_LOCALE(&config->stdio_encoding, + pythonioencoding, + "PYTHONIOENCODING environment variable"); if (_Py_INIT_FAILED(err)) { PyMem_RawFree(pythonioencoding); return err; @@ -1396,7 +1359,9 @@ config_init_stdio_encoding(_PyCoreConfig *config, } if (config->stdio_errors == NULL && errors != NULL) { - err = _PyCoreConfig_SetString(&config->stdio_errors, errors); + err = CONFIG_DECODE_LOCALE(&config->stdio_errors, + errors, + "PYTHONIOENCODING environment variable"); if (_Py_INIT_FAILED(err)) { PyMem_RawFree(pythonioencoding); return err; @@ -1409,15 +1374,14 @@ config_init_stdio_encoding(_PyCoreConfig *config, /* UTF-8 Mode uses UTF-8/surrogateescape */ if (preconfig->utf8_mode) { if (config->stdio_encoding == NULL) { - err = _PyCoreConfig_SetString(&config->stdio_encoding, - "utf-8"); + err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8"); if (_Py_INIT_FAILED(err)) { return err; } } if (config->stdio_errors == NULL) { err = _PyCoreConfig_SetString(&config->stdio_errors, - "surrogateescape"); + L"surrogateescape"); if (_Py_INIT_FAILED(err)) { return err; } @@ -1432,7 +1396,7 @@ config_init_stdio_encoding(_PyCoreConfig *config, } } if (config->stdio_errors == NULL) { - const char *errors = config_get_stdio_errors(config); + const wchar_t *errors = config_get_stdio_errors(config); assert(errors != NULL); err = _PyCoreConfig_SetString(&config->stdio_errors, errors); @@ -1452,33 +1416,32 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig) if (config->filesystem_encoding == NULL) { #ifdef _Py_FORCE_UTF8_FS_ENCODING - err = _PyCoreConfig_SetString(&config->filesystem_encoding, - 
"utf-8"); + err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8"); #else #ifdef MS_WINDOWS if (preconfig->legacy_windows_fs_encoding) { /* Legacy Windows filesystem encoding: mbcs/replace */ err = _PyCoreConfig_SetString(&config->filesystem_encoding, - "mbcs"); + L"mbcs"); } else #endif if (preconfig->utf8_mode) { err = _PyCoreConfig_SetString(&config->filesystem_encoding, - "utf-8"); + L"utf-8"); } #ifndef MS_WINDOWS else if (_Py_GetForceASCII()) { err = _PyCoreConfig_SetString(&config->filesystem_encoding, - "ascii"); + L"ascii"); } #endif else { #ifdef MS_WINDOWS /* Windows defaults to utf-8/surrogatepass (PEP 529). */ err = _PyCoreConfig_SetString(&config->filesystem_encoding, - "utf-8"); + L"utf-8"); #else err = config_get_locale_encoding(&config->filesystem_encoding); #endif @@ -1491,16 +1454,16 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig) } if (config->filesystem_errors == NULL) { - const char *errors; + const wchar_t *errors; #ifdef MS_WINDOWS if (preconfig->legacy_windows_fs_encoding) { - errors = "replace"; + errors = L"replace"; } else { - errors = "surrogatepass"; + errors = L"surrogatepass"; } #else - errors = "surrogateescape"; + errors = L"surrogateescape"; #endif err = _PyCoreConfig_SetString(&config->filesystem_errors, errors); if (_Py_INIT_FAILED(err)) { @@ -1745,8 +1708,8 @@ config_parse_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline, || wcscmp(_PyOS_optarg, L"never") == 0 || wcscmp(_PyOS_optarg, L"default") == 0) { - err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, - _PyOS_optarg); + err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, + _PyOS_optarg); if (_Py_INIT_FAILED(err)) { return err; } @@ -2119,7 +2082,7 @@ config_read_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline) } if (config->check_hash_pycs_mode == NULL) { - err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default"); + err = 
_PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default"); if (_Py_INIT_FAILED(err)) { goto done; } diff --git a/Python/preconfig.c b/Python/preconfig.c index 108cbc6..48b9e83 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -14,7 +14,10 @@ /* --- File system encoding/errors -------------------------------- */ /* The filesystem encoding is chosen by config_init_fs_encoding(), - see also initfsencoding(). */ + see also initfsencoding(). + + Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors + are encoded to UTF-8. */ const char *Py_FileSystemDefaultEncoding = NULL; int Py_HasFileSystemDefaultEncoding = 0; const char *Py_FileSystemDefaultEncodeErrors = NULL; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 01ef027..2a633cf 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1668,7 +1668,7 @@ is_valid_fd(int fd) static PyObject* create_stdio(const _PyCoreConfig *config, PyObject* io, int fd, int write_mode, const char* name, - const char* encoding, const char* errors) + const wchar_t* encoding, const wchar_t* errors) { PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res; const char* mode; @@ -1718,7 +1718,7 @@ create_stdio(const _PyCoreConfig *config, PyObject* io, #ifdef MS_WINDOWS /* Windows console IO is always UTF-8 encoded */ if (PyWindowsConsoleIO_Check(raw)) - encoding = "utf-8"; + encoding = L"utf-8"; #endif text = PyUnicode_FromString(name); @@ -1754,10 +1754,25 @@ create_stdio(const _PyCoreConfig *config, PyObject* io, newline = "\n"; #endif - stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO", - buf, encoding, errors, + PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1); + if (encoding_str == NULL) { + Py_CLEAR(buf); + goto error; + } + + PyObject *errors_str = PyUnicode_FromWideChar(errors, -1); + if (errors_str == NULL) { + Py_CLEAR(buf); + Py_CLEAR(encoding_str); + goto error; + } + + stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO", 
+ buf, encoding_str, errors_str, newline, line_buffering, write_through); Py_CLEAR(buf); + Py_CLEAR(encoding_str); + Py_CLEAR(errors_str); if (stream == NULL) goto error; @@ -1874,7 +1889,7 @@ init_sys_streams(PyInterpreterState *interp) fd = fileno(stderr); std = create_stdio(config, iomod, fd, 1, "<stderr>", config->stdio_encoding, - "backslashreplace"); + L"backslashreplace"); if (std == NULL) goto error; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 0f7af2c..fbdeb9b 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -424,7 +424,7 @@ sys_getfilesystemencoding_impl(PyObject *module) { PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); const _PyCoreConfig *config = &interp->core_config; - return PyUnicode_FromString(config->filesystem_encoding); + return PyUnicode_FromWideChar(config->filesystem_encoding, -1); } /*[clinic input] @@ -439,7 +439,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module) { PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); const _PyCoreConfig *config = &interp->core_config; - return PyUnicode_FromString(config->filesystem_errors); + return PyUnicode_FromWideChar(config->filesystem_errors, -1); } /*[clinic input] @@ -1211,30 +1211,9 @@ static PyObject * sys__enablelegacywindowsfsencoding_impl(PyObject *module) /*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/ { - PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); - _PyCoreConfig *config = &interp->core_config; - - /* Set the filesystem encoding to mbcs/replace (PEP 529) */ - char *encoding = _PyMem_RawStrdup("mbcs"); - char *errors = _PyMem_RawStrdup("replace"); - if (encoding == NULL || errors == NULL) { - PyMem_Free(encoding); - PyMem_Free(errors); - PyErr_NoMemory(); - return NULL; - } - - PyMem_RawFree(config->filesystem_encoding); - config->filesystem_encoding = encoding; - PyMem_RawFree(config->filesystem_errors); - config->filesystem_errors = errors; - - if 
(_Py_SetFileSystemEncoding(config->filesystem_encoding, - config->filesystem_errors) < 0) { - PyErr_NoMemory(); + if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) { return NULL; } - Py_RETURN_NONE; } |