summary refs log tree commit diff stats
path: root/Python
diff options
context:
space:
mode:
Diffstat (limited to 'Python')
-rw-r--r-- Python/coreconfig.c 354
-rw-r--r-- Python/preconfig.c 400
-rw-r--r-- Python/pylifecycle.c 2
-rw-r--r-- Python/sysmodule.c 2
4 files changed, 417 insertions, 341 deletions
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index a6aa89b..e372de4 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -531,10 +531,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(dump_refs);
COPY_ATTR(malloc_stats);
- COPY_ATTR(coerce_c_locale);
- COPY_ATTR(coerce_c_locale_warn);
- COPY_ATTR(utf8_mode);
-
COPY_WSTR_ATTR(pycache_prefix);
COPY_WSTR_ATTR(module_search_path_env);
COPY_WSTR_ATTR(home);
@@ -571,7 +567,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
- COPY_ATTR(legacy_windows_fs_encoding);
COPY_ATTR(legacy_windows_stdio);
#endif
COPY_ATTR(skip_source_first_line);
@@ -592,19 +587,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
const char*
_PyCoreConfig_GetEnv(const _PyCoreConfig *config, const char *name)
{
- assert(config->preconfig.use_environment >= 0);
-
- if (!config->preconfig.use_environment) {
- return NULL;
- }
-
- const char *var = getenv(name);
- if (var && var[0] != '\0') {
- return var;
- }
- else {
- return NULL;
- }
+ return _PyPreConfig_GetEnv(&config->preconfig, name);
}
@@ -670,7 +653,6 @@ _PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config)
config->ATTR = !(VALUE); \
}
- COPY_FLAG(utf8_mode, Py_UTF8Mode);
COPY_FLAG(bytes_warning, Py_BytesWarningFlag);
COPY_FLAG(inspect, Py_InspectFlag);
COPY_FLAG(interactive, Py_InteractiveFlag);
@@ -679,7 +661,6 @@ _PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config)
COPY_FLAG(verbose, Py_VerboseFlag);
COPY_FLAG(quiet, Py_QuietFlag);
#ifdef MS_WINDOWS
- COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag);
#endif
COPY_FLAG(_frozen, Py_FrozenFlag);
@@ -709,7 +690,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config)
VAR = !config->ATTR; \
}
- COPY_FLAG(utf8_mode, Py_UTF8Mode);
COPY_FLAG(bytes_warning, Py_BytesWarningFlag);
COPY_FLAG(inspect, Py_InspectFlag);
COPY_FLAG(interactive, Py_InteractiveFlag);
@@ -718,7 +698,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config)
COPY_FLAG(verbose, Py_VerboseFlag);
COPY_FLAG(quiet, Py_QuietFlag);
#ifdef MS_WINDOWS
- COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag);
#endif
COPY_FLAG(_frozen, Py_FrozenFlag);
@@ -838,23 +817,7 @@ config_init_executable(_PyCoreConfig *config)
static const wchar_t*
config_get_xoption(const _PyCoreConfig *config, wchar_t *name)
{
- int nxoption = config->nxoption;
- wchar_t **xoptions = config->xoptions;
- for (int i=0; i < nxoption; i++) {
- wchar_t *option = xoptions[i];
- size_t len;
- wchar_t *sep = wcschr(option, L'=');
- if (sep != NULL) {
- len = (sep - option);
- }
- else {
- len = wcslen(option);
- }
- if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
- return option;
- }
- }
- return NULL;
+ return _Py_get_xoption(config->nxoption, config->xoptions, name);
}
@@ -915,67 +878,6 @@ config_init_hash_seed(_PyCoreConfig *config)
}
-static _PyInitError
-config_init_utf8_mode(_PyCoreConfig *config)
-{
- const wchar_t *xopt = config_get_xoption(config, L"utf8");
- if (xopt) {
- wchar_t *sep = wcschr(xopt, L'=');
- if (sep) {
- xopt = sep + 1;
- if (wcscmp(xopt, L"1") == 0) {
- config->utf8_mode = 1;
- }
- else if (wcscmp(xopt, L"0") == 0) {
- config->utf8_mode = 0;
- }
- else {
- return _Py_INIT_USER_ERR("invalid -X utf8 option value");
- }
- }
- else {
- config->utf8_mode = 1;
- }
- return _Py_INIT_OK();
- }
-
- const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONUTF8");
- if (opt) {
- if (strcmp(opt, "1") == 0) {
- config->utf8_mode = 1;
- }
- else if (strcmp(opt, "0") == 0) {
- config->utf8_mode = 0;
- }
- else {
- return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment "
- "variable value");
- }
- return _Py_INIT_OK();
- }
-
- return _Py_INIT_OK();
-}
-
-
-static int
-config_str_to_int(const char *str, int *result)
-{
- const char *endptr = str;
- errno = 0;
- long value = strtol(str, (char **)&endptr, 10);
- if (*endptr != '\0' || errno == ERANGE) {
- return -1;
- }
- if (value < INT_MIN || value > INT_MAX) {
- return -1;
- }
-
- *result = (int)value;
- return 0;
-}
-
-
static int
config_wstr_to_int(const wchar_t *wstr, int *result)
{
@@ -994,27 +896,12 @@ config_wstr_to_int(const wchar_t *wstr, int *result)
}
-static void
-get_env_flag(_PyCoreConfig *config, int *flag, const char *name)
-{
- const char *var = _PyCoreConfig_GetEnv(config, name);
- if (!var) {
- return;
- }
- int value;
- if (config_str_to_int(var, &value) < 0 || value < 0) {
- /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
- value = 1;
- }
- if (*flag < value) {
- *flag = value;
- }
-}
-
-
static _PyInitError
config_read_env_vars(_PyCoreConfig *config)
{
+#define get_env_flag(CONFIG, ATTR, NAME) \
+ _Py_get_env_flag(&(CONFIG)->preconfig, (ATTR), (NAME))
+
/* Get environment variables */
get_env_flag(config, &config->parser_debug, "PYTHONDEBUG");
get_env_flag(config, &config->verbose, "PYTHONVERBOSE");
@@ -1040,8 +927,6 @@ config_read_env_vars(_PyCoreConfig *config)
}
#ifdef MS_WINDOWS
- get_env_flag(config, &config->legacy_windows_fs_encoding,
- "PYTHONLEGACYWINDOWSFSENCODING");
get_env_flag(config, &config->legacy_windows_stdio,
"PYTHONLEGACYWINDOWSSTDIO");
#endif
@@ -1057,23 +942,6 @@ config_read_env_vars(_PyCoreConfig *config)
config->malloc_stats = 1;
}
- const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
- if (env) {
- if (strcmp(env, "0") == 0) {
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 0;
- }
- }
- else if (strcmp(env, "warn") == 0) {
- config->coerce_c_locale_warn = 1;
- }
- else {
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 1;
- }
- }
- }
-
wchar_t *path;
int res = _PyCoreConfig_GetEnvDup(config, &path,
L"PYTHONPATH", "PYTHONPATH");
@@ -1090,6 +958,8 @@ config_read_env_vars(_PyCoreConfig *config)
}
return _Py_INIT_OK();
+
+#undef get_env_flag
}
@@ -1101,7 +971,7 @@ config_init_tracemalloc(_PyCoreConfig *config)
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONTRACEMALLOC");
if (env) {
- if (!config_str_to_int(env, &nframe)) {
+ if (!_Py_str_to_int(env, &nframe)) {
valid = (nframe >= 0);
}
else {
@@ -1213,37 +1083,6 @@ config_read_complex_options(_PyCoreConfig *config)
}
-static void
-config_init_locale(_PyCoreConfig *config)
-{
- /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't
- imply that the C locale is always coerced. It is only coerced if
- if the LC_CTYPE locale is "C". */
- if (config->coerce_c_locale != 0) {
- /* The C locale enables the C locale coercion (PEP 538) */
- if (_Py_LegacyLocaleDetected()) {
- config->coerce_c_locale = 1;
- }
- else {
- config->coerce_c_locale = 0;
- }
- }
-
-#ifndef MS_WINDOWS
- if (config->utf8_mode < 0) {
- /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
- const char *ctype_loc = setlocale(LC_CTYPE, NULL);
- if (ctype_loc != NULL
- && (strcmp(ctype_loc, "C") == 0
- || strcmp(ctype_loc, "POSIX") == 0))
- {
- config->utf8_mode = 1;
- }
- }
-#endif
-}
-
-
static const char *
get_stdio_errors(const _PyCoreConfig *config)
{
@@ -1365,7 +1204,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
}
/* UTF-8 Mode uses UTF-8/surrogateescape */
- if (config->utf8_mode) {
+ if (config->preconfig.utf8_mode) {
if (config->stdio_encoding == NULL) {
config->stdio_encoding = _PyMem_RawStrdup("utf-8");
if (config->stdio_encoding == NULL) {
@@ -1403,7 +1242,7 @@ static _PyInitError
config_init_fs_encoding(_PyCoreConfig *config)
{
#ifdef MS_WINDOWS
- if (config->legacy_windows_fs_encoding) {
+ if (config->preconfig.legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
if (config->filesystem_encoding == NULL) {
config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
@@ -1438,7 +1277,7 @@ config_init_fs_encoding(_PyCoreConfig *config)
}
#else
if (config->filesystem_encoding == NULL) {
- if (config->utf8_mode) {
+ if (config->preconfig.utf8_mode) {
/* UTF-8 Mode use: utf-8/surrogateescape */
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
/* errors defaults to surrogateescape above */
@@ -1539,12 +1378,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
config->user_site_directory = 0;
}
-#ifdef MS_WINDOWS
- if (config->legacy_windows_fs_encoding) {
- config->utf8_mode = 0;
- }
-#endif
-
if (config->preconfig.use_environment) {
err = config_read_env_vars(config);
if (_Py_INIT_FAILED(err)) {
@@ -1565,13 +1398,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
return err;
}
- if (config->utf8_mode < 0) {
- err = config_init_utf8_mode(config);
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
- }
-
if (config->home == NULL) {
err = config_init_home(config);
if (_Py_INIT_FAILED(err)) {
@@ -1593,10 +1419,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
}
}
- if (config->coerce_c_locale != 0 || config->utf8_mode < 0) {
- config_init_locale(config);
- }
-
if (config->_install_importlib) {
err = _PyCoreConfig_InitPathConfig(config);
if (_Py_INIT_FAILED(err)) {
@@ -1623,12 +1445,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
if (config->tracemalloc < 0) {
config->tracemalloc = 0;
}
- if (config->coerce_c_locale < 0) {
- config->coerce_c_locale = 0;
- }
- if (config->utf8_mode < 0) {
- config->utf8_mode = 0;
- }
if (config->argc < 0) {
config->argc = 0;
}
@@ -1645,7 +1461,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig)
return err;
}
- assert(config->coerce_c_locale >= 0);
assert(config->preconfig.use_environment >= 0);
assert(config->filesystem_encoding != NULL);
assert(config->filesystem_errors != NULL);
@@ -1703,9 +1518,6 @@ config_init_stdio(const _PyCoreConfig *config)
void
_PyCoreConfig_Write(const _PyCoreConfig *config)
{
- if (config->coerce_c_locale) {
- _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
- }
_PyCoreConfig_SetGlobalConfig(config);
config_init_stdio(config);
}
@@ -1769,11 +1581,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_INT(show_alloc_count);
SET_ITEM_INT(dump_refs);
SET_ITEM_INT(malloc_stats);
- SET_ITEM_INT(coerce_c_locale);
- SET_ITEM_INT(coerce_c_locale_warn);
SET_ITEM_STR(filesystem_encoding);
SET_ITEM_STR(filesystem_errors);
- SET_ITEM_INT(utf8_mode);
SET_ITEM_WSTR(pycache_prefix);
SET_ITEM_WSTR(program_name);
SET_ITEM_WSTRLIST(argc, argv);
@@ -1805,7 +1614,6 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_STR(stdio_encoding);
SET_ITEM_STR(stdio_errors);
#ifdef MS_WINDOWS
- SET_ITEM_INT(legacy_windows_fs_encoding);
SET_ITEM_INT(legacy_windows_stdio);
#endif
SET_ITEM_INT(skip_source_first_line);
@@ -2318,33 +2126,6 @@ config_from_cmdline(_PyCoreConfig *config, _PyCmdline *cmdline,
}
-static _PyInitError
-config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args,
- const _PyPreConfig *preconfig)
-{
- _PyInitError err;
-
- _PyCmdline cmdline;
- memset(&cmdline, 0, sizeof(cmdline));
- cmdline.args = args;
-
- err = _PyArgv_Decode(cmdline.args, &cmdline.argv);
- if (_Py_INIT_FAILED(err)) {
- goto done;
- }
-
- err = config_from_cmdline(config, &cmdline, preconfig);
- if (_Py_INIT_FAILED(err)) {
- goto done;
- }
- err = _Py_INIT_OK();
-
-done:
- cmdline_clear(&cmdline);
- return err;
-}
-
-
/* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE
locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538).
@@ -2358,118 +2139,23 @@ _PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args,
const _PyPreConfig *preconfig)
{
_PyInitError err;
- int init_utf8_mode = Py_UTF8Mode;
-#ifdef MS_WINDOWS
- int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
-#endif
- _PyCoreConfig save_config = _PyCoreConfig_INIT;
- int locale_coerced = 0;
- int loops = 0;
- char *init_ctype_locale = NULL;
- /* copy LC_CTYPE locale */
- const char *loc = setlocale(LC_CTYPE, NULL);
- if (loc == NULL) {
- err = _Py_INIT_ERR("failed to LC_CTYPE locale");
- goto done;
- }
- init_ctype_locale = _PyMem_RawStrdup(loc);
- if (init_ctype_locale == NULL) {
- err = _Py_INIT_NO_MEMORY();
- goto done;
- }
+ _PyCmdline cmdline;
+ memset(&cmdline, 0, sizeof(cmdline));
+ cmdline.args = args;
- if (_PyCoreConfig_Copy(&save_config, config) < 0) {
- err = _Py_INIT_NO_MEMORY();
+ err = _PyArgv_Decode(cmdline.args, &cmdline.argv);
+ if (_Py_INIT_FAILED(err)) {
goto done;
}
- /* Set LC_CTYPE to the user preferred locale */
- _Py_SetLocaleFromEnv(LC_CTYPE);
-
- while (1) {
- int utf8_mode = config->utf8_mode;
- int encoding_changed = 0;
-
- /* Watchdog to prevent an infinite loop */
- loops++;
- if (loops == 3) {
- err = _Py_INIT_ERR("Encoding changed twice while "
- "reading the configuration");
- goto done;
- }
-
- /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
- on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
- Py_UTF8Mode = config->utf8_mode;
-#ifdef MS_WINDOWS
- Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
-#endif
-
- err = config_read_from_argv_impl(config, args, preconfig);
- if (_Py_INIT_FAILED(err)) {
- goto done;
- }
- if (locale_coerced) {
- config->coerce_c_locale = 1;
- }
-
- /* The legacy C locale assumes ASCII as the default text encoding, which
- * causes problems not only for the CPython runtime, but also other
- * components like GNU readline.
- *
- * Accordingly, when the CLI detects it, it attempts to coerce it to a
- * more capable UTF-8 based alternative.
- *
- * See the documentation of the PYTHONCOERCECLOCALE setting for more
- * details.
- */
- if (config->coerce_c_locale && !locale_coerced) {
- locale_coerced = 1;
- _Py_CoerceLegacyLocale(0);
- encoding_changed = 1;
- }
-
- if (utf8_mode == -1) {
- if (config->utf8_mode == 1) {
- /* UTF-8 Mode enabled */
- encoding_changed = 1;
- }
- }
- else {
- if (config->utf8_mode != utf8_mode) {
- encoding_changed = 1;
- }
- }
-
- if (!encoding_changed) {
- break;
- }
-
- /* Reset the configuration before reading again the configuration,
- just keep UTF-8 Mode value. */
- int new_utf8_mode = config->utf8_mode;
- int new_coerce_c_locale = config->coerce_c_locale;
- if (_PyCoreConfig_Copy(config, &save_config) < 0) {
- err = _Py_INIT_NO_MEMORY();
- goto done;
- }
- config->utf8_mode = new_utf8_mode;
- config->coerce_c_locale = new_coerce_c_locale;
-
- /* The encoding changed: read again the configuration
- with the new encoding */
+ err = config_from_cmdline(config, &cmdline, preconfig);
+ if (_Py_INIT_FAILED(err)) {
+ goto done;
}
err = _Py_INIT_OK();
done:
- if (init_ctype_locale != NULL) {
- setlocale(LC_CTYPE, init_ctype_locale);
- }
- _PyCoreConfig_Clear(&save_config);
- Py_UTF8Mode = init_utf8_mode ;
-#ifdef MS_WINDOWS
- Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
-#endif
+ cmdline_clear(&cmdline);
return err;
}
diff --git a/Python/preconfig.c b/Python/preconfig.c
index af70f38..3befecf 100644
--- a/Python/preconfig.c
+++ b/Python/preconfig.c
@@ -1,6 +1,8 @@
#include "Python.h"
#include "pycore_coreconfig.h"
#include "pycore_getopt.h"
+#include "pycore_pystate.h" /* _PyRuntime_Initialize() */
+#include <locale.h> /* setlocale() */
#define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -99,6 +101,8 @@ typedef struct {
const _PyArgv *args;
int argc;
wchar_t **argv;
+ int nxoption; /* Number of -X options */
+ wchar_t **xoptions; /* -X options */
} _PyPreCmdline;
@@ -109,6 +113,10 @@ precmdline_clear(_PyPreCmdline *cmdline)
_Py_wstrlist_clear(cmdline->args->argc, cmdline->argv);
}
cmdline->argv = NULL;
+
+ _Py_wstrlist_clear(cmdline->nxoption, cmdline->xoptions);
+ cmdline->nxoption = 0;
+ cmdline->xoptions = NULL;
}
@@ -129,6 +137,12 @@ _PyPreConfig_Copy(_PyPreConfig *config, const _PyPreConfig *config2)
COPY_ATTR(isolated);
COPY_ATTR(use_environment);
+ COPY_ATTR(coerce_c_locale);
+ COPY_ATTR(coerce_c_locale_warn);
+#ifdef MS_WINDOWS
+ COPY_ATTR(legacy_windows_fs_encoding);
+#endif
+ COPY_ATTR(utf8_mode);
#undef COPY_ATTR
return 0;
@@ -149,6 +163,10 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config)
COPY_FLAG(isolated, Py_IsolatedFlag);
COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
+#ifdef MS_WINDOWS
+ COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
+#endif
+ COPY_FLAG(utf8_mode, Py_UTF8Mode);
#undef COPY_FLAG
#undef COPY_NOT_FLAG
@@ -169,14 +187,161 @@ _PyPreConfig_SetGlobalConfig(const _PyPreConfig *config)
COPY_FLAG(isolated, Py_IsolatedFlag);
COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
+#ifdef MS_WINDOWS
+ COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
+#endif
+ COPY_FLAG(utf8_mode, Py_UTF8Mode);
#undef COPY_FLAG
#undef COPY_NOT_FLAG
}
-_PyInitError
-_PyPreConfig_Read(_PyPreConfig *config)
+const char*
+_PyPreConfig_GetEnv(const _PyPreConfig *config, const char *name)
+{
+ assert(config->use_environment >= 0);
+
+ if (!config->use_environment) {
+ return NULL;
+ }
+
+ const char *var = getenv(name);
+ if (var && var[0] != '\0') {
+ return var;
+ }
+ else {
+ return NULL;
+ }
+}
+
+
+int
+_Py_str_to_int(const char *str, int *result)
+{
+ const char *endptr = str;
+ errno = 0;
+ long value = strtol(str, (char **)&endptr, 10);
+ if (*endptr != '\0' || errno == ERANGE) {
+ return -1;
+ }
+ if (value < INT_MIN || value > INT_MAX) {
+ return -1;
+ }
+
+ *result = (int)value;
+ return 0;
+}
+
+
+void
+_Py_get_env_flag(_PyPreConfig *config, int *flag, const char *name)
+{
+ const char *var = _PyPreConfig_GetEnv(config, name);
+ if (!var) {
+ return;
+ }
+ int value;
+ if (_Py_str_to_int(var, &value) < 0 || value < 0) {
+ /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
+ value = 1;
+ }
+ if (*flag < value) {
+ *flag = value;
+ }
+}
+
+
+const wchar_t*
+_Py_get_xoption(int nxoption, wchar_t * const *xoptions, const wchar_t *name)
+{
+ for (int i=0; i < nxoption; i++) {
+ const wchar_t *option = xoptions[i];
+ size_t len;
+ wchar_t *sep = wcschr(option, L'=');
+ if (sep != NULL) {
+ len = (sep - option);
+ }
+ else {
+ len = wcslen(option);
+ }
+ if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
+ return option;
+ }
+ }
+ return NULL;
+}
+
+
+static _PyInitError
+preconfig_init_utf8_mode(_PyPreConfig *config, const _PyPreCmdline *cmdline)
+{
+ const wchar_t *xopt;
+ if (cmdline) {
+ xopt = _Py_get_xoption(cmdline->nxoption, cmdline->xoptions, L"utf8");
+ }
+ else {
+ xopt = NULL;
+ }
+ if (xopt) {
+ wchar_t *sep = wcschr(xopt, L'=');
+ if (sep) {
+ xopt = sep + 1;
+ if (wcscmp(xopt, L"1") == 0) {
+ config->utf8_mode = 1;
+ }
+ else if (wcscmp(xopt, L"0") == 0) {
+ config->utf8_mode = 0;
+ }
+ else {
+ return _Py_INIT_USER_ERR("invalid -X utf8 option value");
+ }
+ }
+ else {
+ config->utf8_mode = 1;
+ }
+ return _Py_INIT_OK();
+ }
+
+ const char *opt = _PyPreConfig_GetEnv(config, "PYTHONUTF8");
+ if (opt) {
+ if (strcmp(opt, "1") == 0) {
+ config->utf8_mode = 1;
+ }
+ else if (strcmp(opt, "0") == 0) {
+ config->utf8_mode = 0;
+ }
+ else {
+ return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment "
+ "variable value");
+ }
+ return _Py_INIT_OK();
+ }
+
+ return _Py_INIT_OK();
+}
+
+
+static void
+preconfig_init_locale(_PyPreConfig *config)
+{
+ /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't
+ imply that the C locale is always coerced. It is only coerced if
+ if the LC_CTYPE locale is "C". */
+ if (config->coerce_c_locale != 0) {
+ /* The C locale enables the C locale coercion (PEP 538) */
+ if (_Py_LegacyLocaleDetected()) {
+ config->coerce_c_locale = 1;
+ }
+ else {
+ config->coerce_c_locale = 0;
+ }
+ }
+}
+
+
+static _PyInitError
+preconfig_read(_PyPreConfig *config, const _PyPreCmdline *cmdline)
{
_PyPreConfig_GetGlobalConfig(config);
@@ -189,6 +354,69 @@ _PyPreConfig_Read(_PyPreConfig *config)
config->use_environment = 0;
}
+ if (config->use_environment) {
+#ifdef MS_WINDOWS
+ _Py_get_env_flag(config, &config->legacy_windows_fs_encoding,
+ "PYTHONLEGACYWINDOWSFSENCODING");
+#endif
+
+ const char *env = _PyPreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
+ if (env) {
+ if (strcmp(env, "0") == 0) {
+ if (config->coerce_c_locale < 0) {
+ config->coerce_c_locale = 0;
+ }
+ }
+ else if (strcmp(env, "warn") == 0) {
+ config->coerce_c_locale_warn = 1;
+ }
+ else {
+ if (config->coerce_c_locale < 0) {
+ config->coerce_c_locale = 1;
+ }
+ }
+ }
+ }
+
+#ifdef MS_WINDOWS
+ if (config->legacy_windows_fs_encoding) {
+ config->utf8_mode = 0;
+ }
+#endif
+
+ if (config->utf8_mode < 0) {
+ _PyInitError err = preconfig_init_utf8_mode(config, cmdline);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+ }
+
+ if (config->coerce_c_locale != 0) {
+ preconfig_init_locale(config);
+ }
+
+#ifndef MS_WINDOWS
+ if (config->utf8_mode < 0) {
+ /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+ const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+ if (ctype_loc != NULL
+ && (strcmp(ctype_loc, "C") == 0
+ || strcmp(ctype_loc, "POSIX") == 0))
+ {
+ config->utf8_mode = 1;
+ }
+ }
+#endif
+
+ if (config->coerce_c_locale < 0) {
+ config->coerce_c_locale = 0;
+ }
+ if (config->utf8_mode < 0) {
+ config->utf8_mode = 0;
+ }
+
+ assert(config->coerce_c_locale >= 0);
+ assert(config->utf8_mode >= 0);
assert(config->isolated >= 0);
assert(config->use_environment >= 0);
@@ -196,6 +424,13 @@ _PyPreConfig_Read(_PyPreConfig *config)
}
+_PyInitError
+_PyPreConfig_Read(_PyPreConfig *config)
+{
+ return preconfig_read(config, NULL);
+}
+
+
int
_PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict)
{
@@ -216,6 +451,12 @@ _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict)
SET_ITEM_INT(isolated);
SET_ITEM_INT(use_environment);
+ SET_ITEM_INT(coerce_c_locale);
+ SET_ITEM_INT(coerce_c_locale_warn);
+ SET_ITEM_INT(utf8_mode);
+#ifdef MS_WINDOWS
+ SET_ITEM_INT(legacy_windows_fs_encoding);
+#endif
return 0;
fail:
@@ -251,6 +492,18 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline)
config->isolated++;
break;
+ case 'X':
+ {
+ _PyInitError err;
+ err = _Py_wstrlist_append(&cmdline->nxoption,
+ &cmdline->xoptions,
+ _PyOS_optarg);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+ break;
+ }
+
default:
/* ignore other argument:
handled by _PyCoreConfig_ReadFromArgv() */
@@ -262,8 +515,8 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline)
}
-_PyInitError
-_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
+static _PyInitError
+preconfig_from_argv(_PyPreConfig *config, const _PyArgv *args)
{
_PyInitError err;
@@ -281,7 +534,7 @@ _PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
goto done;
}
- err = _PyPreConfig_Read(config);
+ err = preconfig_read(config, &cmdline);
if (_Py_INIT_FAILED(err)) {
goto done;
}
@@ -293,7 +546,144 @@ done:
}
+/* Read the preconfiguration. */
+_PyInitError
+_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args)
+{
+ _PyInitError err;
+
+ err = _PyRuntime_Initialize();
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+
+ char *init_ctype_locale = NULL;
+ int init_utf8_mode = Py_UTF8Mode;
+#ifdef MS_WINDOWS
+ int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
+#endif
+ _PyPreConfig save_config = _PyPreConfig_INIT;
+ int locale_coerced = 0;
+ int loops = 0;
+
+ /* copy LC_CTYPE locale */
+ const char *loc = setlocale(LC_CTYPE, NULL);
+ if (loc == NULL) {
+ err = _Py_INIT_ERR("failed to LC_CTYPE locale");
+ goto done;
+ }
+ init_ctype_locale = _PyMem_RawStrdup(loc);
+ if (init_ctype_locale == NULL) {
+ err = _Py_INIT_NO_MEMORY();
+ goto done;
+ }
+
+ if (_PyPreConfig_Copy(&save_config, config) < 0) {
+ err = _Py_INIT_NO_MEMORY();
+ goto done;
+ }
+
+ /* Set LC_CTYPE to the user preferred locale */
+ _Py_SetLocaleFromEnv(LC_CTYPE);
+
+ while (1) {
+ int utf8_mode = config->utf8_mode;
+
+ /* Watchdog to prevent an infinite loop */
+ loops++;
+ if (loops == 3) {
+ err = _Py_INIT_ERR("Encoding changed twice while "
+ "reading the configuration");
+ goto done;
+ }
+
+ /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
+ on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
+ Py_UTF8Mode = config->utf8_mode;
+#ifdef MS_WINDOWS
+ Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
+#endif
+
+ err = preconfig_from_argv(config, args);
+ if (_Py_INIT_FAILED(err)) {
+ goto done;
+ }
+
+ if (locale_coerced) {
+ config->coerce_c_locale = 1;
+ }
+
+ /* The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
+ * components like GNU readline.
+ *
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
+ * more capable UTF-8 based alternative.
+ *
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more
+ * details.
+ */
+ int encoding_changed = 0;
+ if (config->coerce_c_locale && !locale_coerced) {
+ locale_coerced = 1;
+ _Py_CoerceLegacyLocale(0);
+ encoding_changed = 1;
+ }
+
+ if (utf8_mode == -1) {
+ if (config->utf8_mode == 1) {
+ /* UTF-8 Mode enabled */
+ encoding_changed = 1;
+ }
+ }
+ else {
+ if (config->utf8_mode != utf8_mode) {
+ encoding_changed = 1;
+ }
+ }
+
+ if (!encoding_changed) {
+ break;
+ }
+
+ /* Reset the configuration before reading again the configuration,
+ just keep UTF-8 Mode value. */
+ int new_utf8_mode = config->utf8_mode;
+ int new_coerce_c_locale = config->coerce_c_locale;
+ if (_PyPreConfig_Copy(config, &save_config) < 0) {
+ err = _Py_INIT_NO_MEMORY();
+ goto done;
+ }
+ config->utf8_mode = new_utf8_mode;
+ config->coerce_c_locale = new_coerce_c_locale;
+
+ /* The encoding changed: read again the configuration
+ with the new encoding */
+ }
+ err = _Py_INIT_OK();
+
+done:
+ if (init_ctype_locale != NULL) {
+ setlocale(LC_CTYPE, init_ctype_locale);
+ }
+ _PyPreConfig_Clear(&save_config);
+ Py_UTF8Mode = init_utf8_mode ;
+#ifdef MS_WINDOWS
+ Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
+#endif
+ return err;
+}
+
+
void
_PyPreConfig_Write(const _PyPreConfig *config)
{
+ _PyPreConfig_SetGlobalConfig(config);
+
+ if (config->coerce_c_locale) {
+ _Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
+ }
+
+ /* Set LC_CTYPE to the user preferred locale */
+ _Py_SetLocaleFromEnv(LC_CTYPE);
}
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 7cf4a6d..dec8904 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -287,7 +287,7 @@ static const char *_C_LOCALE_WARNING =
static void
_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
{
- if (core_config->coerce_c_locale_warn && _Py_LegacyLocaleDetected()) {
+ if (core_config->preconfig.coerce_c_locale_warn && _Py_LegacyLocaleDetected()) {
PySys_FormatStderr("%s", _C_LOCALE_WARNING);
}
}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 4b12280..50ba1a7 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2181,7 +2181,7 @@ make_flags(void)
SetFlag(config->use_hash_seed == 0 || config->hash_seed != 0);
SetFlag(config->preconfig.isolated);
PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode));
- SetFlag(config->utf8_mode);
+ SetFlag(config->preconfig.utf8_mode);
#undef SetFlag
if (PyErr_Occurred()) {