summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2017-12-16 03:54:22 (GMT)
committerGitHub <noreply@github.com>2017-12-16 03:54:22 (GMT)
commit9454060e84a669dde63824d9e2fcaf295e34f687 (patch)
tree4c40a6e1bd11aa75819acb7efce4981fc6ba7611
parente796b2fe26f220107ac50667de6cc86c82b465e3 (diff)
downloadcpython-9454060e84a669dde63824d9e2fcaf295e34f687.zip
cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.gz
cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.bz2
bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)
bpo-29240, bpo-32030: If the encoding changes (C locale coerced or UTF-8 Mode changed), Py_Main() now reads the configuration again with the new encoding. Changes: * Add _Py_UnixMain() called by main(). * Rename pymain_free_pymain() to pymain_clear_pymain(); it can now be called multiple times. * Rename pymain_parse_cmdline_envvars() to pymain_read_conf(). * Py_Main() now clears orig_argc and orig_argv at exit. * Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is no longer any need for two copies of the wchar_t** argv. * _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn. * Py_UTF8Mode is now initialized to -1. * Locale coercion (PEP 538) now respects -I and -E options.
-rw-r--r--Doc/using/cmdline.rst4
-rw-r--r--Include/pylifecycle.h5
-rw-r--r--Include/pystate.h8
-rw-r--r--Lib/test/test_c_locale_coercion.py5
-rw-r--r--Lib/test/test_cmd_line.py2
-rw-r--r--Lib/test/test_utf8_mode.py30
-rw-r--r--Modules/getpath.c2
-rw-r--r--Modules/main.c300
-rw-r--r--Programs/python.c91
-rw-r--r--Python/bltinmodule.c7
-rw-r--r--Python/fileutils.c4
-rw-r--r--Python/pylifecycle.c80
12 files changed, 324 insertions, 214 deletions
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 3915f33..b1bd47f 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -779,9 +779,7 @@ conflict.
If set to the value ``0``, causes the main Python command line application
to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
- based alternative. Note that this setting is checked even when the
- :option:`-E` or :option:`-I` options are used, as it is handled prior to
- the processing of command line options.
+ based alternative.
If this variable is *not* set, or is set to a value other than ``0``, and
the current locale reported for the ``LC_CTYPE`` category is the default
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index dcb7fcb..3ea8ad6 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -105,6 +105,9 @@ PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *);
/* Bootstrap __main__ (defined in Modules/main.c) */
PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv);
+#ifdef Py_BUILD_CORE
+PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
+#endif
/* In getpath.c */
PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void);
@@ -194,7 +197,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
/* Legacy locale support */
#ifndef Py_LIMITED_API
-PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
+PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
diff --git a/Include/pystate.h b/Include/pystate.h
index a56c9b4..fff134a 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -38,7 +38,10 @@ typedef struct {
int show_alloc_count; /* -X showalloccount */
int dump_refs; /* PYTHONDUMPREFS */
int malloc_stats; /* PYTHONMALLOCSTATS */
- int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable */
+ int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
+ int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
+ int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable,
+ -1 means unknown */
wchar_t *module_search_path_env; /* PYTHONPATH environment variable */
wchar_t *home; /* PYTHONHOME environment variable,
@@ -46,7 +49,8 @@ typedef struct {
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
} _PyCoreConfig;
-#define _PyCoreConfig_INIT (_PyCoreConfig){.use_hash_seed = -1}
+#define _PyCoreConfig_INIT \
+ (_PyCoreConfig){.use_hash_seed = -1, .coerce_c_locale = -1, .utf8_mode = -1}
/* Note: _PyCoreConfig_INIT sets other fields to 0/NULL */
/* Placeholders while working on the new configuration API
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index c0845d7..37dd834 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -65,7 +65,7 @@ def _set_locale_in_subprocess(locale_name):
# If there's no valid CODESET, we expect coercion to be skipped
cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
cmd = cmd_fmt.format(locale_name)
- result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
+ result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
return result.rc == 0
@@ -131,7 +131,6 @@ class EncodingDetails(_EncodingDetails):
"""
result, py_cmd = run_python_until_end(
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
- __isolated=True,
**env_vars
)
if not result.rc == 0:
@@ -236,6 +235,7 @@ class LocaleConfigurationTests(_LocaleHandlingTestCase):
"LANG": "",
"LC_CTYPE": "",
"LC_ALL": "",
+ "PYTHONCOERCECLOCALE": "",
}
for env_var in ("LANG", "LC_CTYPE"):
for locale_to_set in AVAILABLE_TARGETS:
@@ -294,6 +294,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
"LANG": "",
"LC_CTYPE": "",
"LC_ALL": "",
+ "PYTHONCOERCECLOCALE": "",
}
base_var_dict.update(extra_vars)
for env_var in ("LANG", "LC_CTYPE"):
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 2b14c30..54ea377 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -551,7 +551,7 @@ class CmdLineTest(unittest.TestCase):
self.assertEqual(out, "True")
# Warnings
- code = ("import sys, warnings; "
+ code = ("import warnings; "
"print(' '.join('%s::%s' % (f[0], f[2].__name__) "
"for f in warnings.filters))")
if Py_DEBUG:
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 275a6ea..73d1bd4 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -7,6 +7,7 @@ import os
import sys
import textwrap
import unittest
+from test import support
from test.support.script_helper import assert_python_ok, assert_python_failure
@@ -14,9 +15,11 @@ MS_WINDOWS = (sys.platform == 'win32')
class UTF8ModeTests(unittest.TestCase):
- # Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
- # variables by default
- DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
+ DEFAULT_ENV = {
+ 'PYTHONUTF8': '',
+ 'PYTHONLEGACYWINDOWSFSENCODING': '',
+ 'PYTHONCOERCECLOCALE': '0',
+ }
def posix_locale(self):
loc = locale.setlocale(locale.LC_CTYPE, None)
@@ -53,7 +56,7 @@ class UTF8ModeTests(unittest.TestCase):
self.assertEqual(out, '0')
if MS_WINDOWS:
- # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8
+ # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
# and has the priority over -X utf8
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONLEGACYWINDOWSFSENCODING='1')
@@ -201,6 +204,25 @@ class UTF8ModeTests(unittest.TestCase):
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
self.assertEqual(out, 'UTF-8 UTF-8')
+ @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
+ def test_cmd_line(self):
+ arg = 'h\xe9\u20ac'.encode('utf-8')
+ arg_utf8 = arg.decode('utf-8')
+ arg_ascii = arg.decode('ascii', 'surrogateescape')
+ code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
+
+ def check(utf8_opt, expected, **kw):
+ out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
+ args = out.partition(':')[2].rstrip()
+ self.assertEqual(args, ascii(expected), out)
+
+ check('utf8', [arg_utf8])
+ if sys.platform == 'darwin' or support.is_android:
+ c_arg = arg_utf8
+ else:
+ c_arg = arg_ascii
+ check('utf8=0', [c_arg], LC_ALL='C')
+
if __name__ == "__main__":
unittest.main()
diff --git a/Modules/getpath.c b/Modules/getpath.c
index 6208a17..b4b3343 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -112,7 +112,7 @@ extern "C" {
#define DECODE_LOCALE_ERR(NAME, LEN) \
((LEN) == (size_t)-2) \
- ? _Py_INIT_USER_ERR("cannot decode " #NAME) \
+ ? _Py_INIT_USER_ERR("cannot decode " NAME) \
: _Py_INIT_NO_MEMORY()
typedef struct {
diff --git a/Modules/main.c b/Modules/main.c
index 6b602cf..00de7f0 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -38,14 +38,14 @@ extern "C" {
#define DECODE_LOCALE_ERR(NAME, LEN) \
(((LEN) == -2) \
- ? _Py_INIT_USER_ERR("cannot decode " #NAME) \
+ ? _Py_INIT_USER_ERR("cannot decode " NAME) \
: _Py_INIT_NO_MEMORY())
#define SET_DECODE_ERROR(NAME, LEN) \
do { \
if ((LEN) == (size_t)-2) { \
- pymain->err = _Py_INIT_USER_ERR("cannot decode " #NAME); \
+ pymain->err = _Py_INIT_USER_ERR("cannot decode " NAME); \
} \
else { \
pymain->err = _Py_INIT_NO_MEMORY(); \
@@ -53,8 +53,8 @@ extern "C" {
} while (0)
/* For Py_GetArgcArgv(); set by main() */
-static wchar_t **orig_argv;
-static int orig_argc;
+static wchar_t **orig_argv = NULL;
+static int orig_argc = 0;
/* command line options */
#define BASE_OPTS L"bBc:dEhiIJm:OqRsStuvVW:xX:?"
@@ -427,8 +427,11 @@ typedef struct {
_PyInitError err;
/* PYTHONWARNINGS env var */
_Py_OptList env_warning_options;
+
int argc;
wchar_t **argv;
+ int use_bytes_argv;
+ char **bytes_argv;
int sys_argc;
wchar_t **sys_argv;
@@ -466,7 +469,6 @@ pymain_free_globals(_PyMain *pymain)
{
_PyPathConfig_Clear(&_Py_path_config);
_PyImport_Fini2();
- _PyCoreConfig_Clear(&pymain->core_config);
#ifdef __INSURE__
/* Insure++ is a memory analysis tool that aids in discovering
@@ -483,22 +485,69 @@ pymain_free_globals(_PyMain *pymain)
}
+/* Clear argv allocated by pymain_decode_bytes_argv() */
+static void
+pymain_clear_bytes_argv(_PyMain *pymain, int argc)
+{
+ if (pymain->use_bytes_argv && pymain->argv != NULL) {
+ for (int i = 0; i < argc; i++) {
+ PyMem_RawFree(pymain->argv[i]);
+ }
+ PyMem_RawFree(pymain->argv);
+ pymain->argv = NULL;
+ }
+}
+
+
+static int
+pymain_decode_bytes_argv(_PyMain *pymain)
+{
+ assert(pymain->argv == NULL);
+
+ /* +1 for the NULL terminator */
+ size_t size = sizeof(wchar_t*) * (pymain->argc + 1);
+ pymain->argv = (wchar_t **)PyMem_RawMalloc(size);
+ if (pymain->argv == NULL) {
+ pymain->err = _Py_INIT_NO_MEMORY();
+ return -1;
+ }
+
+ for (int i = 0; i < pymain->argc; i++) {
+ size_t len;
+ pymain->argv[i] = Py_DecodeLocale(pymain->bytes_argv[i], &len);
+ if (pymain->argv[i] == NULL) {
+ pymain_clear_bytes_argv(pymain, i);
+ pymain->err = DECODE_LOCALE_ERR("command line arguments",
+ (Py_ssize_t)len);
+ return -1;
+ }
+ }
+ pymain->argv[pymain->argc] = NULL;
+ return 0;
+}
+
+
static void
-pymain_free_pymain(_PyMain *pymain)
+pymain_clear_pymain(_PyMain *pymain)
{
_Py_CommandLineDetails *cmdline = &pymain->cmdline;
pymain_optlist_clear(&cmdline->warning_options);
pymain_optlist_clear(&cmdline->xoptions);
PyMem_RawFree(cmdline->command);
+ cmdline->command = NULL;
PyMem_RawFree(pymain->sys_argv);
+ pymain->sys_argv = NULL;
pymain_optlist_clear(&pymain->env_warning_options);
+ pymain_clear_bytes_argv(pymain, pymain->argc);
+
+ _PyCoreConfig_Clear(&pymain->core_config);
}
/* Clear Python objects */
static void
-pymain_free_python(_PyMain *pymain)
+pymain_clear_python(_PyMain *pymain)
{
Py_CLEAR(pymain->main_importer_path);
@@ -509,12 +558,12 @@ pymain_free_python(_PyMain *pymain)
static void
pymain_free(_PyMain *pymain)
{
- /* Force the allocator used by pymain_parse_cmdline_envvars() */
+ /* Force the allocator used by pymain_read_conf() */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
- pymain_free_python(pymain);
- pymain_free_pymain(pymain);
+ pymain_clear_python(pymain);
+ pymain_clear_pymain(pymain);
pymain_free_globals(pymain);
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
@@ -743,6 +792,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
cmdline->filename = pymain->argv[_PyOS_optind];
}
+ pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL
+ || cmdline->module != NULL);
+
/* -c and -m options are exclusive */
assert(!(cmdline->command != NULL && cmdline->module != NULL));
@@ -1434,8 +1486,6 @@ pymain_repl(_PyMain *pymain)
static int
pymain_parse_cmdline(_PyMain *pymain)
{
- _Py_CommandLineDetails *cmdline = &pymain->cmdline;
-
int res = pymain_parse_cmdline_impl(pymain);
if (res < 0) {
return -1;
@@ -1446,21 +1496,6 @@ pymain_parse_cmdline(_PyMain *pymain)
return 1;
}
- if (cmdline->print_help) {
- pymain_usage(0, pymain->argv[0]);
- pymain->status = 0;
- return 1;
- }
-
- if (cmdline->print_version) {
- printf("Python %s\n",
- (cmdline->print_version >= 2) ? Py_GetVersion() : PY_VERSION);
- return 1;
- }
-
- pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL
- || cmdline->module != NULL);
-
return 0;
}
@@ -1852,6 +1887,19 @@ pymain_parse_envvars(_PyMain *pymain)
pymain->core_config.malloc_stats = 1;
}
+ const char* env = pymain_get_env_var("PYTHONCOERCECLOCALE");
+ if (env) {
+ if (strcmp(env, "0") == 0) {
+ pymain->core_config.coerce_c_locale = 0;
+ }
+ else if (strcmp(env, "warn") == 0) {
+ pymain->core_config.coerce_c_locale_warn = 1;
+ }
+ else {
+ pymain->core_config.coerce_c_locale = 1;
+ }
+ }
+
if (pymain_init_utf8_mode(pymain) < 0) {
return -1;
}
@@ -1867,23 +1915,19 @@ pymain_parse_envvars(_PyMain *pymain)
Return 1 if Python is done and must exit.
Set pymain->err and return -1 on error. */
static int
-pymain_parse_cmdline_envvars_impl(_PyMain *pymain)
+pymain_read_conf_impl(_PyMain *pymain)
{
int res = pymain_parse_cmdline(pymain);
- if (res < 0) {
- return -1;
- }
- if (res > 0) {
- return 1;
+ if (res != 0) {
+ return res;
}
- /* Set Py_IgnoreEnvironmentFlag needed by Py_GETENV() */
- pymain_set_global_config(pymain);
+ /* Set Py_IgnoreEnvironmentFlag for Py_GETENV() */
+ Py_IgnoreEnvironmentFlag = pymain->core_config.ignore_environment;
if (pymain_parse_envvars(pymain) < 0) {
return -1;
}
- /* FIXME: if utf8_mode value changed, parse again cmdline */
if (pymain_init_sys_argv(pymain) < 0) {
return -1;
@@ -1899,14 +1943,101 @@ pymain_parse_cmdline_envvars_impl(_PyMain *pymain)
static int
-pymain_parse_cmdline_envvars(_PyMain *pymain)
+pymain_read_conf(_PyMain *pymain)
{
+ int res = -1;
+
/* Force default allocator, since pymain_free() must use the same allocator
than this function. */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
- int res = pymain_parse_cmdline_envvars_impl(pymain);
+ char *oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL));
+ if (oldloc == NULL) {
+ pymain->err = _Py_INIT_NO_MEMORY();
+ goto done;
+ }
+
+ /* Reconfigure the locale to the default for this process */
+ _Py_SetLocaleFromEnv(LC_ALL);
+
+ int locale_coerced = 0;
+ int loops = 0;
+ int init_ignore_env = pymain->core_config.ignore_environment;
+
+ while (1) {
+ int utf8_mode = pymain->core_config.utf8_mode;
+ int encoding_changed = 0;
+
+ /* Watchdog to prevent an infinite loop */
+ loops++;
+ if (loops == 3) {
+ pymain->err = _Py_INIT_ERR("Encoding changed twice while "
+ "reading the configuration");
+ goto done;
+ }
+
+ if (pymain->use_bytes_argv) {
+ if (pymain_decode_bytes_argv(pymain) < 0) {
+ goto done;
+ }
+ }
+
+ res = pymain_read_conf_impl(pymain);
+ if (res != 0) {
+ goto done;
+ }
+
+ /* The legacy C locale assumes ASCII as the default text encoding, which
+ * causes problems not only for the CPython runtime, but also other
+ * components like GNU readline.
+ *
+ * Accordingly, when the CLI detects it, it attempts to coerce it to a
+ * more capable UTF-8 based alternative.
+ *
+ * See the documentation of the PYTHONCOERCECLOCALE setting for more
+ * details.
+ */
+ if (pymain->core_config.coerce_c_locale == 1 && !locale_coerced) {
+ locale_coerced = 1;
+ _Py_CoerceLegacyLocale(&pymain->core_config);
+ encoding_changed = 1;
+ }
+
+ if (utf8_mode == -1) {
+ if (pymain->core_config.utf8_mode == 1) {
+ /* UTF-8 Mode enabled */
+ encoding_changed = 1;
+ }
+ }
+ else {
+ if (pymain->core_config.utf8_mode != utf8_mode) {
+ encoding_changed = 1;
+ }
+ }
+
+ if (!encoding_changed) {
+ break;
+ }
+
+ /* Reset the configuration, except UTF-8 Mode. Set Py_UTF8Mode for
+ Py_DecodeLocale(). Reset Py_IgnoreEnvironmentFlag, modified by
+ pymain_read_conf_impl(). */
+ Py_UTF8Mode = pymain->core_config.utf8_mode;
+ Py_IgnoreEnvironmentFlag = init_ignore_env;
+ pymain_clear_pymain(pymain);
+ pymain_get_global_config(pymain);
+
+ /* The encoding changed: read again the configuration
+ with the new encoding */
+ }
+ res = 0;
+
+done:
+ if (oldloc != NULL) {
+ setlocale(LC_ALL, oldloc);
+ PyMem_RawFree(oldloc);
+ }
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
return res;
@@ -1940,6 +2071,24 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
}
}
+ if (config->utf8_mode < 0 || config->coerce_c_locale < 0) {
+ if (_Py_LegacyLocaleDetected()) {
+ if (config->utf8_mode < 0) {
+ config->utf8_mode = 1;
+ }
+ if (config->coerce_c_locale < 0) {
+ config->coerce_c_locale = 1;
+ }
+ }
+
+ if (config->coerce_c_locale < 0) {
+ config->coerce_c_locale = 0;
+ }
+ if (config->utf8_mode < 0) {
+ config->utf8_mode = 0;
+ }
+ }
+
return _Py_INIT_OK();
}
@@ -2247,17 +2396,24 @@ pymain_run_python(_PyMain *pymain)
static int
pymain_init(_PyMain *pymain)
{
+ /* 754 requires that FP exceptions run in "no stop" mode by default,
+ * and until C vendors implement C99's ways to control FP exceptions,
+ * Python requires non-stop mode. Alas, some platforms enable FP
+ * exceptions by default. Here we disable them.
+ */
+#ifdef __FreeBSD__
+ fedisableexcept(FE_OVERFLOW);
+#endif
+
pymain->err = _PyRuntime_Initialize();
if (_Py_INIT_FAILED(pymain->err)) {
return -1;
}
- pymain->core_config.utf8_mode = Py_UTF8Mode;
pymain->core_config._disable_importlib = 0;
pymain->config.install_signal_handlers = 1;
- orig_argc = pymain->argc; /* For Py_GetArgcArgv() */
- orig_argv = pymain->argv;
+ pymain_get_global_config(pymain);
return 0;
}
@@ -2265,14 +2421,13 @@ pymain_init(_PyMain *pymain)
static int
pymain_impl(_PyMain *pymain)
{
- int res = pymain_init(pymain);
- if (res < 0) {
+ if (pymain_init(pymain) < 0) {
return -1;
}
- pymain_get_global_config(pymain);
-
- res = pymain_parse_cmdline_envvars(pymain);
+ /* Read the configuration, but initialize also the LC_CTYPE locale:
+ enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538) */
+ int res = pymain_read_conf(pymain);
if (res < 0) {
return -1;
}
@@ -2281,6 +2436,21 @@ pymain_impl(_PyMain *pymain)
return 0;
}
+ _Py_CommandLineDetails *cmdline = &pymain->cmdline;
+ if (cmdline->print_help) {
+ pymain_usage(0, pymain->argv[0]);
+ return 0;
+ }
+
+ if (cmdline->print_version) {
+ printf("Python %s\n",
+ (cmdline->print_version >= 2) ? Py_GetVersion() : PY_VERSION);
+ return 0;
+ }
+
+ orig_argc = pymain->argc; /* For Py_GetArgcArgv() */
+ orig_argv = pymain->argv;
+
res = pymain_init_python_core(pymain);
if (res < 0) {
return -1;
@@ -2293,7 +2463,7 @@ pymain_impl(_PyMain *pymain)
pymain_run_python(pymain);
- pymain_free_python(pymain);
+ pymain_clear_python(pymain);
if (Py_FinalizeEx() < 0) {
/* Value unlikely to be confused with a non-error exit status or
@@ -2304,22 +2474,46 @@ pymain_impl(_PyMain *pymain)
}
+static int
+pymain_main(_PyMain *pymain)
+{
+ memset(&pymain->cmdline, 0, sizeof(pymain->cmdline));
+
+ if (pymain_impl(pymain) < 0) {
+ _Py_FatalInitError(pymain->err);
+ }
+ pymain_free(pymain);
+
+ orig_argc = 0;
+ orig_argv = NULL;
+
+ return pymain->status;
+}
+
+
int
Py_Main(int argc, wchar_t **argv)
{
_PyMain pymain = _PyMain_INIT;
- memset(&pymain.cmdline, 0, sizeof(pymain.cmdline));
pymain.argc = argc;
pymain.argv = argv;
- if (pymain_impl(&pymain) < 0) {
- _Py_FatalInitError(pymain.err);
- }
- pymain_free(&pymain);
+ return pymain_main(&pymain);
+}
+
+
+int
+_Py_UnixMain(int argc, char **argv)
+{
+ _PyMain pymain = _PyMain_INIT;
+ pymain.argc = argc;
+ pymain.use_bytes_argv = 1;
+ pymain.bytes_argv = argv;
- return pymain.status;
+ return pymain_main(&pymain);
}
+
/* this is gonna seem *real weird*, but if you put some other code between
Py_Main() and Py_GetArgcArgv() you will need to adjust the test in the
while statement in Misc/gdbinit:ppystack */
diff --git a/Programs/python.c b/Programs/python.c
index aef7122..a295486 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -17,98 +17,9 @@ wmain(int argc, wchar_t **argv)
#else
-static void _Py_NO_RETURN
-fatal_error(const char *msg)
-{
- fprintf(stderr, "Fatal Python error: %s\n", msg);
- fflush(stderr);
- exit(1);
-}
-
-
int
main(int argc, char **argv)
{
- wchar_t **argv_copy;
- /* We need a second copy, as Python might modify the first one. */
- wchar_t **argv_copy2;
- int i, status;
- char *oldloc;
-
- _PyInitError err = _PyRuntime_Initialize();
- if (_Py_INIT_FAILED(err)) {
- fatal_error(err.msg);
- }
-
- /* Force default allocator, to be able to release memory above
- with a known allocator. */
- _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
-
- argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
- argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
- if (!argv_copy || !argv_copy2) {
- fatal_error("out of memory");
- return 1;
- }
-
- /* 754 requires that FP exceptions run in "no stop" mode by default,
- * and until C vendors implement C99's ways to control FP exceptions,
- * Python requires non-stop mode. Alas, some platforms enable FP
- * exceptions by default. Here we disable them.
- */
-#ifdef __FreeBSD__
- fedisableexcept(FE_OVERFLOW);
-#endif
-
- oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL));
- if (!oldloc) {
- fatal_error("out of memory");
- return 1;
- }
-
- /* Reconfigure the locale to the default for this process */
- _Py_SetLocaleFromEnv(LC_ALL);
-
- /* The legacy C locale assumes ASCII as the default text encoding, which
- * causes problems not only for the CPython runtime, but also other
- * components like GNU readline.
- *
- * Accordingly, when the CLI detects it, it attempts to coerce it to a
- * more capable UTF-8 based alternative.
- *
- * See the documentation of the PYTHONCOERCECLOCALE setting for more
- * details.
- */
- if (_Py_LegacyLocaleDetected()) {
- Py_UTF8Mode = 1;
- _Py_CoerceLegacyLocale();
- }
-
- /* Convert from char to wchar_t based on the locale settings */
- for (i = 0; i < argc; i++) {
- argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
- if (!argv_copy[i]) {
- PyMem_RawFree(oldloc);
- fatal_error("unable to decode the command line arguments");
- }
- argv_copy2[i] = argv_copy[i];
- }
- argv_copy2[argc] = argv_copy[argc] = NULL;
-
- setlocale(LC_ALL, oldloc);
- PyMem_RawFree(oldloc);
-
- status = Py_Main(argc, argv_copy);
-
- /* Py_Main() can change PyMem_RawMalloc() allocator, so restore the default
- to release memory blocks allocated before Py_Main() */
- _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
-
- for (i = 0; i < argc; i++) {
- PyMem_RawFree(argv_copy2[i]);
- }
- PyMem_RawFree(argv_copy);
- PyMem_RawFree(argv_copy2);
- return status;
+ return _Py_UnixMain(argc, argv);
}
#endif
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 604493d..e702f7c 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0;
#endif
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
-/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin
- and stdout error handler to "surrogateescape". */
-int Py_UTF8Mode = 0;
+/* UTF-8 mode (PEP 540): if equal to 1, use the UTF-8 encoding, and change
+ stdin and stdout error handler to "surrogateescape". It is equal to
+ -1 by default: unknown, will be set by Py_Main() */
+int Py_UTF8Mode = -1;
_Py_IDENTIFIER(__builtins__);
_Py_IDENTIFIER(__dict__);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 4b69049..c4d495d 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
#else
- if (Py_UTF8Mode) {
+ if (Py_UTF8Mode == 1) {
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
}
@@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeLocaleUTF8(text, error_pos);
#else /* __APPLE__ */
- if (Py_UTF8Mode) {
+ if (Py_UTF8Mode == 1) {
return _Py_EncodeLocaleUTF8(text, error_pos);
}
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c62607..6500995 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING =
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
-static int
-_legacy_locale_warnings_enabled(void)
-{
- const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
- return (coerce_c_locale != NULL &&
- strncmp(coerce_c_locale, "warn", 5) == 0);
-}
-
static void
-_emit_stderr_warning_for_legacy_locale(void)
+_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
{
- if (_legacy_locale_warnings_enabled()) {
+ if (core_config->coerce_c_locale_warn) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
@@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void)
}
#ifdef PY_COERCE_C_LOCALE
-static const char _C_LOCALE_COERCION_WARNING[] =
+static const char C_LOCALE_COERCION_WARNING[] =
"Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
static void
-_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
{
const char *newloc = target->locale_name;
@@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
- if (_legacy_locale_warnings_enabled()) {
- fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
+ if (config->coerce_c_locale_warn) {
+ fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */
@@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
#endif
void
-_Py_CoerceLegacyLocale(void)
+_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
{
#ifdef PY_COERCE_C_LOCALE
- /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
- * the locale settings *before* we try to do anything with the command
- * line arguments. For cross-platform debugging purposes, we also need
- * to give end users a way to force even scripts that are otherwise
- * isolated from their environment to use the legacy ASCII-centric C
- * locale.
- *
- * Ignoring -E and -I is safe from a security perspective, as we only use
- * the setting to turn *off* the implicit locale coercion, and anyone with
- * access to the process environment already has the ability to set
- * `LC_ALL=C` to override the C level locale settings anyway.
- */
- const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
- if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
- /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
- const char *locale_override = getenv("LC_ALL");
- if (locale_override == NULL || *locale_override == '\0') {
- /* LC_ALL is also not set (or is set to an empty string) */
- const _LocaleCoercionTarget *target = NULL;
- for (target = _TARGET_LOCALES; target->locale_name; target++) {
- const char *new_locale = setlocale(LC_CTYPE,
- target->locale_name);
- if (new_locale != NULL) {
+ const char *locale_override = getenv("LC_ALL");
+ if (locale_override == NULL || *locale_override == '\0') {
+ /* LC_ALL is also not set (or is set to an empty string) */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ const char *new_locale = setlocale(LC_CTYPE,
+ target->locale_name);
+ if (new_locale != NULL) {
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
- defined(HAVE_LANGINFO_H) && defined(CODESET)
- /* Also ensure that nl_langinfo works in this locale */
- char *codeset = nl_langinfo(CODESET);
- if (!codeset || *codeset == '\0') {
- /* CODESET is not set or empty, so skip coercion */
- new_locale = NULL;
- _Py_SetLocaleFromEnv(LC_CTYPE);
- continue;
- }
-#endif
- /* Successfully configured locale, so make it the default */
- _coerce_default_locale_settings(target);
- return;
+defined(HAVE_LANGINFO_H) && defined(CODESET)
+ /* Also ensure that nl_langinfo works in this locale */
+ char *codeset = nl_langinfo(CODESET);
+ if (!codeset || *codeset == '\0') {
+ /* CODESET is not set or empty, so skip coercion */
+ new_locale = NULL;
+ _Py_SetLocaleFromEnv(LC_CTYPE);
+ continue;
}
+#endif
+ /* Successfully configured locale, so make it the default */
+ _coerce_default_locale_settings(config, target);
+ return;
}
}
}
@@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config)
the locale's charset without having to switch
locales. */
_Py_SetLocaleFromEnv(LC_CTYPE);
- _emit_stderr_warning_for_legacy_locale();
+ _emit_stderr_warning_for_legacy_locale(core_config);
#endif
err = _Py_HashRandomization_Init(core_config);