From 4b9aad49992a825d8c76e428ed1aca81dd3878b2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 2 Nov 2020 16:49:54 +0100 Subject: bpo-42236: Enhance init and encoding documentation (GH-23109) Enhance the documentation of the Python startup, filesystem encoding and error handling, locale encoding. Add a new "Python UTF-8 Mode" section. * Add "locale encoding" and "filesystem encoding and error handler" to the glossary * Remove documentation from Include/cpython/initconfig.h: move it to Doc/c-api/init_config.rst. * Doc/c-api/init_config.rst: * Document command line options and environment variables * Document default values. * Add a new "Python UTF-8 Mode" section in Doc/library/os.rst. * Add warnings to Py_DecodeLocale() and Py_EncodeLocale() docs. * Document how Python selects the filesystem encoding and error handler at a single place: PyConfig.filesystem_encoding and PyConfig.filesystem_errors. * PyConfig: move orig_argv member at the right place. --- Doc/c-api/exceptions.rst | 8 +- Doc/c-api/init.rst | 5 +- Doc/c-api/init_config.rst | 589 ++++++++++++++++++++++++++++++++++--------- Doc/c-api/sys.rst | 70 ++--- Doc/c-api/unicode.rst | 11 +- Doc/c-api/veryhigh.rst | 21 +- Doc/glossary.rst | 31 +++ Doc/howto/unicode.rst | 10 +- Doc/library/devmode.rst | 3 + Doc/library/exceptions.rst | 4 +- Doc/library/locale.rst | 26 +- Doc/library/os.rst | 90 ++++++- Doc/library/sys.rst | 42 +-- Doc/using/cmdline.rst | 58 +---- Doc/using/windows.rst | 26 +- Doc/whatsnew/3.7.rst | 3 +- Include/cpython/initconfig.h | 253 +++---------------- Python/initconfig.c | 1 + Python/preconfig.c | 10 +- 19 files changed, 738 insertions(+), 523 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 247b6d6..4e99a01 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -182,8 +182,8 @@ For convenience, some of these functions will always return a .. 
c:function:: PyObject* PyErr_SetFromErrnoWithFilename(PyObject *type, const char *filename) Similar to :c:func:`PyErr_SetFromErrnoWithFilenameObject`, but the filename - is given as a C string. *filename* is decoded from the filesystem encoding - (:func:`os.fsdecode`). + is given as a C string. *filename* is decoded from the :term:`filesystem + encoding and error handler`. .. c:function:: PyObject* PyErr_SetFromWindowsErr(int ierr) @@ -266,7 +266,7 @@ For convenience, some of these functions will always return a .. c:function:: void PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) Like :c:func:`PyErr_SyntaxLocationObject`, but *filename* is a byte string - decoded from the filesystem encoding (:func:`os.fsdecode`). + decoded from the :term:`filesystem encoding and error handler`. .. versionadded:: 3.2 @@ -343,7 +343,7 @@ an error value). Similar to :c:func:`PyErr_WarnExplicitObject` except that *message* and *module* are UTF-8 encoded strings, and *filename* is decoded from the - filesystem encoding (:func:`os.fsdecode`). + :term:`filesystem encoding and error handler`. .. c:function:: int PyErr_WarnFormat(PyObject *category, Py_ssize_t stack_level, const char *format, ...) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 7f06648..3ce6892 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -151,8 +151,9 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. .. c:var:: int Py_LegacyWindowsFSEncodingFlag - If the flag is non-zero, use the ``mbcs`` encoding instead of the UTF-8 - encoding for the filesystem encoding. + If the flag is non-zero, use the ``mbcs`` encoding with ``replace`` error + handler, instead of the UTF-8 encoding with ``surrogatepass`` error handler, + for the :term:`filesystem encoding and error handler`. Set to ``1`` if the :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable is set to a non-empty string. 
diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 92a6c3a..dad1f90 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -189,11 +189,7 @@ PyPreConfig .. c:type:: PyPreConfig - Structure used to preinitialize Python: - - * Set the Python memory allocator - * Configure the LC_CTYPE locale - * Set the UTF-8 mode + Structure used to preinitialize Python. Function to initialize a preconfiguration: @@ -211,7 +207,7 @@ PyPreConfig .. c:member:: int allocator - Name of the memory allocator: + Name of the Python memory allocators: * ``PYMEM_ALLOCATOR_NOT_SET`` (``0``): don't change memory allocators (use defaults) @@ -231,27 +227,48 @@ PyPreConfig See :ref:`Memory Management `. + Default: ``PYMEM_ALLOCATOR_NOT_SET``. + .. c:member:: int configure_locale - Set the LC_CTYPE locale to the user preferred locale? If equals to 0, set - :c:member:`coerce_c_locale` and :c:member:`coerce_c_locale_warn` to 0. + Set the LC_CTYPE locale to the user preferred locale? + + If equals to 0, set :c:member:`~PyPreConfig.coerce_c_locale` and + :c:member:`~PyPreConfig.coerce_c_locale_warn` members to 0. + + See the :term:`locale encoding`. + + Default: ``1`` in Python config, ``0`` in isolated config. .. c:member:: int coerce_c_locale - If equals to 2, coerce the C locale; if equals to 1, read the LC_CTYPE - locale to decide if it should be coerced. + If equals to 2, coerce the C locale. + + If equals to 1, read the LC_CTYPE locale to decide if it should be + coerced. + + See the :term:`locale encoding`. + + Default: ``-1`` in Python config, ``0`` in isolated config. .. c:member:: int coerce_c_locale_warn If non-zero, emit a warning if the C locale is coerced. + Default: ``-1`` in Python config, ``0`` in isolated config. + .. c:member:: int dev_mode - See :c:member:`PyConfig.dev_mode`. + If non-zero, enables the :ref:`Python Development Mode `: + see :c:member:`PyConfig.dev_mode`. + + Default: ``-1`` in Python mode, ``0`` in isolated mode. .. 
c:member:: int isolated - See :c:member:`PyConfig.isolated`. + Isolated mode: see :c:member:`PyConfig.isolated`. + + Default: ``0`` in Python mode, ``1`` in isolated mode. .. c:member:: int legacy_windows_fs_encoding @@ -267,6 +284,8 @@ PyPreConfig Only available on Windows. ``#ifdef MS_WINDOWS`` macro can be used for Windows specific code. + Default: ``0``. + .. c:member:: int parse_argv If non-zero, :c:func:`Py_PreInitializeFromArgs` and @@ -274,16 +293,36 @@ PyPreConfig same way the regular Python parses command line arguments: see :ref:`Command Line Arguments `. + Default: ``1`` in Python config, ``0`` in isolated config. + .. c:member:: int use_environment - See :c:member:`PyConfig.use_environment`. + Use :ref:`environment variables `? See + :c:member:`PyConfig.use_environment`. + + Default: ``1`` in Python config and ``0`` in isolated config. .. c:member:: int utf8_mode - If non-zero, enable the UTF-8 mode. + If non-zero, enable the :ref:`Python UTF-8 Mode `. + + Set by the :option:`-X utf8 <-X>` command line option and the + :envvar:`PYTHONUTF8` environment variable. + + Default: ``-1`` in Python config and ``0`` in isolated config. + + +.. _c-preinit: + +Preinitialize Python with PyPreConfig +------------------------------------- -Preinitialization with PyPreConfig ----------------------------------- +The preinitialization of Python: + +* Set the Python memory allocators (:c:member:`PyPreConfig.allocator`) +* Configure the LC_CTYPE locale (:term:`locale encoding`) +* Set the :ref:`Python UTF-8 Mode ` + (:c:member:`PyPreConfig.utf8_mode`) Functions to preinitialize Python: @@ -293,13 +332,17 @@ Functions to preinitialize Python: .. c:function:: PyStatus Py_PreInitializeFromBytesArgs(const PyPreConfig *preconfig, int argc, char * const *argv) - Preinitialize Python from *preconfig* preconfiguration and command line - arguments (bytes strings). + Preinitialize Python from *preconfig* preconfiguration. 
+ + Parse *argv* command line arguments (bytes strings) if + :c:member:`~PyPreConfig.parse_argv` of *preconfig* is non-zero. .. c:function:: PyStatus Py_PreInitializeFromArgs(const PyPreConfig *preconfig, int argc, wchar_t * const * argv) - Preinitialize Python from *preconfig* preconfiguration and command line - arguments (wide strings). + Preinitialize Python from *preconfig* preconfiguration. + + Parse *argv* command line arguments (wide strings) if + :c:member:`~PyPreConfig.parse_argv` of *preconfig* is non-zero. The caller is responsible to handle exceptions (error or exit) using :c:func:`PyStatus_Exception` and :c:func:`Py_ExitStatusException`. @@ -309,7 +352,7 @@ For :ref:`Python Configuration ` command line arguments, the command line arguments must also be passed to preinitialize Python, since they have an effect on the pre-configuration like encodings. For example, the :option:`-X utf8 <-X>` command line option -enables the UTF-8 Mode. +enables the :ref:`Python UTF-8 Mode `. ``PyMem_SetAllocator()`` can be called after :c:func:`Py_PreInitialize` and before :c:func:`Py_InitializeFromConfig` to install a custom memory allocator. @@ -317,11 +360,12 @@ It can be called before :c:func:`Py_PreInitialize` if :c:member:`PyPreConfig.allocator` is set to ``PYMEM_ALLOCATOR_NOT_SET``. Python memory allocation functions like :c:func:`PyMem_RawMalloc` must not be -used before Python preinitialization, whereas calling directly ``malloc()`` and -``free()`` is always safe. :c:func:`Py_DecodeLocale` must not be called before -the preinitialization. +used before the Python preinitialization, whereas calling directly ``malloc()`` +and ``free()`` is always safe. :c:func:`Py_DecodeLocale` must not be called +before the Python preinitialization. 
-Example using the preinitialization to enable the UTF-8 Mode:: +Example using the preinitialization to enable +the :ref:`Python UTF-8 Mode `:: PyStatus status; PyPreConfig preconfig; @@ -334,7 +378,7 @@ Example using the preinitialization to enable the UTF-8 Mode:: Py_ExitStatusException(status); } - /* at this point, Python will speak UTF-8 */ + /* at this point, Python speaks UTF-8 */ Py_Initialize(); /* ... use Python API here ... */ @@ -348,47 +392,54 @@ PyConfig Structure containing most parameters to configure Python. + When done, the :c:func:`PyConfig_Clear` function must be used to release the + configuration memory. + Structure methods: .. c:function:: void PyConfig_InitPythonConfig(PyConfig *config) - Initialize configuration with :ref:`Python Configuration + Initialize configuration with the :ref:`Python Configuration `. .. c:function:: void PyConfig_InitIsolatedConfig(PyConfig *config) - Initialize configuration with :ref:`Isolated Configuration + Initialize configuration with the :ref:`Isolated Configuration `. .. c:function:: PyStatus PyConfig_SetString(PyConfig *config, wchar_t * const *config_str, const wchar_t *str) Copy the wide character string *str* into ``*config_str``. - Preinitialize Python if needed. + :ref:`Preinitialize Python ` if needed. .. c:function:: PyStatus PyConfig_SetBytesString(PyConfig *config, wchar_t * const *config_str, const char *str) - Decode *str* using ``Py_DecodeLocale()`` and set the result into ``*config_str``. + Decode *str* using :c:func:`Py_DecodeLocale` and set the result into + ``*config_str``. - Preinitialize Python if needed. + :ref:`Preinitialize Python ` if needed. .. c:function:: PyStatus PyConfig_SetArgv(PyConfig *config, int argc, wchar_t * const *argv) - Set command line arguments from wide character strings. + Set command line arguments (:c:member:`~PyConfig.argv` member of + *config*) from the *argv* list of wide character strings. - Preinitialize Python if needed. 
+ :ref:`Preinitialize Python ` if needed. .. c:function:: PyStatus PyConfig_SetBytesArgv(PyConfig *config, int argc, char * const *argv) - Set command line arguments: decode bytes using :c:func:`Py_DecodeLocale`. + Set command line arguments (:c:member:`~PyConfig.argv` member of + *config*) from the *argv* list of bytes strings. Decode bytes using + :c:func:`Py_DecodeLocale`. - Preinitialize Python if needed. + :ref:`Preinitialize Python ` if needed. .. c:function:: PyStatus PyConfig_SetWideStringList(PyConfig *config, PyWideStringList *list, Py_ssize_t length, wchar_t **items) Set the list of wide strings *list* to *length* and *items*. - Preinitialize Python if needed. + :ref:`Preinitialize Python ` if needed. .. c:function:: PyStatus PyConfig_Read(PyConfig *config) @@ -396,24 +447,25 @@ PyConfig Fields which are already initialized are left unchanged. - Preinitialize Python if needed. + :ref:`Preinitialize Python ` if needed. .. c:function:: void PyConfig_Clear(PyConfig *config) Release configuration memory. - Most ``PyConfig`` methods preinitialize Python if needed. In that case, the - Python preinitialization configuration in based on the :c:type:`PyConfig`. - If configuration fields which are in common with :c:type:`PyPreConfig` are - tuned, they must be set before calling a :c:type:`PyConfig` method: + Most ``PyConfig`` methods :ref:`preinitialize Python ` if needed. + In that case, the Python preinitialization configuration + (:c:type:`PyPreConfig`) is based on the :c:type:`PyConfig`. 
If configuration + fields which are in common with :c:type:`PyPreConfig` are tuned, they must + be set before calling a :c:type:`PyConfig` method: - * :c:member:`~PyConfig.dev_mode` - * :c:member:`~PyConfig.isolated` - * :c:member:`~PyConfig.parse_argv` - * :c:member:`~PyConfig.use_environment` + * :c:member:`PyConfig.dev_mode` + * :c:member:`PyConfig.isolated` + * :c:member:`PyConfig.parse_argv` + * :c:member:`PyConfig.use_environment` Moreover, if :c:func:`PyConfig_SetArgv` or :c:func:`PyConfig_SetBytesArgv` - is used, this method must be called first, before other methods, since the + is used, this method must be called before other methods, since the preinitialization configuration depends on command line arguments (if :c:member:`parse_argv` is non-zero). @@ -424,11 +476,17 @@ PyConfig .. c:member:: PyWideStringList argv - Command line arguments, :data:`sys.argv`. See - :c:member:`~PyConfig.parse_argv` to parse :c:member:`~PyConfig.argv` the - same way the regular Python parses Python command line arguments. If - :c:member:`~PyConfig.argv` is empty, an empty string is added to ensure - that :data:`sys.argv` always exists and is never empty. + Command line arguments: :data:`sys.argv`. + + Set :c:member:`~PyConfig.parse_argv` to ``1`` to parse + :c:member:`~PyConfig.argv` the same way the regular Python parses Python + command line arguments and then to strip Python arguments from + :c:member:`~PyConfig.argv`. + + If :c:member:`~PyConfig.argv` is empty, an empty string is added to + ensure that :data:`sys.argv` always exists and is never empty. + + Default: ``NULL``. See also the :c:member:`~PyConfig.orig_argv` member. @@ -436,76 +494,136 @@ PyConfig :data:`sys.base_exec_prefix`. + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` output. + .. c:member:: wchar_t* base_executable - :data:`sys._base_executable`: ``__PYVENV_LAUNCHER__`` environment - variable value, or copy of :c:member:`PyConfig.executable`. 
+ Python base executable: :data:`sys._base_executable`. + + Set by the :envvar:`__PYVENV_LAUNCHER__` environment variable. + + Set from :c:member:`PyConfig.executable` if ``NULL``. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` output. .. c:member:: wchar_t* base_prefix :data:`sys.base_prefix`. - .. c:member:: wchar_t* platlibdir - - :data:`sys.platlibdir`: platform library directory name, set at configure time - by ``--with-platlibdir``, overrideable by the ``PYTHONPLATLIBDIR`` - environment variable. + Default: ``NULL``. - .. versionadded:: 3.9 + Part of the :ref:`Path Configuration ` output. .. c:member:: int buffered_stdio - If equals to 0, enable unbuffered mode, making the stdout and stderr - streams unbuffered. + If equals to 0 and :c:member:`~PyConfig.configure_c_stdio` is non-zero, + disable buffering on the C streams stdout and stderr. + + Set to 0 by the :option:`-u` command line option and the + :envvar:`PYTHONUNBUFFERED` environment variable. stdin is always opened in buffered mode. + Default: ``1``. + .. c:member:: int bytes_warning If equals to 1, issue a warning when comparing :class:`bytes` or :class:`bytearray` with :class:`str`, or comparing :class:`bytes` with - :class:`int`. If equal or greater to 2, raise a :exc:`BytesWarning` - exception. + :class:`int`. + + If equal or greater to 2, raise a :exc:`BytesWarning` exception in these + cases. + + Incremented by the :option:`-b` command line option. + + Default: ``0``. .. c:member:: wchar_t* check_hash_pycs_mode - Control the validation behavior of hash-based ``.pyc`` files (see - :pep:`552`): :option:`--check-hash-based-pycs` command line option value. + Control the validation behavior of hash-based ``.pyc`` files: + value of the :option:`--check-hash-based-pycs` command line option. - Valid values: ``always``, ``never`` and ``default``. + Valid values: - The default value is: ``default``. 
+ - ``L"always"``: Hash the source file for invalidation regardless of + value of the 'check_source' flag. + - ``L"never"``: Assume that hash-based pycs always are valid. + - ``L"default"``: The 'check_source' flag in hash-based pycs + determines invalidation. + + Default: ``L"default"``. + + See also :pep:`552` "Deterministic pycs". .. c:member:: int configure_c_stdio - If non-zero, configure C standard streams (``stdio``, ``stdout``, - ``stdout``). For example, set their mode to ``O_BINARY`` on Windows. + If non-zero, configure C standard streams: + + * On Windows, set the binary mode (``O_BINARY``) on stdin, stdout and + stderr. + * If :c:member:`~PyConfig.buffered_stdio` equals zero, disable buffering + of stdin, stdout and stderr streams. + * If :c:member:`~PyConfig.interactive` is non-zero, enable stream + buffering on stdin and stdout (only stdout on Windows). + + Default: ``1`` in Python config, ``0`` in isolated config. .. c:member:: int dev_mode If non-zero, enable the :ref:`Python Development Mode `. + Default: ``-1`` in Python mode, ``0`` in isolated mode. + .. c:member:: int dump_refs + Dump Python references? + If non-zero, dump all objects which are still alive at exit. - ``Py_TRACE_REFS`` macro must be defined in build. + Set to ``1`` by the :envvar:`PYTHONDUMPREFS` environment variable. + + Need a special build of Python with the ``Py_TRACE_REFS`` macro defined. + + Default: ``0``. .. c:member:: wchar_t* exec_prefix - :data:`sys.exec_prefix`. + The site-specific directory prefix where the platform-dependent Python + files are installed: :data:`sys.exec_prefix`. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` output. .. c:member:: wchar_t* executable + The absolute path of the executable binary for the Python interpreter: :data:`sys.executable`. + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` output. + .. c:member:: int faulthandler + Enable faulthandler? + If non-zero, call :func:`faulthandler.enable` at startup. 
+ Set to ``1`` by :option:`-X faulthandler <-X>` and the + :envvar:`PYTHONFAULTHANDLER` environment variable. + + Default: ``-1`` in Python mode, ``0`` in isolated mode. + .. c:member:: wchar_t* filesystem_encoding - Filesystem encoding: :func:`sys.getfilesystemencoding`. + :term:`Filesystem encoding `: + :func:`sys.getfilesystemencoding`. On macOS, Android and VxWorks: use ``"utf-8"`` by default. @@ -521,7 +639,7 @@ PyConfig ``mbstowcs()`` function decodes from a different encoding (usually Latin1). * ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string. - * Otherwise, use the LC_CTYPE locale encoding: + * Otherwise, use the :term:`locale encoding`: ``nl_langinfo(CODESET)`` result. At Python statup, the encoding name is normalized to the Python codec @@ -531,7 +649,8 @@ PyConfig .. c:member:: wchar_t* filesystem_errors - Filesystem error handler: :func:`sys.getfilesystemencodeerrors`. + :term:`Filesystem error handler `: + :func:`sys.getfilesystemencodeerrors`. On Windows: use ``"surrogatepass"`` by default, or ``"replace"`` if :c:member:`~PyPreConfig.legacy_windows_fs_encoding` of @@ -553,30 +672,62 @@ PyConfig Randomized hash function seed. If :c:member:`~PyConfig.use_hash_seed` is zero, a seed is chosen randomly - at Pythonstartup, and :c:member:`~PyConfig.hash_seed` is ignored. + at Python startup, and :c:member:`~PyConfig.hash_seed` is ignored. + + Set by the :envvar:`PYTHONHASHSEED` environment variable. + + Default *use_hash_seed* value: ``-1`` in Python mode, ``0`` in isolated + mode. .. c:member:: wchar_t* home Python home directory. - Initialized from :envvar:`PYTHONHOME` environment variable value by - default. + If :c:func:`Py_SetPythonHome` has been called, use its argument if it is + not ``NULL``. + + Set by the :envvar:`PYTHONHOME` environment variable. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` input. .. c:member:: int import_time If non-zero, profile import time. 
+ Set to ``1`` by the :option:`-X importtime <-X>` option and the + :envvar:`PYTHONPROFILEIMPORTTIME` environment variable. + + Default: ``0``. + .. c:member:: int inspect Enter interactive mode after executing a script or a command. + If greater than 0, enable inspect: when a script is passed as first + argument or the -c option is used, enter interactive mode after executing + the script or the command, even when :data:`sys.stdin` does not appear to + be a terminal. + + Incremented by the :option:`-i` command line option. Set to ``1`` if the + :envvar:`PYTHONINSPECT` environment variable is non-empty. + + Default: ``0``. + .. c:member:: int install_signal_handlers - Install signal handlers? + Install Python signal handlers? + + Default: ``1`` in Python mode, ``0`` in isolated mode. .. c:member:: int interactive - Interactive mode. + If greater than 0, enable the interactive mode (REPL). + + Incremented by the :option:`-i` command line option. + + Default: ``0``. .. c:member:: int isolated @@ -590,50 +741,93 @@ PyConfig * Set :c:member:`~PyConfig.use_environment` and :c:member:`~PyConfig.user_site_directory` to 0. + Default: ``0`` in Python mode, ``1`` in isolated mode. + + See also :c:member:`PyPreConfig.isolated`. + .. c:member:: int legacy_windows_stdio If non-zero, use :class:`io.FileIO` instead of :class:`io.WindowsConsoleIO` for :data:`sys.stdin`, :data:`sys.stdout` and :data:`sys.stderr`. + Set to ``1`` if the :envvar:`PYTHONLEGACYWINDOWSSTDIO` environment + variable is set to a non-empty string. + Only available on Windows. ``#ifdef MS_WINDOWS`` macro can be used for Windows specific code. + Default: ``0``. + + See also the :pep:`528` (Change Windows console encoding to UTF-8). + .. c:member:: int malloc_stats If non-zero, dump statistics on :ref:`Python pymalloc memory allocator ` at exit. + Set to ``1`` by the :envvar:`PYTHONMALLOCSTATS` environment variable. + The option is ignored if Python is built using ``--without-pymalloc``. + Default: ``0``. 
+ + .. c:member:: wchar_t* platlibdir + + Platform library directory name: :data:`sys.platlibdir`. + + Set by the :envvar:`PYTHONPLATLIBDIR` environment variable. + + Default: value of the ``PLATLIBDIR`` macro which is set at configure time + by ``--with-platlibdir`` (default: ``"lib"``). + + Part of the :ref:`Path Configuration ` input. + + .. versionadded:: 3.9 + .. c:member:: wchar_t* pythonpath_env - Module search paths as a string separated by ``DELIM`` + Module search paths (:data:`sys.path`) as a string separated by ``DELIM`` (:data:`os.path.pathsep`). - Initialized from :envvar:`PYTHONPATH` environment variable value by - default. + Set by the :envvar:`PYTHONPATH` environment variable. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` input. .. c:member:: PyWideStringList module_search_paths .. c:member:: int module_search_paths_set - :data:`sys.path`. If :c:member:`~PyConfig.module_search_paths_set` is - equal to 0, the :c:member:`~PyConfig.module_search_paths` is overridden - by the function calculating the :ref:`Path Configuration - `. + Module search paths: :data:`sys.path`. + + If :c:member:`~PyConfig.module_search_paths_set` is equal to 0, the + function calculating the :ref:`Path Configuration ` + overrides the :c:member:`~PyConfig.module_search_paths` and sets + :c:member:`~PyConfig.module_search_paths_set` to ``1``. + + Default: empty list (``module_search_paths``) and ``0`` + (``module_search_paths_set``). + + Part of the :ref:`Path Configuration ` output. .. c:member:: int optimization_level Compilation optimization level: - * 0: Peephole optimizer (and ``__debug__`` is set to ``True``) - * 1: Remove assertions, set ``__debug__`` to ``False`` - * 2: Strip docstrings + * ``0``: Peephole optimizer, set ``__debug__`` to ``True``. + * ``1``: Level 0, remove assertions, set ``__debug__`` to ``False``. + * ``2``: Level 1, strip docstrings. + + Incremented by the :option:`-O` command line option. 
Set to the + :envvar:`PYTHONOPTIMIZE` environment variable value. + + Default: ``0``. .. c:member:: PyWideStringList orig_argv The list of the original command line arguments passed to the Python - executable. + executable: :data:`sys.orig_argv`. If :c:member:`~PyConfig.orig_argv` list is empty and :c:member:`~PyConfig.argv` is not a list only containing an empty @@ -645,57 +839,117 @@ PyConfig See also the :c:member:`~PyConfig.argv` member and the :c:func:`Py_GetArgcArgv` function. + Default: empty list. + .. versionadded:: 3.10 .. c:member:: int parse_argv + Parse command line arguments? + If non-zero, parse :c:member:`~PyConfig.argv` the same way the regular - Python command line arguments, and strip Python arguments from - :c:member:`~PyConfig.argv`: see :ref:`Command Line Arguments - `. + Python parses :ref:`command line arguments `, and strip + Python arguments from :c:member:`~PyConfig.argv`. + + Default: ``1`` in Python mode, ``0`` in isolated mode. .. c:member:: int parser_debug - If non-zero, turn on parser debugging output (for expert only, depending + Parser debug mode. If greater than 0, turn on parser debugging output (for expert only, depending on compilation options). + Incremented by the :option:`-d` command line option. Set to the + :envvar:`PYTHONDEBUG` environment variable value. + + Default: ``0``. + .. c:member:: int pathconfig_warnings - If equal to 0, suppress warnings when calculating the :ref:`Path - Configuration ` (Unix only, Windows does not log any - warning). Otherwise, warnings are written into ``stderr``. + On Unix, if non-zero, calculating the :ref:`Path Configuration + ` can log warnings into ``stderr``. If equals to 0, + suppress these warnings. + + It has no effect on Windows. + + Default: ``1`` in Python mode, ``0`` in isolated mode. + + Part of the :ref:`Path Configuration ` input. .. c:member:: wchar_t* prefix - :data:`sys.prefix`. 
+ The site-specific directory prefix where the platform independent Python + files are installed: :data:`sys.prefix`. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` output. .. c:member:: wchar_t* program_name - Program name. Used to initialize :c:member:`~PyConfig.executable`, and in - early error messages. + Program name used to initialize :c:member:`~PyConfig.executable` and in + early error messages during Python initialization. + + * If :func:`Py_SetProgramName` has been called, use its argument. + * On macOS, use :envvar:`PYTHONEXECUTABLE` environment variable if set. + * If the ``WITH_NEXT_FRAMEWORK`` macro is defined, use + :envvar:`__PYVENV_LAUNCHER__` environment variable if set. + * Use ``argv[0]`` of :c:member:`~PyConfig.argv` if available and + non-empty. + * Otherwise, use ``L"python"`` on Windows, or ``L"python3"`` on other + platforms. + + Default: ``NULL``. + + Part of the :ref:`Path Configuration ` input. .. c:member:: wchar_t* pycache_prefix - :data:`sys.pycache_prefix`: ``.pyc`` cache prefix. + Directory where cached ``.pyc`` files are written: + :data:`sys.pycache_prefix`. + + Set by the :option:`-X pycache_prefix=PATH <-X>` command line option and + the :envvar:`PYTHONPYCACHEPREFIX` environment variable. If ``NULL``, :data:`sys.pycache_prefix` is set to ``None``. + Default: ``NULL``. + .. c:member:: int quiet - Quiet mode. For example, don't display the copyright and version messages - in interactive mode. + Quiet mode. If greater than 0, don't display the copyright and version at + Python startup in interactive mode. + + Incremented by the :option:`-q` command line option. + + Default: ``0``. .. c:member:: wchar_t* run_command - ``python3 -c COMMAND`` argument. Used by :c:func:`Py_RunMain`. + Value of the :option:`-c` command line option. + + Used by :c:func:`Py_RunMain`. + + Default: ``NULL``. .. c:member:: wchar_t* run_filename - ``python3 FILENAME`` argument. Used by :c:func:`Py_RunMain`. 
+ Filename passed on the command line: trailing command line argument + without :option:`-c` or :option:`-m`. + + For example, it is set to ``script.py`` by the ``python3 script.py arg`` + command. + + Used by :c:func:`Py_RunMain`. + + Default: ``NULL``. .. c:member:: wchar_t* run_module - ``python3 -m MODULE`` argument. Used by :c:func:`Py_RunMain`. + Value of the :option:`-m` command line option. + + Used by :c:func:`Py_RunMain`. + + Default: ``NULL``. .. c:member:: int show_ref_count @@ -705,64 +959,146 @@ PyConfig Need a debug build of Python (``Py_REF_DEBUG`` macro must be defined). + Default: ``0``. + .. c:member:: int site_import Import the :mod:`site` module at startup? + If equal to zero, disable the import of the module site and the + site-dependent manipulations of :data:`sys.path` that it entails. + + Also disable these manipulations if the :mod:`site` module is explicitly + imported later (call :func:`site.main` if you want them to be triggered). + + Set to ``0`` by the :option:`-S` command line option. + + :data:`sys.flags.no_site` is set to the inverted value of + :c:member:`~PyConfig.site_import`. + + Default: ``1``. + .. c:member:: int skip_source_first_line - Skip the first line of the source? + If non-zero, skip the first line of the :c:member:`PyConfig.run_filename` + source. + + It allows the usage of non-Unix forms of ``#!cmd``. This is intended for + a DOS specific hack only. + + Set to ``1`` by the :option:`-x` command line option. + + Default: ``0``. .. c:member:: wchar_t* stdio_encoding .. c:member:: wchar_t* stdio_errors Encoding and encoding errors of :data:`sys.stdin`, :data:`sys.stdout` and - :data:`sys.stderr`. + :data:`sys.stderr` (but :data:`sys.stderr` always uses + ``"backslashreplace"`` error handler). + + If :c:func:`Py_SetStandardStreamEncoding` has been called, use its + *encoding* and *errors* arguments if they are not ``NULL``. + + Use the :envvar:`PYTHONIOENCODING` environment variable if it is + non-empty. 
+ + Default encoding: + + * ``"UTF-8"`` if :c:member:`PyPreConfig.utf8_mode` is non-zero. + * Otherwise, use the :term:`locale encoding`. + + Default error handler: + + * On Windows: use ``"surrogateescape"``. + * ``"surrogateescape"`` if :c:member:`PyPreConfig.utf8_mode` is non-zero, + or if the LC_CTYPE locale is "C" or "POSIX". + * ``"strict"`` otherwise. .. c:member:: int tracemalloc + Enable tracemalloc? + If non-zero, call :func:`tracemalloc.start` at startup. + Set by :option:`-X tracemalloc=N <-X>` command line option and by the + :envvar:`PYTHONTRACEMALLOC` environment variable. + + Default: ``-1`` in Python mode, ``0`` in isolated mode. + .. c:member:: int use_environment - If greater than 0, use :ref:`environment variables `. + Use :ref:`environment variables `? + + If equals to zero, ignore the :ref:`environment variables + `. + + Default: ``1`` in Python config and ``0`` in isolated config. .. c:member:: int user_site_directory - If non-zero, add user site directory to :data:`sys.path`. + If non-zero, add the user site directory to :data:`sys.path`. + + Set to ``0`` by the :option:`-s` and :option:`-I` command line options. + + Set to ``0`` by the :envvar:`PYTHONNOUSERSITE` environment variable. + + Default: ``1`` in Python mode, ``0`` in isolated mode. .. c:member:: int verbose - If non-zero, enable verbose mode. + Verbose mode. If greater than 0, print a message each time a module is + imported, showing the place (filename or built-in module) from which + it is loaded. + + If greater or equal to 2, print a message for each file that is checked + for when searching for a module. Also provides information on module + cleanup at exit. + + Incremented by the :option:`-v` command line option. + + Set to the :envvar:`PYTHONVERBOSE` environment variable value. + + Default: ``0``. .. c:member:: PyWideStringList warnoptions - :data:`sys.warnoptions`: options of the :mod:`warnings` module to build - warnings filters: lowest to highest priority. 
+ Options of the :mod:`warnings` module to build warnings filters, lowest + to highest priority: :data:`sys.warnoptions`. The :mod:`warnings` module adds :data:`sys.warnoptions` in the reverse order: the last :c:member:`PyConfig.warnoptions` item becomes the first item of :data:`warnings.filters` which is checked first (highest priority). + Default: empty list. + .. c:member:: int write_bytecode - If non-zero, write ``.pyc`` files. + If equal to 0, Python won't try to write ``.pyc`` files on the import of + source modules. + + Set to ``0`` by the :option:`-B` command line option and the + :envvar:`PYTHONDONTWRITEBYTECODE` environment variable. :data:`sys.dont_write_bytecode` is initialized to the inverted value of :c:member:`~PyConfig.write_bytecode`. + Default: ``1``. + .. c:member:: PyWideStringList xoptions - :data:`sys._xoptions`. + Values of the :option:`-X` command line options: :data:`sys._xoptions`. + + Default: empty list. -If ``parse_argv`` is non-zero, ``argv`` arguments are parsed the same -way the regular Python parses command line arguments, and Python -arguments are stripped from ``argv``: see :ref:`Command Line Arguments -`. +If :c:member:`~PyConfig.parse_argv` is non-zero, :c:member:`~PyConfig.argv` +arguments are parsed the same way the regular Python parses :ref:`command line +arguments `, and Python arguments are stripped from +:c:member:`~PyConfig.argv`. -The ``xoptions`` options are parsed to set other options: see :option:`-X` -option. +The :c:member:`~PyConfig.xoptions` options are parsed to set other options: see +the :option:`-X` command line option. .. versionchanged:: 3.9 @@ -781,9 +1117,9 @@ Function to initialize Python: The caller is responsible to handle exceptions (error or exit) using :c:func:`PyStatus_Exception` and :c:func:`Py_ExitStatusException`. 
-If ``PyImport_FrozenModules``, ``PyImport_AppendInittab()`` or -``PyImport_ExtendInittab()`` are used, they must be set or called after Python -preinitialization and before the Python initialization. +If :c:func:`PyImport_FrozenModules`, :c:func:`PyImport_AppendInittab` or +:c:func:`PyImport_ExtendInittab` are used, they must be set or called after +Python preinitialization and before the Python initialization. Example setting the program name:: @@ -828,7 +1164,7 @@ configuration, and then override some parameters:: Implicitly preinitialize Python. */ status = PyConfig_SetBytesString(&config, &config.program_name, - program_name); + program_name); if (PyStatus_Exception(status)) { goto done; } @@ -894,7 +1230,8 @@ the regular Python. Environments variables and command line arguments are used to configure Python, whereas global configuration variables are ignored. -This function enables C locale coercion (:pep:`538`) and UTF-8 Mode +This function enables C locale coercion (:pep:`538`) +and :ref:`Python UTF-8 Mode ` (:pep:`540`) depending on the LC_CTYPE locale, :envvar:`PYTHONUTF8` and :envvar:`PYTHONCOERCECLOCALE` environment variables. diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index 9ac9179..97717f5 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -118,22 +118,21 @@ Operating System Utilities .. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size) - Decode a byte string from the locale encoding with the :ref:`surrogateescape - error handler `: undecodable bytes are decoded as - characters in range U+DC80..U+DCFF. If a byte sequence can be decoded as a - surrogate character, escape the bytes using the surrogateescape error - handler instead of decoding them. 
- - Encoding, highest priority to lowest priority: - - * ``UTF-8`` on macOS, Android, and VxWorks; - * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; - * ``UTF-8`` if the Python UTF-8 mode is enabled; - * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``, - ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias), - and :c:func:`mbstowcs` and :c:func:`wcstombs` functions uses the - ``ISO-8859-1`` encoding. - * the current locale encoding. + .. warning:: + This function should not be called directly: use the :c:type:`PyConfig` + API with the :c:func:`PyConfig_SetBytesString` function which ensures + that :ref:`Python is preinitialized `. + + This function must not be called before :ref:`Python is preinitialized + ` and so that the LC_CTYPE locale is properly configured: see + the :c:func:`Py_PreInitialize` function. + + Decode a byte string from the :term:`filesystem encoding and error handler`. + If the error handler is :ref:`surrogateescape error handler + `, undecodable bytes are decoded as characters in range + U+DC80..U+DCFF; and if a byte sequence can be decoded as a surrogate + character, the bytes are escaped using the surrogateescape error handler + instead of decoding them. Return a pointer to a newly allocated wide character string, use :c:func:`PyMem_RawFree` to free the memory. If size is not ``NULL``, write @@ -143,6 +142,10 @@ Operating System Utilities not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to ``(size_t)-2`` on decoding error. + The :term:`filesystem encoding and error handler` are selected by + :c:func:`PyConfig_Read`: see :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. + Decoding errors should never happen, unless there is a bug in the C library. @@ -157,7 +160,8 @@ Operating System Utilities .. versionadded:: 3.5 .. versionchanged:: 3.7 - The function now uses the UTF-8 encoding in the UTF-8 mode. 
+ The function now uses the UTF-8 encoding in the :ref:`Python UTF-8 Mode + `. .. versionchanged:: 3.8 The function now uses the UTF-8 encoding on Windows if @@ -166,22 +170,10 @@ Operating System Utilities .. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) - Encode a wide character string to the locale encoding with the - :ref:`surrogateescape error handler `: surrogate characters - in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF. - - Encoding, highest priority to lowest priority: - - * ``UTF-8`` on macOS, Android, and VxWorks; - * ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero; - * ``UTF-8`` if the Python UTF-8 mode is enabled; - * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``, - ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias), - and :c:func:`mbstowcs` and :c:func:`wcstombs` functions uses the - ``ISO-8859-1`` encoding. - * the current locale encoding. - - The function uses the UTF-8 encoding in the Python UTF-8 mode. + Encode a wide character string to the :term:`filesystem encoding and error + handler`. If the error handler is :ref:`surrogateescape error handler + `, surrogate characters in the range U+DC80..U+DCFF are + converted to bytes 0x80..0xFF. Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free` to free the memory. Return ``NULL`` on encoding error or memory allocation @@ -190,9 +182,18 @@ Operating System Utilities If error_pos is not ``NULL``, ``*error_pos`` is set to ``(size_t)-1`` on success, or set to the index of the invalid character on encoding error. + The :term:`filesystem encoding and error handler` are selected by + :c:func:`PyConfig_Read`: see :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. + Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back to a wide character string. + .. 
warning:: + This function must not be called before :ref:`Python is preinitialized + ` and so that the LC_CTYPE locale is properly configured: see + the :c:func:`Py_PreInitialize` function. + .. seealso:: The :c:func:`PyUnicode_EncodeFSDefault` and @@ -201,7 +202,8 @@ Operating System Utilities .. versionadded:: 3.5 .. versionchanged:: 3.7 - The function now uses the UTF-8 encoding in the UTF-8 mode. + The function now uses the UTF-8 encoding in the :ref:`Python UTF-8 Mode + `. .. versionchanged:: 3.8 The function now uses the UTF-8 encoding on Windows if diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 54bd0a3..b7f99d3 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -783,7 +783,7 @@ system. :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at Python startup). - This function ignores the Python UTF-8 mode. + This function ignores the :ref:`Python UTF-8 Mode `. .. seealso:: @@ -819,7 +819,7 @@ system. :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at Python startup). - This function ignores the Python UTF-8 mode. + This function ignores the :ref:`Python UTF-8 Mode `. .. seealso:: @@ -878,8 +878,7 @@ conversion function: .. c:function:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) - Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the - :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. + Decode a string from the :term:`filesystem encoding and error handler`. If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the locale encoding. @@ -899,8 +898,8 @@ conversion function: .. c:function:: PyObject* PyUnicode_DecodeFSDefault(const char *s) - Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding` - and the :c:data:`Py_FileSystemDefaultEncodeErrors` error handler. + Decode a null-terminated string from the :term:`filesystem encoding and + error handler`. 
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the locale encoding. diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index b908cb8..0f760ea 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -112,9 +112,9 @@ the same library that the Python runtime is using. Similar to :c:func:`PyRun_SimpleStringFlags`, but the Python source code is read from *fp* instead of an in-memory string. *filename* should be the name of - the file, it is decoded from the filesystem encoding - (:func:`sys.getfilesystemencoding`). If *closeit* is true, the file is - closed before PyRun_SimpleFileExFlags returns. + the file, it is decoded from :term:`filesystem encoding and error handler`. + If *closeit* is true, the file is closed before + ``PyRun_SimpleFileExFlags()`` returns. .. note:: On Windows, *fp* should be opened as binary mode (e.g. ``fopen(filename, "rb")``). @@ -132,7 +132,7 @@ the same library that the Python runtime is using. Read and execute a single statement from a file associated with an interactive device according to the *flags* argument. The user will be prompted using ``sys.ps1`` and ``sys.ps2``. *filename* is decoded from the - filesystem encoding (:func:`sys.getfilesystemencoding`). + :term:`filesystem encoding and error handler`. Returns ``0`` when the input was executed successfully, ``-1`` if there was an exception, or an error code @@ -151,9 +151,8 @@ the same library that the Python runtime is using. Read and execute statements from a file associated with an interactive device until EOF is reached. The user will be prompted using ``sys.ps1`` and - ``sys.ps2``. *filename* is decoded from the filesystem encoding - (:func:`sys.getfilesystemencoding`). Returns ``0`` at EOF or a negative - number upon failure. + ``sys.ps2``. *filename* is decoded from the :term:`filesystem encoding and + error handler`. Returns ``0`` at EOF or a negative number upon failure. .. 
c:var:: int (*PyOS_InputHook)(void) @@ -206,8 +205,8 @@ the same library that the Python runtime is using. Parse Python source code from *str* using the start token *start* according to the *flags* argument. The result can be used to create a code object which can be evaluated efficiently. This is useful if a code fragment must be evaluated - many times. *filename* is decoded from the filesystem encoding - (:func:`sys.getfilesystemencoding`). + many times. *filename* is decoded from the :term:`filesystem encoding and + error handler`. .. c:function:: struct _node* PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) @@ -262,7 +261,7 @@ the same library that the Python runtime is using. Similar to :c:func:`PyRun_StringFlags`, but the Python source code is read from *fp* instead of an in-memory string. *filename* should be the name of the file, - it is decoded from the filesystem encoding (:func:`sys.getfilesystemencoding`). + it is decoded from the :term:`filesystem encoding and error handler`. If *closeit* is true, the file is closed before :c:func:`PyRun_FileExFlags` returns. @@ -301,7 +300,7 @@ the same library that the Python runtime is using. .. c:function:: PyObject* Py_CompileStringExFlags(const char *str, const char *filename, int start, PyCompilerFlags *flags, int optimize) Like :c:func:`Py_CompileStringObject`, but *filename* is a byte string - decoded from the filesystem encoding (:func:`os.fsdecode`). + decoded from the :term:`filesystem encoding and error handler`. .. versionadded:: 3.2 diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 4fd01e0..506973e 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -386,6 +386,25 @@ Glossary file-like object A synonym for :term:`file object`. + filesystem encoding and error handler + Encoding and error handler used by Python to decode bytes from the + operating system and encode Unicode to the operating system. 
+ + The filesystem encoding must guarantee to successfully decode all bytes + below 128. If the file system encoding fails to provide this guarantee, + API functions can raise :exc:`UnicodeError`. + + The :func:`sys.getfilesystemencoding` and + :func:`sys.getfilesystemencodeerrors` functions can be used to get the + filesystem encoding and error handler. + + The :term:`filesystem encoding and error handler` are configured at + Python startup by the :c:func:`PyConfig_Read` function: see + :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. + + See also the :term:`locale encoding`. + finder An object that tries to find the :term:`loader` for a module that is being imported. @@ -673,6 +692,18 @@ Glossary thread removes *key* from *mapping* after the test, but before the lookup. This issue can be solved with locks or by using the EAFP approach. + locale encoding + On Unix, it is the encoding of the LC_CTYPE locale. It can be set with + ``locale.setlocale(locale.LC_CTYPE, new_locale)``. + + On Windows, it is the ANSI code page (ex: ``cp1252``). + + ``locale.getpreferredencoding(False)`` can be used to get the locale + encoding. + + Python uses the :term:`filesystem encoding and error handler` to convert + between Unicode filenames and bytes filenames. + list A built-in Python :term:`sequence`. Despite its name it is more akin to an array in other languages than to a linked list since access to diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index e948c1e..535b21b 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -609,9 +609,9 @@ implemented by converting the Unicode string into some encoding that varies depending on the system. Today Python is converging on using UTF-8: Python on MacOS has used UTF-8 for several versions, and Python 3.6 switched to using UTF-8 on Windows as well.
On Unix systems, -there will only be a filesystem encoding if you've set the ``LANG`` or -``LC_CTYPE`` environment variables; if you haven't, the default -encoding is again UTF-8. +there will only be a :term:`filesystem encoding ` if you've set the ``LANG`` or ``LC_CTYPE`` environment variables; if +you haven't, the default encoding is again UTF-8. The :func:`sys.getfilesystemencoding` function returns the encoding to use on your current system, in case you want to do the encoding manually, but there's @@ -633,8 +633,8 @@ provided the directory path as bytes or a Unicode string. If you pass a Unicode string as the path, filenames will be decoded using the filesystem's encoding and a list of Unicode strings will be returned, while passing a byte path will return the filenames as bytes. For example, -assuming the default filesystem encoding is UTF-8, running the following -program:: +assuming the default :term:`filesystem encoding ` is UTF-8, running the following program:: fn = 'filename\u4500abc' f = open(fn, 'w') diff --git a/Doc/library/devmode.rst b/Doc/library/devmode.rst index d5a40cd..e6ed594 100644 --- a/Doc/library/devmode.rst +++ b/Doc/library/devmode.rst @@ -93,6 +93,9 @@ The Python Development Mode does not prevent the :option:`-O` command line option from removing :keyword:`assert` statements nor from setting :const:`__debug__` to ``False``. +The Python Development Mode can only be enabled at the Python startup. Its +value can be read from :data:`sys.flags.dev_mode `. + .. versionchanged:: 3.8 The :class:`io.IOBase` destructor now logs ``close()`` exceptions. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index df2cda9..8fb25a5 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -313,8 +313,8 @@ The following exceptions are the exceptions that are usually raised. ..
versionchanged:: 3.4 The :attr:`filename` attribute is now the original file name passed to the function, instead of the name encoded to or decoded from the - filesystem encoding. Also, the *filename2* constructor argument and - attribute was added. + :term:`filesystem encoding and error handler`. Also, the *filename2* + constructor argument and attribute was added. .. exception:: OverflowError diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 678148a..0a77be4 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -315,21 +315,25 @@ The :mod:`locale` module defines the following exception and functions: .. function:: getpreferredencoding(do_setlocale=True) - Return the encoding used for text data, according to user preferences. User - preferences are expressed differently on different systems, and might not be - available programmatically on some systems, so this function only returns a - guess. + Return the :term:`locale encoding` used for text data, according to user + preferences. User preferences are expressed differently on different + systems, and might not be available programmatically on some systems, so + this function only returns a guess. - On some systems, it is necessary to invoke :func:`setlocale` to obtain the user - preferences, so this function is not thread-safe. If invoking setlocale is not - necessary or desired, *do_setlocale* should be set to ``False``. + On some systems, it is necessary to invoke :func:`setlocale` to obtain the + user preferences, so this function is not thread-safe. If invoking setlocale + is not necessary or desired, *do_setlocale* should be set to ``False``. - On Android or in the UTF-8 mode (:option:`-X` ``utf8`` option), always - return ``'UTF-8'``, the locale and the *do_setlocale* argument are ignored. + On Android or if the :ref:`Python UTF-8 Mode ` is enabled, always + return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale* + argument are ignored. 
+ + The :ref:`Python preinitialization ` configures the LC_CTYPE + locale. See also the :term:`filesystem encoding and error handler`. .. versionchanged:: 3.7 - The function now always returns ``UTF-8`` on Android or if the UTF-8 mode - is enabled. + The function now always returns ``UTF-8`` on Android or if the + :ref:`Python UTF-8 Mode ` is enabled. .. function:: normalize(localename) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 718d981..f9f35b3 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -68,8 +68,13 @@ File Names, Command Line Arguments, and Environment Variables In Python, file names, command line arguments, and environment variables are represented using the string type. On some systems, decoding these strings to and from bytes is necessary before passing them to the operating system. Python -uses the file system encoding to perform this conversion (see -:func:`sys.getfilesystemencoding`). +uses the :term:`filesystem encoding and error handler` to perform this +conversion (see :func:`sys.getfilesystemencoding`). + +The :term:`filesystem encoding and error handler` are configured at Python +startup by the :c:func:`PyConfig_Read` function: see +:c:member:`~PyConfig.filesystem_encoding` and +:c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. .. versionchanged:: 3.1 On some systems, conversion using the file system encoding may fail. In this @@ -79,9 +84,70 @@ uses the file system encoding to perform this conversion (see original byte on encoding. -The file system encoding must guarantee to successfully decode all bytes -below 128. If the file system encoding fails to provide this guarantee, API -functions may raise UnicodeErrors. +The :term:`file system encoding ` must +guarantee to successfully decode all bytes below 128. If the file system +encoding fails to provide this guarantee, API functions can raise +:exc:`UnicodeError`. + +See also the :term:`locale encoding`. + + +.. 
_utf8-mode: + +Python UTF-8 Mode +----------------- + +.. versionadded:: 3.7 + See :pep:`540` for more details. + +The Python UTF-8 Mode ignores the :term:`locale encoding` and forces the usage +of the UTF-8 encoding: + +* Use UTF-8 as the :term:`filesystem encoding `. +* :func:`sys.getfilesystemencoding()` returns ``'UTF-8'``. +* :func:`locale.getpreferredencoding()` returns ``'UTF-8'`` (the *do_setlocale* + argument has no effect). +* :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` all use + UTF-8 as their text encoding, with the ``surrogateescape`` + :ref:`error handler ` being enabled for :data:`sys.stdin` + and :data:`sys.stdout` (:data:`sys.stderr` continues to use + ``backslashreplace`` as it does in the default locale-aware mode) + +Note that the standard stream settings in UTF-8 mode can be overridden by +:envvar:`PYTHONIOENCODING` (just as they can be in the default locale-aware +mode). + +As a consequence of the changes in those lower level APIs, other higher +level APIs also exhibit different default behaviours: + +* Command line arguments, environment variables and filenames are decoded + to text using the UTF-8 encoding. +* :func:`os.fsdecode()` and :func:`os.fsencode()` use the UTF-8 encoding. +* :func:`open()`, :func:`io.open()`, and :func:`codecs.open()` use the UTF-8 + encoding by default. However, they still use the strict error handler by + default so that attempting to open a binary file in text mode is likely + to raise an exception rather than producing nonsense data. + +The :ref:`Python UTF-8 Mode ` is enabled if the LC_CTYPE locale is +``C`` or ``POSIX`` at Python startup (see the :c:func:`PyConfig_Read` +function). + +It can be enabled or disabled using the :option:`-X utf8 <-X>` command line +option and the :envvar:`PYTHONUTF8` environment variable. 
+ +If the :envvar:`PYTHONUTF8` environment variable is not set at all, then the +interpreter defaults to using the current locale settings, *unless* the current +locale is identified as a legacy ASCII-based locale (as described for +:envvar:`PYTHONCOERCECLOCALE`), and locale coercion is either disabled or +fails. In such legacy locales, the interpreter will default to enabling UTF-8 +mode unless explicitly instructed not to do so. + +The Python UTF-8 Mode can only be enabled at the Python startup. Its value +can be read from :data:`sys.flags.utf8_mode `. + +See also the :ref:`UTF-8 mode on Windows ` +and the :term:`filesystem encoding and error handler`. .. _os-procinfo: @@ -165,9 +231,9 @@ process and user. .. function:: fsencode(filename) - Encode :term:`path-like ` *filename* to the filesystem - encoding with ``'surrogateescape'`` error handler, or ``'strict'`` on - Windows; return :class:`bytes` unchanged. + Encode :term:`path-like ` *filename* to the + :term:`filesystem encoding and error handler`; return :class:`bytes` + unchanged. :func:`fsdecode` is the reverse function. @@ -181,8 +247,8 @@ process and user. .. function:: fsdecode(filename) Decode the :term:`path-like ` *filename* from the - filesystem encoding with ``'surrogateescape'`` error handler, or ``'strict'`` - on Windows; return :class:`str` unchanged. + :term:`filesystem encoding and error handler`; return :class:`str` + unchanged. :func:`fsencode` is the reverse function. @@ -3246,7 +3312,7 @@ These functions are all available on Linux only. Removes the extended filesystem attribute *attribute* from *path*. *attribute* should be bytes or str (directly or indirectly through the :class:`PathLike` interface). If it is a string, it is encoded - with the filesystem encoding. + with the :term:`filesystem encoding and error handler`. This function can support :ref:`specifying a file descriptor ` and :ref:`not following symlinks `. @@ -3262,7 +3328,7 @@ These functions are all available on Linux only. 
Set the extended filesystem attribute *attribute* on *path* to *value*. *attribute* must be a bytes or str with no embedded NULs (directly or indirectly through the :class:`PathLike` interface). If it is a str, - it is encoded with the filesystem encoding. *flags* may be + it is encoded with the :term:`filesystem encoding and error handler`. *flags* may be :data:`XATTR_REPLACE` or :data:`XATTR_CREATE`. If :data:`XATTR_REPLACE` is given and the attribute does not exist, ``EEXISTS`` will be raised. If :data:`XATTR_CREATE` is given and the attribute already exists, the diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index f0acfcf..0f13adc 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -627,21 +627,24 @@ always available. .. function:: getfilesystemencoding() - Return the name of the encoding used to convert between Unicode - filenames and bytes filenames. + Get the :term:`filesystem encoding `: + the encoding used with the :term:`filesystem error handler ` to convert between Unicode filenames and bytes + filenames. The filesystem error handler is returned from + :func:`getfilesystemencodeerrors`. For best compatibility, str should be used for filenames in all cases, although representing filenames as bytes is also supported. Functions accepting or returning filenames should support either str or bytes and internally convert to the system's preferred representation. - This encoding is always ASCII-compatible. - :func:`os.fsencode` and :func:`os.fsdecode` should be used to ensure that the correct encoding and errors mode are used. - The filesystem encoding is initialized from - :c:member:`PyConfig.filesystem_encoding`. + The :term:`filesystem encoding and error handler` are configured at Python + startup by the :c:func:`PyConfig_Read` function: see + :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. ..
versionchanged:: 3.2 :func:`getfilesystemencoding` result cannot be ``None`` anymore. @@ -651,20 +654,25 @@ always available. and :func:`_enablelegacywindowsfsencoding` for more information. .. versionchanged:: 3.7 - Return 'utf-8' in the UTF-8 mode. + Return ``'utf-8'`` if the :ref:`Python UTF-8 Mode ` is + enabled. .. function:: getfilesystemencodeerrors() - Return the name of the error mode used to convert between Unicode filenames - and bytes filenames. The encoding name is returned from + Get the :term:`filesystem error handler `: the error handler used with the :term:`filesystem encoding + ` to convert between Unicode + filenames and bytes filenames. The filesystem encoding is returned from :func:`getfilesystemencoding`. :func:`os.fsencode` and :func:`os.fsdecode` should be used to ensure that the correct encoding and errors mode are used. - The filesystem error handler is initialized from - :c:member:`PyConfig.filesystem_errors`. + The :term:`filesystem encoding and error handler` are configured at Python + startup by the :c:func:`PyConfig_Read` function: see + :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. .. versionadded:: 3.6 @@ -1457,8 +1465,9 @@ always available. .. function:: _enablelegacywindowsfsencoding() - Changes the default filesystem encoding and errors mode to 'mbcs' and - 'replace' respectively, for consistency with versions of Python prior to 3.6. + Changes the :term:`filesystem encoding and error handler` to 'mbcs' and + 'replace' respectively, for consistency with versions of Python prior to + 3.6. This is equivalent to defining the :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable before launching Python. @@ -1488,9 +1497,8 @@ always available. returned by the :func:`open` function. Their parameters are chosen as follows: - * The character encoding is platform-dependent. 
Non-Windows - platforms use the locale encoding (see - :meth:`locale.getpreferredencoding()`). + * The encoding and error handling are initialized from + :c:member:`PyConfig.stdio_encoding` and :c:member:`PyConfig.stdio_errors`. On Windows, UTF-8 is used for the console device. Non-character devices such as disk files and pipes use the system locale @@ -1498,7 +1506,7 @@ always available. devices such as NUL (i.e. where ``isatty()`` returns ``True``) use the value of the console input and output codepages at startup, respectively for stdin and stdout/stderr. This defaults to the - system locale encoding if the process is not initially attached + system :term:`locale encoding` if the process is not initially attached to a console. The special behaviour of the console can be overridden diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 603b0e1..04e0f32 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -447,10 +447,9 @@ Miscellaneous options * ``-X dev``: enable :ref:`Python Development Mode `, introducing additional runtime checks that are too expensive to be enabled by default. - * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding - the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8 - mode (even when it would otherwise activate automatically). - See :envvar:`PYTHONUTF8` for more details. + * ``-X utf8`` enables the :ref:`Python UTF-8 Mode `. + ``-X utf8=0`` explicitly disables :ref:`Python UTF-8 Mode ` + (even when it would otherwise activate automatically). * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. @@ -810,9 +809,10 @@ conflict. .. envvar:: PYTHONLEGACYWINDOWSFSENCODING - If set to a non-empty string, the default filesystem encoding and errors mode - will revert to their pre-3.6 values of 'mbcs' and 'replace', respectively.
- Otherwise, the new defaults 'utf-8' and 'surrogatepass' are used. + If set to a non-empty string, the default :term:`filesystem encoding and + error handler` mode will revert to their pre-3.6 values of 'mbcs' and + 'replace', respectively. Otherwise, the new defaults 'utf-8' and + 'surrogatepass' are used. This may also be enabled at runtime with :func:`sys._enablelegacywindowsfsencoding()`. @@ -898,54 +898,14 @@ conflict. .. envvar:: PYTHONUTF8 - If set to ``1``, enables the interpreter's UTF-8 mode, where ``UTF-8`` is - used as the text encoding for system interfaces, regardless of the - current locale setting. + If set to ``1``, enable the :ref:`Python UTF-8 Mode `. - This means that: - - * :func:`sys.getfilesystemencoding()` returns ``'UTF-8'`` (the locale - encoding is ignored). - * :func:`locale.getpreferredencoding()` returns ``'UTF-8'`` (the locale - encoding is ignored, and the function's ``do_setlocale`` parameter has no - effect). - * :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` all use - UTF-8 as their text encoding, with the ``surrogateescape`` - :ref:`error handler ` being enabled for :data:`sys.stdin` - and :data:`sys.stdout` (:data:`sys.stderr` continues to use - ``backslashreplace`` as it does in the default locale-aware mode) - - As a consequence of the changes in those lower level APIs, other higher - level APIs also exhibit different default behaviours: - - * Command line arguments, environment variables and filenames are decoded - to text using the UTF-8 encoding. - * :func:`os.fsdecode()` and :func:`os.fsencode()` use the UTF-8 encoding. - * :func:`open()`, :func:`io.open()`, and :func:`codecs.open()` use the UTF-8 - encoding by default. However, they still use the strict error handler by - default so that attempting to open a binary file in text mode is likely - to raise an exception rather than producing nonsense data. 
- - Note that the standard stream settings in UTF-8 mode can be overridden by - :envvar:`PYTHONIOENCODING` (just as they can be in the default locale-aware - mode). - - If set to ``0``, the interpreter runs in its default locale-aware mode. + If set to ``0``, disable the :ref:`Python UTF-8 Mode <utf8-mode>`. Setting any other non-empty string causes an error during interpreter initialisation. - If this environment variable is not set at all, then the interpreter defaults - to using the current locale settings, *unless* the current locale is - identified as a legacy ASCII-based locale - (as described for :envvar:`PYTHONCOERCECLOCALE`), and locale coercion is - either disabled or fails. In such legacy locales, the interpreter will - default to enabling UTF-8 mode unless explicitly instructed not to do so. - - Also available as the :option:`-X` ``utf8`` option. - .. versionadded:: 3.7 - See :pep:`540` for more details. Debug-mode variables diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 275495b..78c1e03 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -614,21 +614,14 @@ Page). Python uses it for the default encoding of text files (e.g. This may cause issues because UTF-8 is widely used on the internet and most Unix systems, including WSL (Windows Subsystem for Linux). -You can use UTF-8 mode to change the default text encoding to UTF-8. -You can enable UTF-8 mode via the ``-X utf8`` command line option, or -the ``PYTHONUTF8=1`` environment variable. See :envvar:`PYTHONUTF8` for -enabling UTF-8 mode, and :ref:`setting-envvars` for how to modify -environment variables. - -When UTF-8 mode is enabled: - -* :func:`locale.getpreferredencoding` returns ``'UTF-8'`` instead of - the system encoding. This function is used for the default text - encoding in many places, including :func:`open`, :class:`Popen`, - :meth:`Path.read_text`, etc. -* :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` - all use UTF-8 as their text encoding.
-* You can still use the system encoding via the "mbcs" codec. +You can use the :ref:`Python UTF-8 Mode <utf8-mode>` to change the default text +encoding to UTF-8. You can enable the :ref:`Python UTF-8 Mode <utf8-mode>` via +the ``-X utf8`` command line option, or the ``PYTHONUTF8=1`` environment +variable. See :envvar:`PYTHONUTF8` for enabling UTF-8 mode, and +:ref:`setting-envvars` for how to modify environment variables. + +When the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, you can still use the +system encoding (the ANSI Code Page) via the "mbcs" codec. Note that adding ``PYTHONUTF8=1`` to the default environment variables will affect all Python 3.7+ applications on your system. @@ -641,7 +634,8 @@ temporarily or use the ``-X utf8`` command line option. on Windows for: * Console I/O including standard I/O (see :pep:`528` for details). - * The filesystem encoding (see :pep:`529` for details). + * The :term:`filesystem encoding <filesystem encoding and error handler>` + (see :pep:`529` for details). .. _launcher: diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 25b1e1e..7590af3 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -232,7 +232,8 @@ PEP 540: Forced UTF-8 Runtime Mode ----------------------------------- The new :option:`-X` ``utf8`` command line option and :envvar:`PYTHONUTF8` -environment variable can be used to enable the CPython *UTF-8 mode*. +environment variable can be used to enable the :ref:`Python UTF-8 Mode +<utf8-mode>`. When in UTF-8 mode, CPython ignores the locale settings, and uses the UTF-8 encoding by default. The error handlers for :data:`sys.stdin` and diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index dd5ca61..3a38f77 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -127,273 +127,84 @@ PyAPI_FUNC(void) PyPreConfig_InitIsolatedConfig(PyPreConfig *config); /* --- PyConfig ---------------------------------------------- */ +/* This structure is best documented in the Doc/c-api/init_config.rst file.
*/ typedef struct { int _config_init; /* _PyConfigInitEnum value */ - int isolated; /* Isolated mode? see PyPreConfig.isolated */ - int use_environment; /* Use environment variables? see PyPreConfig.use_environment */ - int dev_mode; /* Python Development Mode? See PyPreConfig.dev_mode */ - - /* Install signal handlers? Yes by default. */ + int isolated; + int use_environment; + int dev_mode; int install_signal_handlers; - - int use_hash_seed; /* PYTHONHASHSEED=x */ + int use_hash_seed; unsigned long hash_seed; - - /* Enable faulthandler? - Set to 1 by -X faulthandler and PYTHONFAULTHANDLER. -1 means unset. */ int faulthandler; - - /* Enable tracemalloc? - Set by -X tracemalloc=N and PYTHONTRACEMALLOC. -1 means unset */ int tracemalloc; - - int import_time; /* PYTHONPROFILEIMPORTTIME, -X importtime */ - int show_ref_count; /* -X showrefcount */ - int dump_refs; /* PYTHONDUMPREFS */ - int malloc_stats; /* PYTHONMALLOCSTATS */ - - /* Python filesystem encoding and error handler: - sys.getfilesystemencoding() and sys.getfilesystemencodeerrors(). - - The Doc/c-api/init_config.rst documentation explains how Python selects - the filesystem encoding and error handler. - - _PyUnicode_InitEncodings() updates the encoding name to the Python codec - name. For example, "ANSI_X3.4-1968" is replaced with "ascii". It also - sets Py_FileSystemDefaultEncoding to filesystem_encoding and - sets Py_FileSystemDefaultEncodeErrors to filesystem_errors. */ + int import_time; + int show_ref_count; + int dump_refs; + int malloc_stats; wchar_t *filesystem_encoding; wchar_t *filesystem_errors; - - wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */ - int parse_argv; /* Parse argv command line arguments? */ - - /* Command line arguments (sys.argv). - - Set parse_argv to 1 to parse argv as Python command line arguments - and then strip Python arguments from argv. - - If argv is empty, an empty string is added to ensure that sys.argv - always exists and is never empty. 
*/ + wchar_t *pycache_prefix; + int parse_argv; PyWideStringList argv; - - /* Program name: - - - If Py_SetProgramName() was called, use its value. - - On macOS, use PYTHONEXECUTABLE environment variable if set. - - If WITH_NEXT_FRAMEWORK macro is defined, use __PYVENV_LAUNCHER__ - environment variable is set. - - Use argv[0] if available and non-empty. - - Use "python" on Windows, or "python3 on other platforms. */ wchar_t *program_name; - - PyWideStringList xoptions; /* Command line -X options */ - - /* Warnings options: lowest to highest priority. warnings.filters - is built in the reverse order (highest to lowest priority). */ + PyWideStringList xoptions; PyWideStringList warnoptions; - - /* If equal to zero, disable the import of the module site and the - site-dependent manipulations of sys.path that it entails. Also disable - these manipulations if site is explicitly imported later (call - site.main() if you want them to be triggered). - - Set to 0 by the -S command line option. If set to -1 (default), it is - set to !Py_NoSiteFlag. */ int site_import; - - /* Bytes warnings: - - * If equal to 1, issue a warning when comparing bytes or bytearray with - str or bytes with int. - * If equal or greater to 2, issue an error. - - Incremented by the -b command line option. If set to -1 (default), inherit - Py_BytesWarningFlag value. */ int bytes_warning; - - /* If greater than 0, enable inspect: when a script is passed as first - argument or the -c option is used, enter interactive mode after - executing the script or the command, even when sys.stdin does not appear - to be a terminal. - - Incremented by the -i command line option. Set to 1 if the PYTHONINSPECT - environment variable is non-empty. If set to -1 (default), inherit - Py_InspectFlag value. */ int inspect; - - /* If greater than 0: enable the interactive mode (REPL). - - Incremented by the -i command line option. If set to -1 (default), - inherit Py_InteractiveFlag value. 
*/ int interactive; - - /* Optimization level. - - Incremented by the -O command line option. Set by the PYTHONOPTIMIZE - environment variable. If set to -1 (default), inherit Py_OptimizeFlag - value. */ int optimization_level; - - /* If greater than 0, enable the debug mode: turn on parser debugging - output (for expert only, depending on compilation options). - - Incremented by the -d command line option. Set by the PYTHONDEBUG - environment variable. If set to -1 (default), inherit Py_DebugFlag - value. */ int parser_debug; - - /* If equal to 0, Python won't try to write ``.pyc`` files on the - import of source modules. - - Set to 0 by the -B command line option and the PYTHONDONTWRITEBYTECODE - environment variable. If set to -1 (default), it is set to - !Py_DontWriteBytecodeFlag. */ int write_bytecode; - - /* If greater than 0, enable the verbose mode: print a message each time a - module is initialized, showing the place (filename or built-in module) - from which it is loaded. - - If greater or equal to 2, print a message for each file that is checked - for when searching for a module. Also provides information on module - cleanup at exit. - - Incremented by the -v option. Set by the PYTHONVERBOSE environment - variable. If set to -1 (default), inherit Py_VerboseFlag value. */ int verbose; - - /* If greater than 0, enable the quiet mode: Don't display the copyright - and version messages even in interactive mode. - - Incremented by the -q option. If set to -1 (default), inherit - Py_QuietFlag value. */ int quiet; - - /* If greater than 0, don't add the user site-packages directory to - sys.path. - - Set to 0 by the -s and -I command line options , and the PYTHONNOUSERSITE - environment variable. If set to -1 (default), it is set to - !Py_NoUserSiteDirectory. */ int user_site_directory; - - /* If non-zero, configure C standard steams (stdio, stdout, - stderr): - - - Set O_BINARY mode on Windows. - - If buffered_stdio is equal to zero, make streams unbuffered. 
- Otherwise, enable streams buffering if interactive is non-zero. */ int configure_c_stdio; - - /* If equal to 0, enable unbuffered mode: force the stdout and stderr - streams to be unbuffered. - - Set to 0 by the -u option. Set by the PYTHONUNBUFFERED environment - variable. - If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */ int buffered_stdio; - - /* Encoding of sys.stdin, sys.stdout and sys.stderr. - Value set from PYTHONIOENCODING environment variable and - Py_SetStandardStreamEncoding() function. - See also 'stdio_errors' attribute. */ wchar_t *stdio_encoding; - - /* Error handler of sys.stdin and sys.stdout. - Value set from PYTHONIOENCODING environment variable and - Py_SetStandardStreamEncoding() function. - See also 'stdio_encoding' attribute. */ wchar_t *stdio_errors; - #ifdef MS_WINDOWS - /* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys - standard streams. - - Set to 1 if the PYTHONLEGACYWINDOWSSTDIO environment variable is set to - a non-empty string. If set to -1 (default), inherit - Py_LegacyWindowsStdioFlag value. - - See PEP 528 for more details. */ int legacy_windows_stdio; #endif - - /* Value of the --check-hash-based-pycs command line option: - - - "default" means the 'check_source' flag in hash-based pycs - determines invalidation - - "always" causes the interpreter to hash the source file for - invalidation regardless of value of 'check_source' bit - - "never" causes the interpreter to always assume hash-based pycs are - valid - - The default value is "default". - - See PEP 552 "Deterministic pycs" for more details. */ wchar_t *check_hash_pycs_mode; + PyWideStringList orig_argv; /* --- Path configuration inputs ------------ */ - - /* If greater than 0, suppress _PyPathConfig_Calculate() warnings on Unix. - The parameter has no effect on Windows. - - If set to -1 (default), inherit !Py_FrozenFlag value. 
int pathconfig_warnings; - - wchar_t *pythonpath_env; /* PYTHONPATH environment variable */ - wchar_t *home; /* PYTHONHOME environment variable, - see also Py_SetPythonHome(). */ + wchar_t *pythonpath_env; + wchar_t *home; /* --- Path configuration outputs ----------- */ - - int module_search_paths_set; /* If non-zero, use module_search_paths */ - PyWideStringList module_search_paths; /* sys.path paths. Computed if - module_search_paths_set is equal - to zero. */ - - wchar_t *executable; /* sys.executable */ - wchar_t *base_executable; /* sys._base_executable */ - wchar_t *prefix; /* sys.prefix */ - wchar_t *base_prefix; /* sys.base_prefix */ - wchar_t *exec_prefix; /* sys.exec_prefix */ - wchar_t *base_exec_prefix; /* sys.base_exec_prefix */ - wchar_t *platlibdir; /* sys.platlibdir */ + int module_search_paths_set; + PyWideStringList module_search_paths; + wchar_t *executable; + wchar_t *base_executable; + wchar_t *prefix; + wchar_t *base_prefix; + wchar_t *exec_prefix; + wchar_t *base_exec_prefix; + wchar_t *platlibdir; /* --- Parameter only used by Py_Main() ---------- */ - - /* Skip the first line of the source ('run_filename' parameter), allowing use of non-Unix forms of - "#!cmd". This is intended for a DOS specific hack only. - - Set by the -x command line option. */ int skip_source_first_line; - - wchar_t *run_command; /* -c command line argument */ - wchar_t *run_module; /* -m command line argument */ - wchar_t *run_filename; /* Trailing command line argument without -c or -m */ + wchar_t *run_command; + wchar_t *run_module; + wchar_t *run_filename; /* --- Private fields ---------------------------- */ - /* Install importlib? If set to 0, importlib is not initialized at all. - Needed by freeze_importlib. */ + // Install importlib? If equal to 0, importlib is not initialized at all. + // Needed by freeze_importlib.
int _install_importlib; - /* If equal to 0, stop Python initialization before the "main" phase */ + // If equal to 0, stop Python initialization before the "main" phase. int _init_main; - /* If non-zero, disallow threads, subprocesses, and fork. - Default: 0. */ + // If non-zero, disallow threads, subprocesses, and fork. + // Default: 0. int _isolated_interpreter; - - /* The list of the original command line arguments passed to the Python - executable. - - If 'orig_argv' list is empty and 'argv' is not a list only containing an - empty string, PyConfig_Read() copies 'argv' into 'orig_argv' before - modifying 'argv' (if 'parse_argv is non-zero). - - _PyConfig_Write() initializes Py_GetArgcArgv() to this list. */ - PyWideStringList orig_argv; } PyConfig; PyAPI_FUNC(void) PyConfig_InitPythonConfig(PyConfig *config); diff --git a/Python/initconfig.c b/Python/initconfig.c index d0ff888..7bb28ed 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -543,6 +543,7 @@ _Py_SetArgcArgv(Py_ssize_t argc, wchar_t * const *argv) } +// _PyConfig_Write() calls _Py_SetArgcArgv() with PyConfig.orig_argv. void Py_GetArgcArgv(int *argc, wchar_t ***argv) { diff --git a/Python/preconfig.c b/Python/preconfig.c index 149afcd..b8b0c3a 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -19,11 +19,6 @@ preconfig_copy(PyPreConfig *config, const PyPreConfig *config2); /* --- File system encoding/errors -------------------------------- */ -/* The filesystem encoding is chosen by config_init_fs_encoding(), - see also initfsencoding(). - - Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors - are encoded to UTF-8. */ const char *Py_FileSystemDefaultEncoding = NULL; int Py_HasFileSystemDefaultEncoding = 0; const char *Py_FileSystemDefaultEncodeErrors = NULL; @@ -44,7 +39,10 @@ _Py_ClearFileSystemEncoding(void) /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors - global configuration variables. 
*/ + global configuration variables to PyConfig.filesystem_encoding and + PyConfig.filesystem_errors (encoded to UTF-8). + + Function called by _PyUnicode_InitEncodings(). */ int _Py_SetFileSystemEncoding(const char *encoding, const char *errors) { -- cgit v0.12