summary | refs | log | tree | commit | diff | stats
path: root/Python
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com> 2018-08-29 11:25:36 (GMT)
committer: GitHub <noreply@github.com> 2018-08-29 11:25:36 (GMT)
commit: b2457efc78b74a1d6d1b77d11a939e886b8a4e2c (patch)
tree: b715b8061d730f07584d13e4475660d61fd261f5 /Python
parent: dfe0dc74536dfb6f331131d9b2b49557675bb6b7 (diff)
download: cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.zip
cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.tar.gz
cpython-b2457efc78b74a1d6d1b77d11a939e886b8a4e2c.tar.bz2
bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)
_PyCoreConfig_Read() is now responsible for choosing the filesystem encoding and error handler. Using Py_Main(), the encoding is now chosen even before calling Py_Initialize(). _PyCoreConfig.filesystem_encoding is now the reference, instead of Py_FileSystemDefaultEncoding, for the Python filesystem encoding.
Changes:
* Add filesystem_encoding and filesystem_errors to _PyCoreConfig.
* _PyCoreConfig_Read() now reads the locale encoding for the filesystem encoding.
* PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize() now use the interpreter configuration rather than the Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors global configuration variables.
* Add the _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding() private functions, so that Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors are only modified in coreconfig.c.
* _Py_CoerceLegacyLocale() now takes an int rather than a _PyCoreConfig for the warning.
Diffstat (limited to 'Python')
-rw-r--r-- Python/coreconfig.c | 145
-rw-r--r-- Python/pylifecycle.c | 80
-rw-r--r-- Python/sysmodule.c | 42
3 files changed, 196 insertions, 71 deletions
diff --git a/Python/coreconfig.c b/Python/coreconfig.c
index 00037d9..0ec4640 100644
--- a/Python/coreconfig.c
+++ b/Python/coreconfig.c
@@ -5,6 +5,11 @@
# include <langinfo.h>
#endif
+#include <locale.h> /* setlocale() */
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h> /* nl_langinfo(CODESET) */
+#endif
+
#define DECODE_LOCALE_ERR(NAME, LEN) \
(((LEN) == -2) \
@@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0;
#endif
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
+static int _Py_HasFileSystemDefaultEncodeErrors = 1;
+
/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
stdin and stdout error handler to "surrogateescape". It is equal to
-1 by default: unknown, will be set by Py_Main() */
@@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list)
}
+/* Release the filesystem encoding and error handler strings held by the
+   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+   globals.
+
+   A string is passed to PyMem_RawFree() (and its global reset to NULL)
+   only when the pointer is non-NULL and the matching "Has..." flag is
+   zero; otherwise the global is left untouched. */
+void
+_Py_ClearFileSystemEncoding(void)
+{
+    if (Py_FileSystemDefaultEncoding != NULL
+        && Py_HasFileSystemDefaultEncoding == 0)
+    {
+        PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
+        Py_FileSystemDefaultEncoding = NULL;
+    }
+
+    if (Py_FileSystemDefaultEncodeErrors != NULL
+        && _Py_HasFileSystemDefaultEncodeErrors == 0)
+    {
+        PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
+        Py_FileSystemDefaultEncodeErrors = NULL;
+    }
+}
+
+
+/* Install private copies of `encoding` and `errors` as the
+   Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+   global configuration variables.
+
+   Both strings are duplicated before the previous values are cleared, so
+   a failed allocation leaves the globals untouched.
+
+   Return 0 on success, -1 if a copy cannot be allocated. */
+int
+_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
+{
+    char *new_encoding = _PyMem_RawStrdup(encoding);
+    if (!new_encoding) {
+        return -1;
+    }
+
+    char *new_errors = _PyMem_RawStrdup(errors);
+    if (!new_errors) {
+        PyMem_RawFree(new_encoding);
+        return -1;
+    }
+
+    /* Drop the old values only now that both copies exist. */
+    _Py_ClearFileSystemEncoding();
+
+    Py_FileSystemDefaultEncoding = new_encoding;
+    Py_FileSystemDefaultEncodeErrors = new_errors;
+
+    /* Zero flags let _Py_ClearFileSystemEncoding() free these strings. */
+    Py_HasFileSystemDefaultEncoding = 0;
+    _Py_HasFileSystemDefaultEncodeErrors = 0;
+    return 0;
+}
+
+
/* Helper to allow an embedding application to override the normal
* mechanism that attempts to figure out an appropriate IO encoding
*/
@@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
#endif
CLEAR(config->base_exec_prefix);
+ CLEAR(config->filesystem_encoding);
+ CLEAR(config->filesystem_errors);
CLEAR(config->stdio_encoding);
CLEAR(config->stdio_errors);
#undef CLEAR
@@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
+ COPY_STR_ATTR(filesystem_encoding);
+ COPY_STR_ATTR(filesystem_errors);
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
@@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(_frozen);
#undef COPY_ATTR
+#undef COPY_STR_ATTR
#undef COPY_WSTR_ATTR
#undef COPY_WSTRLIST
return 0;
@@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config)
}
-_PyInitError
-_Py_get_locale_encoding(char **locale_encoding)
+static _PyInitError
+get_locale_encoding(char **locale_encoding)
{
#ifdef MS_WINDOWS
char encoding[20];
@@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
/* Choose the default error handler based on the current locale. */
if (config->stdio_encoding == NULL) {
- _PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
+ _PyInitError err = get_locale_encoding(&config->stdio_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
@@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config)
}
+/* Choose the filesystem encoding and error handler.
+
+   Only fields that are still NULL are filled in; values already present in
+   config->filesystem_encoding / config->filesystem_errors are kept.
+
+   - Windows: mbcs/replace when config->legacy_windows_fs_encoding is set,
+     otherwise utf-8/surrogatepass (PEP 529).
+   - Other platforms: utf-8 when config->utf8_mode is set; otherwise UTF-8
+     on macOS and Android and the locale encoding elsewhere. The error
+     handler defaults to surrogateescape.
+
+   Return _Py_INIT_OK() on success, _Py_INIT_NO_MEMORY() if a string cannot
+   be allocated, or the error reported by get_locale_encoding(). */
+static _PyInitError
+config_init_fs_encoding(_PyCoreConfig *config)
+{
+#ifdef MS_WINDOWS
+    if (config->legacy_windows_fs_encoding) {
+        /* Legacy Windows filesystem encoding: mbcs/replace */
+        if (config->filesystem_encoding == NULL) {
+            config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
+            if (config->filesystem_encoding == NULL) {
+                return _Py_INIT_NO_MEMORY();
+            }
+        }
+        if (config->filesystem_errors == NULL) {
+            config->filesystem_errors = _PyMem_RawStrdup("replace");
+            if (config->filesystem_errors == NULL) {
+                return _Py_INIT_NO_MEMORY();
+            }
+        }
+    }
+
+    /* Windows defaults to utf-8/surrogatepass (PEP 529) */
+    if (config->filesystem_encoding == NULL) {
+        config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
+        if (config->filesystem_encoding == NULL) {
+            return _Py_INIT_NO_MEMORY();
+        }
+    }
+    if (config->filesystem_errors == NULL) {
+        config->filesystem_errors = _PyMem_RawStrdup("surrogatepass");
+        if (config->filesystem_errors == NULL) {
+            return _Py_INIT_NO_MEMORY();
+        }
+    }
+#else
+    if (config->utf8_mode) {
+        /* UTF-8 Mode use: utf-8/surrogateescape */
+        if (config->filesystem_encoding == NULL) {
+            config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
+            if (config->filesystem_encoding == NULL) {
+                return _Py_INIT_NO_MEMORY();
+            }
+        }
+        /* errors defaults to surrogateescape below */
+    }
+
+    if (config->filesystem_encoding == NULL) {
+        /* macOS and Android use UTF-8, other platforms use
+           the locale encoding. */
+#if defined(__APPLE__) || defined(__ANDROID__)
+        config->filesystem_encoding = _PyMem_RawStrdup("UTF-8");
+        if (config->filesystem_encoding == NULL) {
+            return _Py_INIT_NO_MEMORY();
+        }
+#else
+        /* get_locale_encoding() hands back a newly allocated string: store
+           it in the config directly (as config_init_stdio_encoding() does)
+           rather than duplicating it and leaking the original. */
+        _PyInitError err = get_locale_encoding(&config->filesystem_encoding);
+        if (_Py_INIT_FAILED(err)) {
+            return err;
+        }
+#endif
+    }
+
+    if (config->filesystem_errors == NULL) {
+        /* by default, use the "surrogateescape" error handler */
+        config->filesystem_errors = _PyMem_RawStrdup("surrogateescape");
+        if (config->filesystem_errors == NULL) {
+            return _Py_INIT_NO_MEMORY();
+        }
+    }
+#endif
+    return _Py_INIT_OK();
+}
+
+
/* Read configuration settings from standard locations
*
* This function doesn't make any changes to the interpreter state - it
@@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
config->argc = 0;
}
+ if (config->filesystem_encoding == NULL && config->filesystem_errors == NULL) {
+ err = config_init_fs_encoding(config);
+ if (_Py_INIT_FAILED(err)) {
+ return err;
+ }
+ }
+
err = config_init_stdio_encoding(config);
if (_Py_INIT_FAILED(err)) {
return err;
@@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
assert(config->coerce_c_locale >= 0);
assert(config->use_environment >= 0);
+ assert(config->filesystem_encoding != NULL);
+ assert(config->filesystem_errors != NULL);
+ assert(config->stdio_encoding != NULL);
+ assert(config->stdio_errors != NULL);
return _Py_INIT_OK();
}
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 9f6757f..6d97f2f 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] =
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
static void
-_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
+_coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target)
{
const char *newloc = target->locale_name;
@@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
- if (config->coerce_c_locale_warn) {
+ if (warn) {
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
}
@@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
#endif
void
-_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
+_Py_CoerceLegacyLocale(int warn)
{
#ifdef PY_COERCE_C_LOCALE
const char *locale_override = getenv("LC_ALL");
@@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET)
}
#endif
/* Successfully configured locale, so make it the default */
- _coerce_default_locale_settings(config, target);
+ _coerce_default_locale_settings(warn, target);
return;
}
}
@@ -1162,11 +1162,7 @@ Py_FinalizeEx(void)
/* Cleanup Unicode implementation */
_PyUnicode_Fini();
- /* reset file system default encoding */
- if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
- PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
- Py_FileSystemDefaultEncoding = NULL;
- }
+ _Py_ClearFileSystemEncoding();
/* XXX Still allocated:
- various static ad-hoc pointers to interned strings
@@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp)
static _PyInitError
initfsencoding(PyInterpreterState *interp)
{
- PyObject *codec;
+ _PyCoreConfig *config = &interp->core_config;
-#ifdef MS_WINDOWS
- if (Py_LegacyWindowsFSEncodingFlag) {
- Py_FileSystemDefaultEncoding = "mbcs";
- Py_FileSystemDefaultEncodeErrors = "replace";
- }
- else {
- Py_FileSystemDefaultEncoding = "utf-8";
- Py_FileSystemDefaultEncodeErrors = "surrogatepass";
+ char *encoding = get_codec_name(config->filesystem_encoding);
+ if (encoding == NULL) {
+ /* Such an error can only occur in critical situations: out of
+ memory, failure to import a standard library module, etc. */
+ return _Py_INIT_ERR("failed to get the Python codec "
+ "of the filesystem encoding");
}
-#else
- if (Py_FileSystemDefaultEncoding == NULL) {
- if (interp->core_config.utf8_mode) {
- Py_FileSystemDefaultEncoding = "utf-8";
- Py_HasFileSystemDefaultEncoding = 1;
- }
- else if (_Py_GetForceASCII()) {
- Py_FileSystemDefaultEncoding = "ascii";
- Py_HasFileSystemDefaultEncoding = 1;
- }
- else {
- extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
- char *locale_encoding;
- _PyInitError err = _Py_get_locale_encoding(&locale_encoding);
- if (_Py_INIT_FAILED(err)) {
- return err;
- }
-
- Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
- PyMem_RawFree(locale_encoding);
- if (Py_FileSystemDefaultEncoding == NULL) {
- return _Py_INIT_ERR("failed to get the Python codec "
- "of the locale encoding");
- }
+ /* Update the filesystem encoding to the normalized Python codec name.
+ For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+ (Python codec name). The previous string is freed and the
+ configuration keeps the string returned by get_codec_name(). */
+ PyMem_RawFree(config->filesystem_encoding);
+ config->filesystem_encoding = encoding;
- Py_HasFileSystemDefaultEncoding = 0;
- interp->fscodec_initialized = 1;
- return _Py_INIT_OK();
- }
+ /* Set the Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+ global configuration variables from the configuration values; a
+ negative result is reported as a memory error. */
+ if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
+ config->filesystem_errors) < 0) {
+ return _Py_INIT_NO_MEMORY();
}
-#endif
- /* the encoding is mbcs, utf-8 or ascii */
- codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
- if (!codec) {
- /* Such error can only occurs in critical situations: no more
- * memory, import a module of the standard library failed,
- * etc. */
- return _Py_INIT_ERR("unable to load the file system codec");
- }
- Py_DECREF(codec);
+ /* PyUnicode can now use the Python codec rather than the C implementation
+ for the filesystem encoding */
interp->fscodec_initialized = 1;
return _Py_INIT_OK();
}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 177b830..91df4b0 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -389,11 +389,9 @@ implementation."
static PyObject *
sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
{
- if (Py_FileSystemDefaultEncoding)
- return PyUnicode_FromString(Py_FileSystemDefaultEncoding);
- PyErr_SetString(PyExc_RuntimeError,
- "filesystem encoding is not initialized");
- return NULL;
+ PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+ const _PyCoreConfig *config = &interp->core_config; /* the encoding is now read from the
+    interpreter configuration instead of the Py_FileSystemDefaultEncoding global */
+ return PyUnicode_FromString(config->filesystem_encoding); /* NOTE(review): the former NULL
+    check and RuntimeError are gone; presumably filesystem_encoding is always set by
+    _PyCoreConfig_Read() before this can run -- confirm */
}
PyDoc_STRVAR(getfilesystemencoding_doc,
@@ -406,11 +404,9 @@ operating system filenames."
static PyObject *
sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored))
{
- if (Py_FileSystemDefaultEncodeErrors)
- return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors);
- PyErr_SetString(PyExc_RuntimeError,
- "filesystem encoding is not initialized");
- return NULL;
+ PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+ const _PyCoreConfig *config = &interp->core_config; /* the error handler is now read from the
+    interpreter configuration instead of the Py_FileSystemDefaultEncodeErrors global */
+ return PyUnicode_FromString(config->filesystem_errors); /* NOTE(review): the former NULL
+    check and RuntimeError are gone; presumably filesystem_errors is always set by
+    _PyCoreConfig_Read() before this can run -- confirm */
}
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
@@ -1150,8 +1146,30 @@ environment variable before launching Python."
static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self)
{
- Py_FileSystemDefaultEncoding = "mbcs";
- Py_FileSystemDefaultEncodeErrors = "replace";
+    /* Switch the filesystem encoding to mbcs/replace (PEP 529): update
+       both interp->core_config and the Py_FileSystemDefaultEncoding /
+       Py_FileSystemDefaultEncodeErrors globals.
+       Return None, or NULL with MemoryError set if a string cannot be
+       allocated. */
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+    _PyCoreConfig *config = &interp->core_config;
+
+    char *encoding = _PyMem_RawStrdup("mbcs");
+    char *errors = _PyMem_RawStrdup("replace");
+
+    /* Update the globals first (they get their own copies), so that a
+       failure leaves both the globals and the configuration unchanged. */
+    if (encoding == NULL || errors == NULL
+        || _Py_SetFileSystemEncoding(encoding, errors) < 0)
+    {
+        /* The strings come from _PyMem_RawStrdup(): they must be released
+           with PyMem_RawFree(), not PyMem_Free(). */
+        PyMem_RawFree(encoding);
+        PyMem_RawFree(errors);
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* The configuration takes ownership of the new strings. */
+    PyMem_RawFree(config->filesystem_encoding);
+    config->filesystem_encoding = encoding;
+    PyMem_RawFree(config->filesystem_errors);
+    config->filesystem_errors = errors;
+
Py_RETURN_NONE;
}