From b744ba1d14c5487576c95d0311e357b707600b47 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 15 May 2010 12:27:16 +0000 Subject: Issue #8610: Load file system codec at startup, and display a fatal error on failure. Set the file system encoding to utf-8 (instead of None) if getting the locale encoding failed, or if nl_langinfo(CODESET) function is missing. --- Doc/library/sys.rst | 12 +++++----- Misc/NEWS | 4 ++++ Python/bltinmodule.c | 11 ++++++++-- Python/pythonrun.c | 62 +++++++++++++++++++++++++++++++++++----------------- 4 files changed, 62 insertions(+), 27 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index cf15d8f..3b9bbb0 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -298,15 +298,13 @@ always available. .. function:: getfilesystemencoding() - Return the name of the encoding used to convert Unicode filenames into system - file names, or ``None`` if the system default encoding is used. The result value - depends on the operating system: + Return the name of the encoding used to convert Unicode filenames into + system file names. The result value depends on the operating system: * On Mac OS X, the encoding is ``'utf-8'``. * On Unix, the encoding is the user's preference according to the result of - nl_langinfo(CODESET), or ``None`` if the ``nl_langinfo(CODESET)`` - failed. + nl_langinfo(CODESET), or ``'utf-8'`` if ``nl_langinfo(CODESET)`` failed. * On Windows NT+, file names are Unicode natively, so no conversion is performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as @@ -316,6 +314,10 @@ always available. * On Windows 9x, the encoding is ``'mbcs'``. + .. versionchanged:: 3.2 + On Unix, use ``'utf-8'`` instead of ``None`` if ``nl_langinfo(CODESET)`` + failed. :func:`getfilesystemencoding` result cannot be ``None``. + .. 
function:: getrefcount(object) diff --git a/Misc/NEWS b/Misc/NEWS index ad25de5..8b72124 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- Issue #8610: Load file system codec at startup, and display a fatal error on + failure. Set the file system encoding to utf-8 (instead of None) if getting + the locale encoding failed, or if nl_langinfo(CODESET) function is missing. + - PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of PyUnicode_FromString() to support surrogates in the filename and use the right encoding diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 97f7b96..a658f9b 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -9,6 +9,10 @@ #include +#ifdef HAVE_LANGINFO_H +#include /* CODESET */ +#endif + /* The default encoding used by the platform file system APIs Can remain NULL for all platforms that don't have such a concept @@ -21,9 +25,12 @@ int Py_HasFileSystemDefaultEncoding = 1; #elif defined(__APPLE__) const char *Py_FileSystemDefaultEncoding = "utf-8"; int Py_HasFileSystemDefaultEncoding = 1; -#else -const char *Py_FileSystemDefaultEncoding = NULL; /* use default */ +#elif defined(HAVE_LANGINFO_H) && defined(CODESET) +const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ int Py_HasFileSystemDefaultEncoding = 0; +#else +const char *Py_FileSystemDefaultEncoding = "utf-8"; +int Py_HasFileSystemDefaultEncoding = 1; #endif int diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 3031aef..4932c4a 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -57,6 +57,7 @@ extern grammar _PyParser_Grammar; /* From graminit.c */ /* Forward */ static void initmain(void); +static void initfsencoding(void); static void initsite(void); static int initstdio(void); static void flush_io(void); @@ -159,7 +160,6 @@ get_codeset(void) error: Py_XDECREF(codec); - PyErr_Clear(); return NULL; } #endif @@ -171,9 +171,6 @@ 
Py_InitializeEx(int install_sigs) PyThreadState *tstate; PyObject *bimod, *sysmod, *pstderr; char *p; -#if defined(HAVE_LANGINFO_H) && defined(CODESET) - char *codeset; -#endif extern void _Py_ReadyTypes(void); if (initialized) @@ -264,21 +261,7 @@ Py_InitializeEx(int install_sigs) _PyImportHooks_Init(); -#if defined(HAVE_LANGINFO_H) && defined(CODESET) - /* On Unix, set the file system encoding according to the - user's preference, if the CODESET names a well-known - Python codec, and Py_FileSystemDefaultEncoding isn't - initialized by other means. Also set the encoding of - stdin and stdout if these are terminals. */ - - codeset = get_codeset(); - if (codeset) { - if (!Py_FileSystemDefaultEncoding) - Py_FileSystemDefaultEncoding = codeset; - else - free(codeset); - } -#endif + initfsencoding(); if (install_sigs) initsigs(); /* Signal handling stuff, including initintr() */ @@ -496,7 +479,7 @@ Py_Finalize(void) _PyUnicode_Fini(); /* reset file system default encoding */ - if (!Py_HasFileSystemDefaultEncoding) { + if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) { free((char*)Py_FileSystemDefaultEncoding); Py_FileSystemDefaultEncoding = NULL; } @@ -707,6 +690,45 @@ initmain(void) } } +static void +initfsencoding(void) +{ + PyObject *codec; +#if defined(HAVE_LANGINFO_H) && defined(CODESET) + char *codeset; + + /* On Unix, set the file system encoding according to the + user's preference, if the CODESET names a well-known + Python codec, and Py_FileSystemDefaultEncoding isn't + initialized by other means. Also set the encoding of + stdin and stdout if these are terminals. 
*/ + codeset = get_codeset(); + if (codeset != NULL) { + Py_FileSystemDefaultEncoding = codeset; + Py_HasFileSystemDefaultEncoding = 0; + return; + } + + PyErr_Clear(); + fprintf(stderr, + "Unable to get the locale encoding: " + "fallback to utf-8\n"); + Py_FileSystemDefaultEncoding = "utf-8"; + Py_HasFileSystemDefaultEncoding = 1; +#endif + + /* the encoding is mbcs, utf-8 or ascii */ + codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); + if (!codec) { + /* Such an error can only occur in critical situations: no more + * memory, import a module of the standard library failed, + * etc. */ + Py_FatalError("Py_Initialize: unable to load the file system codec"); + } else { + Py_DECREF(codec); + } +} + /* Import the site module (not into __main__ though) */ static void -- cgit v0.12