From 94908bbc1503df830d1d615e7b57744ae1b41079 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Aug 2010 21:23:25 +0000 Subject: Issue #8622: Add PYTHONFSENCODING environment variable to override the filesystem encoding. initfsencoding() also displays a better error message if get_codeset() failed. --- Doc/using/cmdline.rst | 15 +++++++++--- Doc/whatsnew/3.2.rst | 9 +++++++ Lib/test/test_pep277.py | 2 +- Lib/test/test_sys.py | 32 ++++++++++++++++++------ Misc/NEWS | 3 +++ Modules/main.c | 1 + Python/pythonrun.c | 65 ++++++++++++++++++++++++++++++++----------------- 7 files changed, 93 insertions(+), 34 deletions(-) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 81d118b..c1130e4 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -442,11 +442,20 @@ These environment variables influence Python's behavior. import of source modules. +.. envvar:: PYTHONFSENCODING + + If this is set before running the interpreter, it overrides the encoding used + for the filesystem encoding (see :func:`sys.getfilesystemencoding`). + + .. versionadded:: 3.2 + + .. envvar:: PYTHONIOENCODING - Overrides the encoding used for stdin/stdout/stderr, in the syntax - ``encodingname:errorhandler``. The ``:errorhandler`` part is optional and - has the same meaning as in :func:`str.encode`. + If this is set before running the interpreter, it overrides the encoding used + for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The + ``:errorhandler`` part is optional and has the same meaning as in + :func:`str.encode`. For stderr, the ``:errorhandler`` part is ignored; the handler will always be ``'backslashreplace'``. 
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index a2b7af4..1799b70 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -232,6 +232,15 @@ Major performance enhancements have been added: * Stub + +Unicode +======= + +The filesystem encoding can be specified by setting the +:envvar:`PYTHONFSENCODING` environment variable before running the interpreter. +The value should be a string in the form ``<encoding>``, e.g. ``utf-8``. + + IDLE ==== diff --git a/Lib/test/test_pep277.py b/Lib/test/test_pep277.py index 60d99db..0699317 100644 --- a/Lib/test/test_pep277.py +++ b/Lib/test/test_pep277.py @@ -43,7 +43,7 @@ if sys.platform != 'darwin': # Is it Unicode-friendly? if not os.path.supports_unicode_filenames: - fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + fsencoding = sys.getfilesystemencoding() try: for name in filenames: name.encode(fsencoding) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 44ef5c1..d2f5b85 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase): def test_getfilesystemencoding(self): import codecs - def check_fsencoding(fs_encoding): + def check_fsencoding(fs_encoding, expected=None): self.assertIsNotNone(fs_encoding) if sys.platform == 'darwin': self.assertEqual(fs_encoding, 'utf-8') codecs.lookup(fs_encoding) + if expected: + self.assertEqual(fs_encoding, expected) fs_encoding = sys.getfilesystemencoding() check_fsencoding(fs_encoding) - # Even in C locale + def get_fsencoding(env): + output = subprocess.check_output( + [sys.executable, "-c", + "import sys; print(sys.getfilesystemencoding())"], + env=env) + return output.rstrip().decode('ascii') + try: sys.executable.encode('ascii') except UnicodeEncodeError: @@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase): # see issue #8611 pass else: + # Even in C locale env = os.environ.copy() env['LANG'] = 'C' - output = subprocess.check_output( - [sys.executable, "-c", - "import sys; 
print(sys.getfilesystemencoding())"], - env=env) - fs_encoding = output.rstrip().decode('ascii') - check_fsencoding(fs_encoding) + try: + del env['PYTHONFSENCODING'] + except KeyError: + pass + check_fsencoding(get_fsencoding(env), 'ascii') + + # Filesystem encoding is hardcoded on Windows and Mac OS X + if sys.platform not in ('win32', 'darwin'): + for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'): + env = os.environ.copy() + env['PYTHONFSENCODING'] = encoding + check_fsencoding(get_fsencoding(env), encoding) + def test_setfilesystemencoding(self): old = sys.getfilesystemencoding() diff --git a/Misc/NEWS b/Misc/NEWS index 1e7acc6..5b22508 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2? Core and Builtins ----------------- +- Issue #8622: Add PYTHONFSENCODING environment variable to override the + filesystem encoding. + - Issue #5127: The C functions that access the Unicode Database now accept and return characters from the full Unicode range, even on narrow unicode builds (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). 
A visible difference diff --git a/Modules/main.c b/Modules/main.c index 3e7e065..d129aba 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -99,6 +99,7 @@ PYTHONHOME : alternate directory (or %c).\n\ The default module search path uses %s.\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ +PYTHONFSENCODING: Encoding used for the filesystem.\n\ "; FILE * diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 76a8eef..fd31974 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -134,18 +134,13 @@ add_flag(int flag, const char *envs) return flag; } -#if defined(HAVE_LANGINFO_H) && defined(CODESET) static char* -get_codeset(void) +get_codec_name(const char *encoding) { - char* codeset, *name_str; + char *name_utf8, *name_str; PyObject *codec, *name = NULL; - codeset = nl_langinfo(CODESET); - if (!codeset || codeset[0] == '\0') - return NULL; - - codec = _PyCodec_Lookup(codeset); + codec = _PyCodec_Lookup(encoding); if (!codec) goto error; @@ -154,18 +149,34 @@ get_codeset(void) if (!name) goto error; - name_str = _PyUnicode_AsString(name); + name_utf8 = _PyUnicode_AsString(name); if (name == NULL) goto error; - codeset = strdup(name_str); + name_str = strdup(name_utf8); Py_DECREF(name); - return codeset; + if (name_str == NULL) { + PyErr_NoMemory(); + return NULL; + } + return name_str; error: Py_XDECREF(codec); Py_XDECREF(name); return NULL; } + +#if defined(HAVE_LANGINFO_H) && defined(CODESET) +static char* +get_codeset(void) +{ + char* codeset = nl_langinfo(CODESET); + if (!codeset || codeset[0] == '\0') { + PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty"); + return NULL; + } + return get_codec_name(codeset); +} #endif void @@ -706,25 +717,35 @@ initfsencoding(void) { PyObject *codec; #if defined(HAVE_LANGINFO_H) && defined(CODESET) - char *codeset; + char *codeset = NULL; if (Py_FileSystemDefaultEncoding == NULL) { - /* On Unix, set the file system 
encoding according to the - user's preference, if the CODESET names a well-known - Python codec, and Py_FileSystemDefaultEncoding isn't - initialized by other means. Also set the encoding of - stdin and stdout if these are terminals. */ - codeset = get_codeset(); + const char *env_encoding = Py_GETENV("PYTHONFSENCODING"); + if (env_encoding != NULL) { + codeset = get_codec_name(env_encoding); + if (!codeset) { + fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n"); + PyErr_Print(); + } + } + if (!codeset) { + /* On Unix, set the file system encoding according to the + user's preference, if the CODESET names a well-known + Python codec, and Py_FileSystemDefaultEncoding isn't + initialized by other means. Also set the encoding of + stdin and stdout if these are terminals. */ + codeset = get_codeset(); + } if (codeset != NULL) { Py_FileSystemDefaultEncoding = codeset; Py_HasFileSystemDefaultEncoding = 0; return; + } else { + fprintf(stderr, "Unable to get the locale encoding:\n"); + PyErr_Print(); } - PyErr_Clear(); - fprintf(stderr, - "Unable to get the locale encoding: " - "fallback to utf-8\n"); + fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n"); Py_FileSystemDefaultEncoding = "utf-8"; Py_HasFileSystemDefaultEncoding = 1; } -- cgit v0.12