summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  Doc/c-api/unicode.rst      9
-rw-r--r--  Include/code.h             9
-rw-r--r--  Include/unicodeobject.h    9
-rw-r--r--  Misc/NEWS                  3
-rw-r--r--  Objects/codeobject.c      13
-rw-r--r--  Objects/object.c           4
-rw-r--r--  Objects/unicodeobject.c   40
-rw-r--r--  Python/pythonrun.c       258
8 files changed, 48 insertions, 297 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index dac01a4..d9a48d6 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -415,7 +415,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the
``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
- If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+ If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+ locale encoding.
.. versionchanged:: 3.2
Use ``'strict'`` error handler on Windows.
@@ -426,7 +427,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding`
and the ``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
- If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+ If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+ locale encoding.
Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
@@ -440,7 +442,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
``'surrogateescape'`` error handler, or ``'strict'`` on Windows, and return
:class:`bytes`.
- If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
+ If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
+ locale encoding.
.. versionadded:: 3.2
diff --git a/Include/code.h b/Include/code.h
index bdbfaba..11ecc95 100644
--- a/Include/code.h
+++ b/Include/code.h
@@ -72,7 +72,7 @@ PyAPI_DATA(PyTypeObject) PyCode_Type;
PyAPI_FUNC(PyCodeObject *) PyCode_New(
int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
- PyObject *, PyObject *, int, PyObject *);
+ PyObject *, PyObject *, int, PyObject *);
/* same as struct above */
/* Creates a new empty code object with the specified source location. */
@@ -99,13 +99,6 @@ PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co,
PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
PyObject *names, PyObject *lineno_obj);
-/* List of weak references to all code objects. The list is used by
- initfsencoding() to redecode code filenames at startup if the filesystem
- encoding changes. At initfsencoding() exit, the list is set to NULL and it
- is no more used. */
-
-extern PyObject *_Py_code_object_list;
-
#ifdef __cplusplus
}
#endif
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 39a6b2e..f61712b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1193,7 +1193,8 @@ PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
- If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
*/
@@ -1205,7 +1206,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
/* Decode a string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
- If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
@@ -1216,7 +1218,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
"surrogateescape" error handler, and return bytes.
- If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
+ If Py_FileSystemDefaultEncoding is not set, fall back to the locale
+ encoding.
*/
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
diff --git a/Misc/NEWS b/Misc/NEWS
index 38e68e3..4e66031 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2 Beta 1?
Core and Builtins
-----------------
+- Use locale encoding instead of UTF-8 to encode and decode filenames if
+ Py_FileSystemDefaultEncoding is not set.
+
- Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead the file
descriptor.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 470bf56..e24fc8d 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -5,8 +5,6 @@
#define NAME_CHARS \
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
-PyObject *_Py_code_object_list = NULL;
-
/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
static int
@@ -111,17 +109,6 @@ PyCode_New(int argcount, int kwonlyargcount,
co->co_lnotab = lnotab;
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
-
- if (_Py_code_object_list != NULL) {
- int err;
- PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL);
- if (ref == NULL)
- goto error;
- err = PyList_Append(_Py_code_object_list, ref);
- Py_DECREF(ref);
- if (err)
- goto error;
- }
}
return co;
diff --git a/Objects/object.c b/Objects/object.c
index e322e53..ff3363f 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1604,10 +1604,6 @@ _Py_ReadyTypes(void)
if (PyType_Ready(&PyCode_Type) < 0)
Py_FatalError("Can't initialize code type");
- _Py_code_object_list = PyList_New(0);
- if (_Py_code_object_list == NULL)
- Py_FatalError("Can't initialize code type");
-
if (PyType_Ready(&PyFrame_Type) < 0)
Py_FatalError("Can't initialize frame type");
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a18eeef..98427e3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1597,11 +1597,22 @@ PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
"surrogateescape");
}
else {
- /* if you change the default encoding, update also
- PyUnicode_DecodeFSDefaultAndSize() and redecode_filenames() */
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- "surrogateescape");
+ /* locale encoding with surrogateescape */
+ wchar_t *wchar;
+ char *bytes;
+ PyObject *bytes_obj;
+
+ wchar = PyUnicode_AsWideCharString(unicode, NULL);
+ if (wchar == NULL)
+ return NULL;
+ bytes = _Py_wchar2char(wchar);
+ PyMem_Free(wchar);
+ if (bytes == NULL)
+ return NULL;
+
+ bytes_obj = PyBytes_FromString(bytes);
+ PyMem_Free(bytes);
+ return bytes_obj;
}
}
@@ -1769,9 +1780,22 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape");
}
else {
- /* if you change the default encoding, update also
- PyUnicode_EncodeFSDefault() and redecode_filenames() */
- return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+ /* locale encoding with surrogateescape */
+ wchar_t *wchar;
+ PyObject *unicode;
+
+ if (s[size] != '\0' || size != strlen(s)) {
+ PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+ return NULL;
+ }
+
+ wchar = _Py_char2wchar(s);
+ if (wchar == NULL)
+ return NULL;
+
+ unicode = PyUnicode_FromWideChar(wchar, -1);
+ PyMem_Free(wchar);
+ return unicode;
}
}
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 026fcfa..73fef75 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -719,259 +719,6 @@ initmain(void)
}
}
-/* Redecode a filename from the default filesystem encoding (utf-8) to
- 'new_encoding' encoding with 'errors' error handler */
-static PyObject*
-redecode_filename(PyObject *file, const char *new_encoding,
- const char *errors)
-{
- PyObject *file_bytes, *new_file;
-
- file_bytes = PyUnicode_EncodeFSDefault(file);
- if (file_bytes == NULL)
- return NULL;
- new_file = PyUnicode_Decode(
- PyBytes_AsString(file_bytes),
- PyBytes_GET_SIZE(file_bytes),
- new_encoding,
- errors);
- Py_DECREF(file_bytes);
- return new_file;
-}
-
-/* Redecode a path list */
-static int
-redecode_path_list(PyObject *paths,
- const char *new_encoding, const char *errors)
-{
- PyObject *filename, *new_filename;
- Py_ssize_t i, size;
-
- size = PyList_Size(paths);
- for (i=0; i < size; i++) {
- filename = PyList_GetItem(paths, i);
- if (filename == NULL)
- return -1;
-
- new_filename = redecode_filename(filename, new_encoding, errors);
- if (new_filename == NULL)
- return -1;
- if (PyList_SetItem(paths, i, new_filename)) {
- Py_DECREF(new_filename);
- return -1;
- }
- }
- return 0;
-}
-
-/* Redecode __file__ and __path__ attributes of sys.modules */
-static int
-redecode_sys_modules(const char *new_encoding, const char *errors)
-{
- PyInterpreterState *interp;
- PyObject *modules, *values, *file, *new_file, *paths;
- PyObject *iter = NULL, *module = NULL;
-
- interp = PyThreadState_GET()->interp;
- modules = interp->modules;
-
- values = PyObject_CallMethod(modules, "values", "");
- if (values == NULL)
- goto error;
-
- iter = PyObject_GetIter(values);
- Py_DECREF(values);
- if (iter == NULL)
- goto error;
-
- while (1)
- {
- module = PyIter_Next(iter);
- if (module == NULL) {
- if (PyErr_Occurred())
- goto error;
- else
- break;
- }
-
- file = PyModule_GetFilenameObject(module);
- if (file != NULL) {
- new_file = redecode_filename(file, new_encoding, errors);
- Py_DECREF(file);
- if (new_file == NULL)
- goto error;
- if (PyObject_SetAttrString(module, "__file__", new_file)) {
- Py_DECREF(new_file);
- goto error;
- }
- Py_DECREF(new_file);
- }
- else
- PyErr_Clear();
-
- paths = PyObject_GetAttrString(module, "__path__");
- if (paths != NULL) {
- if (redecode_path_list(paths, new_encoding, errors))
- goto error;
- }
- else
- PyErr_Clear();
-
- Py_CLEAR(module);
- }
- Py_CLEAR(iter);
- return 0;
-
-error:
- Py_XDECREF(iter);
- Py_XDECREF(module);
- return -1;
-}
-
-/* Redecode sys.path_importer_cache keys */
-static int
-redecode_sys_path_importer_cache(const char *new_encoding, const char *errors)
-{
- PyObject *path_importer_cache, *items, *item, *path, *importer, *new_path;
- PyObject *new_cache = NULL, *iter = NULL;
-
- path_importer_cache = PySys_GetObject("path_importer_cache");
- if (path_importer_cache == NULL)
- goto error;
-
- items = PyObject_CallMethod(path_importer_cache, "items", "");
- if (items == NULL)
- goto error;
-
- iter = PyObject_GetIter(items);
- Py_DECREF(items);
- if (iter == NULL)
- goto error;
-
- new_cache = PyDict_New();
- if (new_cache == NULL)
- goto error;
-
- while (1)
- {
- item = PyIter_Next(iter);
- if (item == NULL) {
- if (PyErr_Occurred())
- goto error;
- else
- break;
- }
- path = PyTuple_GET_ITEM(item, 0);
- importer = PyTuple_GET_ITEM(item, 1);
-
- new_path = redecode_filename(path, new_encoding, errors);
- if (new_path == NULL)
- goto error;
- if (PyDict_SetItem(new_cache, new_path, importer)) {
- Py_DECREF(new_path);
- goto error;
- }
- Py_DECREF(new_path);
- }
- Py_CLEAR(iter);
- if (PySys_SetObject("path_importer_cache", new_cache))
- goto error;
- Py_CLEAR(new_cache);
- return 0;
-
-error:
- Py_XDECREF(iter);
- Py_XDECREF(new_cache);
- return -1;
-}
-
-/* Redecode co_filename attribute of all code objects */
-static int
-redecode_code_objects(const char *new_encoding, const char *errors)
-{
- Py_ssize_t i, len;
- PyCodeObject *co;
- PyObject *ref, *new_file;
-
- len = Py_SIZE(_Py_code_object_list);
- for (i=0; i < len; i++) {
- ref = PyList_GET_ITEM(_Py_code_object_list, i);
- co = (PyCodeObject *)PyWeakref_GetObject(ref);
- if ((PyObject*)co == Py_None)
- continue;
- if (co == NULL)
- return -1;
-
- new_file = redecode_filename(co->co_filename, new_encoding, errors);
- if (new_file == NULL)
- return -1;
- Py_DECREF(co->co_filename);
- co->co_filename = new_file;
- }
- Py_CLEAR(_Py_code_object_list);
- return 0;
-}
-
-/* Redecode the filenames of all modules (__file__ and __path__ attributes),
- all code objects (co_filename attribute), sys.path, sys.meta_path,
- sys.executable and sys.path_importer_cache (keys) when the filesystem
- encoding changes from the default encoding (utf-8) to new_encoding */
-static int
-redecode_filenames(const char *new_encoding)
-{
- char *errors;
- PyObject *paths, *executable, *new_executable;
-
- /* PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault() do already
- use utf-8 if Py_FileSystemDefaultEncoding is NULL */
- if (strcmp(new_encoding, "utf-8") == 0)
- return 0;
-
- if (strcmp(new_encoding, "mbcs") != 0)
- errors = "surrogateescape";
- else
- errors = NULL;
-
- /* sys.modules */
- if (redecode_sys_modules(new_encoding, errors))
- return -1;
-
- /* sys.path and sys.meta_path */
- paths = PySys_GetObject("path");
- if (paths != NULL) {
- if (redecode_path_list(paths, new_encoding, errors))
- return -1;
- }
- paths = PySys_GetObject("meta_path");
- if (paths != NULL) {
- if (redecode_path_list(paths, new_encoding, errors))
- return -1;
- }
-
- /* sys.executable */
- executable = PySys_GetObject("executable");
- if (executable == NULL)
- return -1;
- new_executable = redecode_filename(executable, new_encoding, errors);
- if (new_executable == NULL)
- return -1;
- if (PySys_SetObject("executable", new_executable)) {
- Py_DECREF(new_executable);
- return -1;
- }
- Py_DECREF(new_executable);
-
- /* sys.path_importer_cache */
- if (redecode_sys_path_importer_cache(new_encoding, errors))
- return -1;
-
- /* code objects */
- if (redecode_code_objects(new_encoding, errors))
- return -1;
-
- return 0;
-}
-
static void
initfsencoding(void)
{
@@ -987,11 +734,8 @@ initfsencoding(void)
stdin and stdout if these are terminals. */
codeset = get_codeset();
if (codeset != NULL) {
- if (redecode_filenames(codeset))
- Py_FatalError("Py_Initialize: can't redecode filenames");
Py_FileSystemDefaultEncoding = codeset;
Py_HasFileSystemDefaultEncoding = 0;
- Py_CLEAR(_Py_code_object_list);
return;
} else {
fprintf(stderr, "Unable to get the locale encoding:\n");
@@ -1004,8 +748,6 @@ initfsencoding(void)
}
#endif
- Py_CLEAR(_Py_code_object_list);
-
/* the encoding is mbcs, utf-8 or ascii */
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
if (!codec) {