diff options
author | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
commit | 00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch) | |
tree | 34fda27260f18f813912d83a2cf060264a736190 | |
parent | cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff) | |
download | cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2 |
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default
filesystem encoding.
This is a bit tricky because the default filesystem encoding isn't
set by the time we import the first modules; at that point we fudge
things a bit. This is okay since __file__ isn't really used much
except for error reporting.
Tested on OSX and Linux only so far.
-rw-r--r-- | Include/code.h | 4 | ||||
-rw-r--r-- | Include/unicodeobject.h | 16 | ||||
-rw-r--r-- | Misc/ACKS | 2 | ||||
-rw-r--r-- | Modules/_ctypes/callbacks.c | 4 | ||||
-rw-r--r-- | Modules/posixmodule.c | 4 | ||||
-rw-r--r-- | Modules/pyexpat.c | 4 | ||||
-rw-r--r-- | Objects/codeobject.c | 19 | ||||
-rw-r--r-- | Objects/moduleobject.c | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 35 | ||||
-rw-r--r-- | Python/bltinmodule.c | 3 | ||||
-rw-r--r-- | Python/ceval.c | 16 | ||||
-rw-r--r-- | Python/compile.c | 4 | ||||
-rw-r--r-- | Python/frozen.c | 2 | ||||
-rw-r--r-- | Python/import.c | 9 | ||||
-rw-r--r-- | Python/importdl.c | 4 | ||||
-rw-r--r-- | Python/pythonrun.c | 3 | ||||
-rw-r--r-- | Python/traceback.c | 4 |
17 files changed, 96 insertions, 41 deletions
diff --git a/Include/code.h b/Include/code.h index 2bd6c5b..3f3df49 100644 --- a/Include/code.h +++ b/Include/code.h @@ -21,8 +21,8 @@ typedef struct { PyObject *co_freevars; /* tuple of strings (free variable names) */ PyObject *co_cellvars; /* tuple of strings (cell variable names) */ /* The rest doesn't count for hash/cmp */ - PyObject *co_filename; /* string (where it was loaded from) */ - PyObject *co_name; /* string (name, for reference) */ + PyObject *co_filename; /* unicode (where it was loaded from) */ + PyObject *co_name; /* unicode (name, for reference) */ int co_firstlineno; /* first source line number */ PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */ void *co_zombieframe; /* for optimization only (see frameobject.c) */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 4374857..3ef354f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -154,6 +154,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful @@ -245,6 +246,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful @@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int 
ordinal); PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *, const char *); +/* Decode a null-terminated string using Py_FileSystemDefaultEncoding. + + The encoding is used directly if it is supported by one of the built-in + codecs (i.e., UTF-8, UTF-16, UTF-32, Latin-1 or MBCS); otherwise the function + falls back to UTF-8 and replaces invalid characters with '?'. + + The function is intended to be used for paths and file names only + during the bootstrapping process where the codecs are not set up. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( + const char *s /* encoded string */ + ); + /* Return a char* holding the UTF-8 encoded value of the Unicode object. @@ -273,6 +273,7 @@ Larry Hastings Shane Hathaway Rycharde Hawkes Jochen Hayek +Christian Heimes Thomas Heller Lance Finn Helsten Jonathan Hendry @@ -667,6 +668,7 @@ Michael Urman Hector Urtubia Atul Varma Dmitry Vasiliev +Alexandre Vassalotti Frank Vercruesse Mike Verdone Jaap Vermeulen diff --git a/Modules/_ctypes/callbacks.c b/Modules/_ctypes/callbacks.c index a1a0e0d..385add4 100644 --- a/Modules/_ctypes/callbacks.c +++ b/Modules/_ctypes/callbacks.c @@ -34,9 +34,9 @@ void _AddTraceback(char *funcname, char *filename, int lineno) PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; - py_srcfile = PyString_FromString(filename); + py_srcfile = PyUnicode_DecodeFSDefault(filename); if (!py_srcfile) goto bad; - py_funcname = PyString_FromString(funcname); + py_funcname = PyUnicode_FromString(funcname); if (!py_funcname) goto bad; py_globals = PyDict_New(); if (!py_globals) goto bad; diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 55af338..647ea3e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5370,7 +5370,7 @@ posix_tempnam(PyObject *self, PyObject *args) #endif if (name == NULL) return PyErr_NoMemory(); - result = PyString_FromString(name); + result = PyUnicode_DecodeFSDefault(name); free(name); return result; } @@ -5428,7 +5428,7 @@ posix_tmpnam(PyObject *self, PyObject *noargs) 
Py_XDECREF(err); return NULL; } - return PyString_FromString(buffer); + return PyUnicode_DecodeFSDefault(buffer); } #endif diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 6cfb8ec..ae6f143 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -232,13 +232,13 @@ getcode(enum HandlerTypes slot, char* func_name, int lineno) code = PyString_FromString(""); if (code == NULL) goto failed; - name = PyString_FromString(func_name); + name = PyUnicode_FromString(func_name); if (name == NULL) goto failed; nulltuple = PyTuple_New(0); if (nulltuple == NULL) goto failed; - filename = PyString_FromString(__FILE__); + filename = PyUnicode_DecodeFSDefault(__FILE__); handler_info[slot].tb_code = PyCode_New(0, /* argcount */ 0, /* kwonlyargcount */ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 7bd292a..7aeddcc 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount, { PyCodeObject *co; Py_ssize_t i; + /* Check argument types */ if (argcount < 0 || nlocals < 0 || code == NULL || @@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount, varnames == NULL || !PyTuple_Check(varnames) || freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || - name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || - filename == NULL || !PyString_Check(filename) || + name == NULL || !PyUnicode_Check(name) || + filename == NULL || !PyUnicode_Check(filename) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { PyErr_BadInternalCall(); return NULL; } - if (PyString_Check(name)) { - name = PyUnicode_FromString(PyString_AS_STRING(name)); - if (name == NULL) - return NULL; - } else { - Py_INCREF(name); - } + Py_INCREF(name); + Py_INCREF(filename); + intern_strings(names); intern_strings(varnames); intern_strings(freevars); @@ -299,8 +296,8 @@ code_repr(PyCodeObject *co) if (co->co_firstlineno != 0) lineno = co->co_firstlineno; - 
if (co->co_filename && PyString_Check(co->co_filename)) - filename = PyString_AS_STRING(co->co_filename); + if (co->co_filename && PyUnicode_Check(co->co_filename)) + filename = PyUnicode_AsString(co->co_filename); return PyUnicode_FromFormat( "<code object %.100U at %p, file \"%.300s\", line %d>", co->co_name, co, filename, lineno); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index fbb9fba..13c1ab4 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m) d = ((PyModuleObject *)m)->md_dict; if (d == NULL || (fileobj = PyDict_GetItemString(d, "__file__")) == NULL || - !PyString_Check(fileobj)) + !PyUnicode_Check(fileobj)) { PyErr_SetString(PyExc_SystemError, "module filename missing"); return NULL; } - return PyString_AsString(fileobj); + return PyUnicode_AsString(fileobj); } void diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index def9011..98723db 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256]; /* Default encoding to use and assume when NULL is passed as encoding parameter; it is fixed to "utf-8". Always use the - PyUnicode_GetDefaultEncoding() API to access this global. */ + PyUnicode_GetDefaultEncoding() API to access this global. + + Don't forget to alter Py_FileSystemDefaultEncoding() if you change the + hard coded default! +*/ static const char unicode_default_encoding[] = "utf-8"; Py_UNICODE @@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, return v; } +PyObject* +PyUnicode_DecodeFSDefault(const char *s) +{ + Py_ssize_t size = (Py_ssize_t)strlen(s); + + /* During the early bootstrapping process, Py_FileSystemDefaultEncoding + can be undefined. If it is case, decode using UTF-8. The following assumes + that Py_FileSystemDefaultEncoding is set to a built-in encoding during the + bootstrapping process where the codecs aren't ready yet. 
+ */ + if (Py_FileSystemDefaultEncoding) { +#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) + if (strcmp(Py_FileSystemDefaultEncoding, "mbcs")) { + return PyUnicode_DecodeMBCS(s, size, "replace"); + } +#elif defined(__APPLE__) + if (strcmp(Py_FileSystemDefaultEncoding, "utf-8")) { + return PyUnicode_DecodeUTF8(s, size, "replace"); + } +#endif + return PyUnicode_Decode(s, size, + Py_FileSystemDefaultEncoding, + "replace"); + } + else { + return PyUnicode_DecodeUTF8(s, size, "replace"); + } +} + char* PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) { diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index cafffdc..338c424 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -10,6 +10,9 @@ /* The default encoding used by the platform file system APIs Can remain NULL for all platforms that don't have such a concept + + Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the + values for Py_FileSystemDefaultEncoding! */ #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) const char *Py_FileSystemDefaultEncoding = "mbcs"; diff --git a/Python/ceval.c b/Python/ceval.c index dd6f6c4..ae8434d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -767,7 +767,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL; #endif #if defined(Py_DEBUG) || defined(LLTRACE) - filename = PyString_AsString(co->co_filename); + filename = PyUnicode_AsString(co->co_filename); #endif why = WHY_NOT; @@ -2565,7 +2565,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, if (argcount > co->co_argcount) { if (!(co->co_flags & CO_VARARGS)) { PyErr_Format(PyExc_TypeError, - "%S() takes %s %d " + "%U() takes %s %d " "%spositional argument%s (%d given)", co->co_name, defcount ? 
"at most" : "exactly", @@ -2599,7 +2599,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, int j; if (keyword == NULL || !PyUnicode_Check(keyword)) { PyErr_Format(PyExc_TypeError, - "%S() keywords must be strings", + "%U() keywords must be strings", co->co_name); goto fail; } @@ -2622,7 +2622,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, if (j >= co->co_argcount + co->co_kwonlyargcount) { if (kwdict == NULL) { PyErr_Format(PyExc_TypeError, - "%S() got an unexpected " + "%U() got an unexpected " "keyword argument '%S'", co->co_name, keyword); @@ -2633,7 +2633,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, else { if (GETLOCAL(j) != NULL) { PyErr_Format(PyExc_TypeError, - "%S() got multiple " + "%U() got multiple " "values for keyword " "argument '%S'", co->co_name, @@ -2661,7 +2661,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, continue; } PyErr_Format(PyExc_TypeError, - "%S() needs keyword-only argument %S", + "%U() needs keyword-only argument %S", co->co_name, name); goto fail; } @@ -2671,7 +2671,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, for (i = argcount; i < m; i++) { if (GETLOCAL(i) == NULL) { PyErr_Format(PyExc_TypeError, - "%S() takes %s %d " + "%U() takes %s %d " "%spositional argument%s " "(%d given)", co->co_name, @@ -2699,7 +2699,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, else { if (argcount > 0 || kwcount > 0) { PyErr_Format(PyExc_TypeError, - "%S() takes no arguments (%d given)", + "%U() takes no arguments (%d given)", co->co_name, argcount + kwcount); goto fail; diff --git a/Python/compile.c b/Python/compile.c index d20da0a..93087db 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1247,7 +1247,7 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, int args) PyObject_REPR(name), PyString_AS_STRING(c->u->u_name), reftype, arg, - 
PyString_AS_STRING(co->co_name), + PyUnicode_AsString(co->co_name), PyObject_REPR(co->co_freevars)); Py_FatalError("compiler_make_closure()"); } @@ -4001,7 +4001,7 @@ makecode(struct compiler *c, struct assembler *a) freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars)); if (!freevars) goto error; - filename = PyString_FromString(c->c_filename); + filename = PyUnicode_DecodeFSDefault(c->c_filename); if (!filename) goto error; diff --git a/Python/frozen.c b/Python/frozen.c index d404562..ee06c35 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -17,7 +17,7 @@ static unsigned char M___hello__[] = { 131,1,0,1,100,1,0,83,40,2,0,0,0,117,14,0, 0,0,72,101,108,108,111,32,119,111,114,108,100,46,46,46, 78,40,1,0,0,0,117,5,0,0,0,112,114,105,110,116, - 40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,115, + 40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,117, 8,0,0,0,104,101,108,108,111,46,112,121,117,8,0,0, 0,60,109,111,100,117,108,101,62,1,0,0,0,115,0,0, 0,0, diff --git a/Python/import.c b/Python/import.c index c2f42e9..21dcbd4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -74,10 +74,11 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); 3040 (added signature annotations) 3050 (print becomes a function) 3060 (PEP 3115 metaclass syntax) - 3070 (PEP 3109 raise changes) + 3070 (PEP 3109 raise changes) + 3080 (PEP 3137 make __file__ and __name__ unicode) . 
*/ -#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the @@ -652,7 +653,7 @@ PyImport_ExecCodeModuleEx(char *name, PyObject *co, char *pathname) /* Remember the filename as the __file__ attribute */ v = NULL; if (pathname != NULL) { - v = PyString_FromString(pathname); + v = PyUnicode_DecodeFSDefault(pathname); if (v == NULL) PyErr_Clear(); } @@ -983,7 +984,7 @@ load_package(char *name, char *pathname) PySys_WriteStderr("import %s # directory %s\n", name, pathname); d = PyModule_GetDict(m); - file = PyString_FromString(pathname); + file = PyUnicode_DecodeFSDefault(pathname); if (file == NULL) goto error; path = Py_BuildValue("[O]", file); diff --git a/Python/importdl.c b/Python/importdl.c index 9c325e4..7978c48 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -62,7 +62,9 @@ _PyImport_LoadDynamicModule(char *name, char *pathname, FILE *fp) return NULL; } /* Remember the filename as the __file__ attribute */ - if (PyModule_AddStringConstant(m, "__file__", pathname) < 0) + PyObject *path; + path = PyUnicode_DecodeFSDefault(pathname); + if (PyModule_AddObject(m, "__file__", path) < 0) PyErr_Clear(); /* Not important enough to report */ if (_PyImport_FixupExtension(name, pathname) == NULL) diff --git a/Python/pythonrun.c b/Python/pythonrun.c index a37a3e4..4e239c9 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -867,7 +867,8 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, return -1; d = PyModule_GetDict(m); if (PyDict_GetItemString(d, "__file__") == NULL) { - PyObject *f = PyString_FromString(filename); + PyObject *f; + f = PyUnicode_DecodeFSDefault(filename); if (f == NULL) return -1; if (PyDict_SetItemString(d, "__file__", f) < 0) { diff --git a/Python/traceback.c b/Python/traceback.c index 5bb8841..9d7a2e0 100644 --- 
a/Python/traceback.c +++ b/Python/traceback.c @@ -229,10 +229,10 @@ tb_printinternal(PyTracebackObject *tb, PyObject *f, int limit) while (tb != NULL && err == 0) { if (depth <= limit) { err = tb_displayline(f, - PyString_AsString( + PyUnicode_AsString( tb->tb_frame->f_code->co_filename), tb->tb_lineno, - PyString_AsString(tb->tb_frame->f_code->co_name)); + PyUnicode_AsString(tb->tb_frame->f_code->co_name)); } depth--; tb = tb->tb_next; |