diff options
author | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
commit | 00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch) | |
tree | 34fda27260f18f813912d83a2cf060264a736190 /Objects | |
parent | cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff) | |
download | cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2 |
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default
filesystem encoding.
This is a bit tricky because the default filesystem encoding isn't
set by the time we import the first modules; at that point we fudge
things a bit. This is okay since __file__ isn't really used much
except for error reporting.
Tested on OSX and Linux only so far.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/codeobject.c | 19 | ||||
-rw-r--r-- | Objects/moduleobject.c | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 35 |
3 files changed, 44 insertions, 14 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 7bd292a..7aeddcc 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount,
 {
     PyCodeObject *co;
     Py_ssize_t i;
+
     /* Check argument types */
     if (argcount < 0 || nlocals < 0 ||
         code == NULL ||
@@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount,
         varnames == NULL || !PyTuple_Check(varnames) ||
         freevars == NULL || !PyTuple_Check(freevars) ||
         cellvars == NULL || !PyTuple_Check(cellvars) ||
-        name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
-        filename == NULL || !PyString_Check(filename) ||
+        name == NULL || !PyUnicode_Check(name) ||
+        filename == NULL || !PyUnicode_Check(filename) ||
         lnotab == NULL || !PyString_Check(lnotab) ||
         !PyObject_CheckReadBuffer(code)) {
         PyErr_BadInternalCall();
         return NULL;
     }
-    if (PyString_Check(name)) {
-        name = PyUnicode_FromString(PyString_AS_STRING(name));
-        if (name == NULL)
-            return NULL;
-    } else {
-        Py_INCREF(name);
-    }
+    Py_INCREF(name);
+    Py_INCREF(filename);
+
     intern_strings(names);
     intern_strings(varnames);
     intern_strings(freevars);
@@ -299,8 +296,8 @@ code_repr(PyCodeObject *co)
 
     if (co->co_firstlineno != 0)
         lineno = co->co_firstlineno;
-    if (co->co_filename && PyString_Check(co->co_filename))
-        filename = PyString_AS_STRING(co->co_filename);
+    if (co->co_filename && PyUnicode_Check(co->co_filename))
+        filename = PyUnicode_AsString(co->co_filename);
     return PyUnicode_FromFormat(
                     "<code object %.100U at %p, file \"%.300s\", line %d>",
                     co->co_name, co, filename, lineno);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index fbb9fba..13c1ab4 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m)
     d = ((PyModuleObject *)m)->md_dict;
     if (d == NULL ||
         (fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
-        !PyString_Check(fileobj))
+        !PyUnicode_Check(fileobj))
     {
         PyErr_SetString(PyExc_SystemError, "module filename missing");
         return NULL;
     }
-    return PyString_AsString(fileobj);
+    return PyUnicode_AsString(fileobj);
 }
 
 void
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index def9011..98723db 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256];
 
 /* Default encoding to use and assume when NULL is passed as encoding
    parameter; it is fixed to "utf-8".  Always use the
-   PyUnicode_GetDefaultEncoding() API to access this global. */
+   PyUnicode_GetDefaultEncoding() API to access this global.
+
+   Don't forget to alter Py_FileSystemDefaultEncoding if you change the
+   hard coded default!
+*/
 static const char unicode_default_encoding[] = "utf-8";
 
 Py_UNICODE
@@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
     return v;
 }
 
+PyObject*
+PyUnicode_DecodeFSDefault(const char *s)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(s);
+
+    /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
+       can be undefined. If that is the case, decode using UTF-8. The following
+       assumes that Py_FileSystemDefaultEncoding is set to a built-in encoding
+       during the bootstrapping process where the codecs aren't ready yet.
+    */
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
+            return PyUnicode_DecodeMBCS(s, size, "replace");
+        }
+#elif defined(__APPLE__)
+        if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
+            return PyUnicode_DecodeUTF8(s, size, "replace");
+        }
+#endif
+        return PyUnicode_Decode(s, size,
+                                Py_FileSystemDefaultEncoding,
+                                "replace");
+    }
+    else {
+        return PyUnicode_DecodeUTF8(s, size, "replace");
+    }
+}
+
 char*
 PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {