summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2007-10-15 02:52:41 (GMT)
committerGuido van Rossum <guido@python.org>2007-10-15 02:52:41 (GMT)
commit00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch)
tree34fda27260f18f813912d83a2cf060264a736190
parentcdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff)
downloadcpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default filesystem encoding. This is a bit tricky because the default filesystem encoding isn't set by the time we import the first modules; at that point we fudge things a bit. This is okay since __file__ isn't really used much except for error reporting. Tested on OSX and Linux only so far.
-rw-r--r--Include/code.h4
-rw-r--r--Include/unicodeobject.h16
-rw-r--r--Misc/ACKS2
-rw-r--r--Modules/_ctypes/callbacks.c4
-rw-r--r--Modules/posixmodule.c4
-rw-r--r--Modules/pyexpat.c4
-rw-r--r--Objects/codeobject.c19
-rw-r--r--Objects/moduleobject.c4
-rw-r--r--Objects/unicodeobject.c35
-rw-r--r--Python/bltinmodule.c3
-rw-r--r--Python/ceval.c16
-rw-r--r--Python/compile.c4
-rw-r--r--Python/frozen.c2
-rw-r--r--Python/import.c9
-rw-r--r--Python/importdl.c4
-rw-r--r--Python/pythonrun.c3
-rw-r--r--Python/traceback.c4
17 files changed, 96 insertions, 41 deletions
diff --git a/Include/code.h b/Include/code.h
index 2bd6c5b..3f3df49 100644
--- a/Include/code.h
+++ b/Include/code.h
@@ -21,8 +21,8 @@ typedef struct {
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash/cmp */
- PyObject *co_filename; /* string (where it was loaded from) */
- PyObject *co_name; /* string (name, for reference) */
+ PyObject *co_filename; /* unicode (where it was loaded from) */
+ PyObject *co_name; /* unicode (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 4374857..3ef354f 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -154,6 +154,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *);
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+ If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+ UTF-16, UTF-32, Latin-1 or MBCS), it is used; otherwise the function falls
+ back to UTF-8 and replaces invalid characters with '?'.
+
+ The function is intended to be used for paths and file names only,
+ during the bootstrapping process when the codecs are not yet set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+ const char *s /* encoded string */
+ );
+
/* Return a char* holding the UTF-8 encoded value of the
Unicode object.
diff --git a/Misc/ACKS b/Misc/ACKS
index b9c8fd4..78cc463 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -273,6 +273,7 @@ Larry Hastings
Shane Hathaway
Rycharde Hawkes
Jochen Hayek
+Christian Heimes
Thomas Heller
Lance Finn Helsten
Jonathan Hendry
@@ -667,6 +668,7 @@ Michael Urman
Hector Urtubia
Atul Varma
Dmitry Vasiliev
+Alexandre Vassalotti
Frank Vercruesse
Mike Verdone
Jaap Vermeulen
diff --git a/Modules/_ctypes/callbacks.c b/Modules/_ctypes/callbacks.c
index a1a0e0d..385add4 100644
--- a/Modules/_ctypes/callbacks.c
+++ b/Modules/_ctypes/callbacks.c
@@ -34,9 +34,9 @@ void _AddTraceback(char *funcname, char *filename, int lineno)
PyCodeObject *py_code = 0;
PyFrameObject *py_frame = 0;
- py_srcfile = PyString_FromString(filename);
+ py_srcfile = PyUnicode_DecodeFSDefault(filename);
if (!py_srcfile) goto bad;
- py_funcname = PyString_FromString(funcname);
+ py_funcname = PyUnicode_FromString(funcname);
if (!py_funcname) goto bad;
py_globals = PyDict_New();
if (!py_globals) goto bad;
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 55af338..647ea3e 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -5370,7 +5370,7 @@ posix_tempnam(PyObject *self, PyObject *args)
#endif
if (name == NULL)
return PyErr_NoMemory();
- result = PyString_FromString(name);
+ result = PyUnicode_DecodeFSDefault(name);
free(name);
return result;
}
@@ -5428,7 +5428,7 @@ posix_tmpnam(PyObject *self, PyObject *noargs)
Py_XDECREF(err);
return NULL;
}
- return PyString_FromString(buffer);
+ return PyUnicode_DecodeFSDefault(buffer);
}
#endif
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 6cfb8ec..ae6f143 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -232,13 +232,13 @@ getcode(enum HandlerTypes slot, char* func_name, int lineno)
code = PyString_FromString("");
if (code == NULL)
goto failed;
- name = PyString_FromString(func_name);
+ name = PyUnicode_FromString(func_name);
if (name == NULL)
goto failed;
nulltuple = PyTuple_New(0);
if (nulltuple == NULL)
goto failed;
- filename = PyString_FromString(__FILE__);
+ filename = PyUnicode_DecodeFSDefault(__FILE__);
handler_info[slot].tb_code =
PyCode_New(0, /* argcount */
0, /* kwonlyargcount */
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 7bd292a..7aeddcc 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount,
{
PyCodeObject *co;
Py_ssize_t i;
+
/* Check argument types */
if (argcount < 0 || nlocals < 0 ||
code == NULL ||
@@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount,
varnames == NULL || !PyTuple_Check(varnames) ||
freevars == NULL || !PyTuple_Check(freevars) ||
cellvars == NULL || !PyTuple_Check(cellvars) ||
- name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
- filename == NULL || !PyString_Check(filename) ||
+ name == NULL || !PyUnicode_Check(name) ||
+ filename == NULL || !PyUnicode_Check(filename) ||
lnotab == NULL || !PyString_Check(lnotab) ||
!PyObject_CheckReadBuffer(code)) {
PyErr_BadInternalCall();
return NULL;
}
- if (PyString_Check(name)) {
- name = PyUnicode_FromString(PyString_AS_STRING(name));
- if (name == NULL)
- return NULL;
- } else {
- Py_INCREF(name);
- }
+ Py_INCREF(name);
+ Py_INCREF(filename);
+
intern_strings(names);
intern_strings(varnames);
intern_strings(freevars);
@@ -299,8 +296,8 @@ code_repr(PyCodeObject *co)
if (co->co_firstlineno != 0)
lineno = co->co_firstlineno;
- if (co->co_filename && PyString_Check(co->co_filename))
- filename = PyString_AS_STRING(co->co_filename);
+ if (co->co_filename && PyUnicode_Check(co->co_filename))
+ filename = PyUnicode_AsString(co->co_filename);
return PyUnicode_FromFormat(
"<code object %.100U at %p, file \"%.300s\", line %d>",
co->co_name, co, filename, lineno);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index fbb9fba..13c1ab4 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m)
d = ((PyModuleObject *)m)->md_dict;
if (d == NULL ||
(fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
- !PyString_Check(fileobj))
+ !PyUnicode_Check(fileobj))
{
PyErr_SetString(PyExc_SystemError, "module filename missing");
return NULL;
}
- return PyString_AsString(fileobj);
+ return PyUnicode_AsString(fileobj);
}
void
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index def9011..98723db 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256];
/* Default encoding to use and assume when NULL is passed as encoding
parameter; it is fixed to "utf-8". Always use the
- PyUnicode_GetDefaultEncoding() API to access this global. */
+ PyUnicode_GetDefaultEncoding() API to access this global.
+
+ Don't forget to alter Py_FileSystemDefaultEncoding if you change the
+ hard coded default!
+*/
static const char unicode_default_encoding[] = "utf-8";
Py_UNICODE
@@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
return v;
}
+/* Decode the NUL-terminated byte string s into a unicode object using
+   Py_FileSystemDefaultEncoding, or UTF-8 when that global is NULL.
+   Undecodable input is handled with the "replace" error handler.
+
+   s must not be NULL: its length is taken with strlen(). The return value
+   is whatever the selected PyUnicode_Decode*() call returns.
+*/
+PyObject*
+PyUnicode_DecodeFSDefault(const char *s)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(s);
+
+    /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
+       can be undefined. If that is the case, decode using UTF-8. The
+       following assumes that Py_FileSystemDefaultEncoding is set to a
+       built-in encoding during the bootstrapping process where the codecs
+       aren't ready yet.
+    */
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        /* strcmp() returns 0 on a match, so compare against 0 explicitly:
+           the direct MBCS decoder must be used only when the file system
+           encoding really is "mbcs". */
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
+            return PyUnicode_DecodeMBCS(s, size, "replace");
+        }
+#elif defined(__APPLE__)
+        /* Same reasoning: take the direct UTF-8 decoder only when the
+           file system encoding is exactly "utf-8". */
+        if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
+            return PyUnicode_DecodeUTF8(s, size, "replace");
+        }
+#endif
+        /* Any other encoding is decoded by name. */
+        return PyUnicode_Decode(s, size,
+                                Py_FileSystemDefaultEncoding,
+                                "replace");
+    }
+    else {
+        return PyUnicode_DecodeUTF8(s, size, "replace");
+    }
+}
+
+
char*
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index cafffdc..338c424 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -10,6 +10,9 @@
/* The default encoding used by the platform file system APIs
Can remain NULL for all platforms that don't have such a concept
+
+ Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the
+ values for Py_FileSystemDefaultEncoding!
*/
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
const char *Py_FileSystemDefaultEncoding = "mbcs";
diff --git a/Python/ceval.c b/Python/ceval.c
index dd6f6c4..ae8434d 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -767,7 +767,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL;
#endif
#if defined(Py_DEBUG) || defined(LLTRACE)
- filename = PyString_AsString(co->co_filename);
+ filename = PyUnicode_AsString(co->co_filename);
#endif
why = WHY_NOT;
@@ -2565,7 +2565,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
if (argcount > co->co_argcount) {
if (!(co->co_flags & CO_VARARGS)) {
PyErr_Format(PyExc_TypeError,
- "%S() takes %s %d "
+ "%U() takes %s %d "
"%spositional argument%s (%d given)",
co->co_name,
defcount ? "at most" : "exactly",
@@ -2599,7 +2599,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
int j;
if (keyword == NULL || !PyUnicode_Check(keyword)) {
PyErr_Format(PyExc_TypeError,
- "%S() keywords must be strings",
+ "%U() keywords must be strings",
co->co_name);
goto fail;
}
@@ -2622,7 +2622,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
if (j >= co->co_argcount + co->co_kwonlyargcount) {
if (kwdict == NULL) {
PyErr_Format(PyExc_TypeError,
- "%S() got an unexpected "
+ "%U() got an unexpected "
"keyword argument '%S'",
co->co_name,
keyword);
@@ -2633,7 +2633,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
else {
if (GETLOCAL(j) != NULL) {
PyErr_Format(PyExc_TypeError,
- "%S() got multiple "
+ "%U() got multiple "
"values for keyword "
"argument '%S'",
co->co_name,
@@ -2661,7 +2661,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
continue;
}
PyErr_Format(PyExc_TypeError,
- "%S() needs keyword-only argument %S",
+ "%U() needs keyword-only argument %S",
co->co_name, name);
goto fail;
}
@@ -2671,7 +2671,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
for (i = argcount; i < m; i++) {
if (GETLOCAL(i) == NULL) {
PyErr_Format(PyExc_TypeError,
- "%S() takes %s %d "
+ "%U() takes %s %d "
"%spositional argument%s "
"(%d given)",
co->co_name,
@@ -2699,7 +2699,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
else {
if (argcount > 0 || kwcount > 0) {
PyErr_Format(PyExc_TypeError,
- "%S() takes no arguments (%d given)",
+ "%U() takes no arguments (%d given)",
co->co_name,
argcount + kwcount);
goto fail;
diff --git a/Python/compile.c b/Python/compile.c
index d20da0a..93087db 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -1247,7 +1247,7 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, int args)
PyObject_REPR(name),
PyString_AS_STRING(c->u->u_name),
reftype, arg,
- PyString_AS_STRING(co->co_name),
+ PyUnicode_AsString(co->co_name),
PyObject_REPR(co->co_freevars));
Py_FatalError("compiler_make_closure()");
}
@@ -4001,7 +4001,7 @@ makecode(struct compiler *c, struct assembler *a)
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
if (!freevars)
goto error;
- filename = PyString_FromString(c->c_filename);
+ filename = PyUnicode_DecodeFSDefault(c->c_filename);
if (!filename)
goto error;
diff --git a/Python/frozen.c b/Python/frozen.c
index d404562..ee06c35 100644
--- a/Python/frozen.c
+++ b/Python/frozen.c
@@ -17,7 +17,7 @@ static unsigned char M___hello__[] = {
131,1,0,1,100,1,0,83,40,2,0,0,0,117,14,0,
0,0,72,101,108,108,111,32,119,111,114,108,100,46,46,46,
78,40,1,0,0,0,117,5,0,0,0,112,114,105,110,116,
- 40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,115,
+ 40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,117,
8,0,0,0,104,101,108,108,111,46,112,121,117,8,0,0,
0,60,109,111,100,117,108,101,62,1,0,0,0,115,0,0,
0,0,
diff --git a/Python/import.c b/Python/import.c
index c2f42e9..21dcbd4 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -74,10 +74,11 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
3040 (added signature annotations)
3050 (print becomes a function)
3060 (PEP 3115 metaclass syntax)
- 3070 (PEP 3109 raise changes)
+ 3070 (PEP 3109 raise changes)
+ 3080 (PEP 3137 make __file__ and __name__ unicode)
.
*/
-#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24))
+#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
/* Magic word as global; note that _PyImport_Init() can change the
value of this global to accommodate for alterations of how the
@@ -652,7 +653,7 @@ PyImport_ExecCodeModuleEx(char *name, PyObject *co, char *pathname)
/* Remember the filename as the __file__ attribute */
v = NULL;
if (pathname != NULL) {
- v = PyString_FromString(pathname);
+ v = PyUnicode_DecodeFSDefault(pathname);
if (v == NULL)
PyErr_Clear();
}
@@ -983,7 +984,7 @@ load_package(char *name, char *pathname)
PySys_WriteStderr("import %s # directory %s\n",
name, pathname);
d = PyModule_GetDict(m);
- file = PyString_FromString(pathname);
+ file = PyUnicode_DecodeFSDefault(pathname);
if (file == NULL)
goto error;
path = Py_BuildValue("[O]", file);
diff --git a/Python/importdl.c b/Python/importdl.c
index 9c325e4..7978c48 100644
--- a/Python/importdl.c
+++ b/Python/importdl.c
@@ -62,7 +62,9 @@ _PyImport_LoadDynamicModule(char *name, char *pathname, FILE *fp)
return NULL;
}
/* Remember the filename as the __file__ attribute */
- if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+ PyObject *path;
+ path = PyUnicode_DecodeFSDefault(pathname);
+ if (PyModule_AddObject(m, "__file__", path) < 0)
PyErr_Clear(); /* Not important enough to report */
if (_PyImport_FixupExtension(name, pathname) == NULL)
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index a37a3e4..4e239c9 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -867,7 +867,8 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
return -1;
d = PyModule_GetDict(m);
if (PyDict_GetItemString(d, "__file__") == NULL) {
- PyObject *f = PyString_FromString(filename);
+ PyObject *f;
+ f = PyUnicode_DecodeFSDefault(filename);
if (f == NULL)
return -1;
if (PyDict_SetItemString(d, "__file__", f) < 0) {
diff --git a/Python/traceback.c b/Python/traceback.c
index 5bb8841..9d7a2e0 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -229,10 +229,10 @@ tb_printinternal(PyTracebackObject *tb, PyObject *f, int limit)
while (tb != NULL && err == 0) {
if (depth <= limit) {
err = tb_displayline(f,
- PyString_AsString(
+ PyUnicode_AsString(
tb->tb_frame->f_code->co_filename),
tb->tb_lineno,
- PyString_AsString(tb->tb_frame->f_code->co_name));
+ PyUnicode_AsString(tb->tb_frame->f_code->co_name));
}
depth--;
tb = tb->tb_next;