Patch #1272, by Christian Heimes and Alexandre Vassalotti.

Changes to make __file__ a proper Unicode object, using the default filesystem encoding. This is a bit tricky because the default filesystem encoding isn't set by the time we import the first modules; at that point we fudge things a bit. This is okay since __file__ isn't really used much except for error reporting. Tested on OSX and Linux only so far.
author: Guido van Rossum <guido@python.org> 2007-10-15 02:52:41 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-10-15 02:52:41 (GMT)
commit: 00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch)
tree: 34fda27260f18f813912d83a2cf060264a736190 /Objects
parent: cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff)
download: cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2
3 files changed, 44 insertions, 14 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 7bd292a..7aeddcc 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount,
 {
 	PyCodeObject *co;
 	Py_ssize_t i;
+
 	/* Check argument types */
 	if (argcount < 0 || nlocals < 0 ||
 	    code == NULL ||
@@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount,
 	    varnames == NULL || !PyTuple_Check(varnames) ||
 	    freevars == NULL || !PyTuple_Check(freevars) ||
 	    cellvars == NULL || !PyTuple_Check(cellvars) ||
-	    name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
-	    filename == NULL || !PyString_Check(filename) ||
+	    name == NULL || !PyUnicode_Check(name) ||
+	    filename == NULL || !PyUnicode_Check(filename) ||
 	    lnotab == NULL || !PyString_Check(lnotab) ||
 	    !PyObject_CheckReadBuffer(code)) {
 		PyErr_BadInternalCall();
 		return NULL;
 	}
-	if (PyString_Check(name)) {
-		name = PyUnicode_FromString(PyString_AS_STRING(name));
-		if (name == NULL)
-			return NULL;
-	} else {
-		Py_INCREF(name);
-	}
+	Py_INCREF(name);
+	Py_INCREF(filename);
+
 	intern_strings(names);
 	intern_strings(varnames);
 	intern_strings(freevars);
@@ -299,8 +296,8 @@ code_repr(PyCodeObject *co)
 
 	if (co->co_firstlineno != 0)
 		lineno = co->co_firstlineno;
-	if (co->co_filename && PyString_Check(co->co_filename))
-		filename = PyString_AS_STRING(co->co_filename);
+	if (co->co_filename && PyUnicode_Check(co->co_filename))
+		filename = PyUnicode_AsString(co->co_filename);
 	return PyUnicode_FromFormat(
 	                "<code object %.100U at %p, file \"%.300s\", line %d>",
 	                co->co_name, co, filename, lineno);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index fbb9fba..13c1ab4 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m)
 	d = ((PyModuleObject *)m)->md_dict;
 	if (d == NULL ||
 	    (fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
-	    !PyString_Check(fileobj))
+	    !PyUnicode_Check(fileobj))
 	{
 		PyErr_SetString(PyExc_SystemError, "module filename missing");
 		return NULL;
 	}
-	return PyString_AsString(fileobj);
+	return PyUnicode_AsString(fileobj);
 }
 
 void
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index def9011..98723db 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256];
 
 /* Default encoding to use and assume when NULL is passed as encoding
    parameter; it is fixed to "utf-8".  Always use the
-   PyUnicode_GetDefaultEncoding() API to access this global. */
+   PyUnicode_GetDefaultEncoding() API to access this global.
+
+   Don't forget to alter Py_FileSystemDefaultEncoding if you change the
+   hard-coded default!
+*/
 static const char unicode_default_encoding[] = "utf-8";
 
 Py_UNICODE
@@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
     return v;
 }
 
+PyObject*
+PyUnicode_DecodeFSDefault(const char *s)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(s);
+
+    /* Decode the NUL-terminated byte string s with the filesystem default
+       encoding, using the "replace" error handler.  During early bootstrapping
+       Py_FileSystemDefaultEncoding can still be NULL; in that case decode as
+       UTF-8.  The built-in decoders are called directly when the encoding name
+       matches (strcmp() == 0), because the codecs may not be ready yet. */
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
+            return PyUnicode_DecodeMBCS(s, size, "replace");
+        }
+#elif defined(__APPLE__)
+        if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
+            return PyUnicode_DecodeUTF8(s, size, "replace");
+        }
+#endif
+        return PyUnicode_Decode(s, size,
+                                Py_FileSystemDefaultEncoding,
+                                "replace");
+    }
+    else {
+        return PyUnicode_DecodeUTF8(s, size, "replace");
+    }
+}
+
 char*
 PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
author	Guido van Rossum <guido@python.org>	2007-10-15 02:52:41 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-10-15 02:52:41 (GMT)
commit	00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch)
tree	34fda27260f18f813912d83a2cf060264a736190 /Objects
parent	cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff)
download	cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2