summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2007-10-15 02:52:41 (GMT)
committerGuido van Rossum <guido@python.org>2007-10-15 02:52:41 (GMT)
commit00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch)
tree34fda27260f18f813912d83a2cf060264a736190 /Objects
parentcdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff)
downloadcpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default filesystem encoding. This is a bit tricky because the default filesystem encoding isn't set by the time we import the first modules; at that point we fudge things a bit. This is okay since __file__ isn't really used much except for error reporting. Tested on OSX and Linux only so far.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/codeobject.c19
-rw-r--r--Objects/moduleobject.c4
-rw-r--r--Objects/unicodeobject.c35
3 files changed, 44 insertions, 14 deletions
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 7bd292a..7aeddcc 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount,
{
PyCodeObject *co;
Py_ssize_t i;
+
/* Check argument types */
if (argcount < 0 || nlocals < 0 ||
code == NULL ||
@@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount,
varnames == NULL || !PyTuple_Check(varnames) ||
freevars == NULL || !PyTuple_Check(freevars) ||
cellvars == NULL || !PyTuple_Check(cellvars) ||
- name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
- filename == NULL || !PyString_Check(filename) ||
+ name == NULL || !PyUnicode_Check(name) ||
+ filename == NULL || !PyUnicode_Check(filename) ||
lnotab == NULL || !PyString_Check(lnotab) ||
!PyObject_CheckReadBuffer(code)) {
PyErr_BadInternalCall();
return NULL;
}
- if (PyString_Check(name)) {
- name = PyUnicode_FromString(PyString_AS_STRING(name));
- if (name == NULL)
- return NULL;
- } else {
- Py_INCREF(name);
- }
+ Py_INCREF(name);
+ Py_INCREF(filename);
+
intern_strings(names);
intern_strings(varnames);
intern_strings(freevars);
@@ -299,8 +296,8 @@ code_repr(PyCodeObject *co)
if (co->co_firstlineno != 0)
lineno = co->co_firstlineno;
- if (co->co_filename && PyString_Check(co->co_filename))
- filename = PyString_AS_STRING(co->co_filename);
+ if (co->co_filename && PyUnicode_Check(co->co_filename))
+ filename = PyUnicode_AsString(co->co_filename);
return PyUnicode_FromFormat(
"<code object %.100U at %p, file \"%.300s\", line %d>",
co->co_name, co, filename, lineno);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index fbb9fba..13c1ab4 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m)
d = ((PyModuleObject *)m)->md_dict;
if (d == NULL ||
(fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
- !PyString_Check(fileobj))
+ !PyUnicode_Check(fileobj))
{
PyErr_SetString(PyExc_SystemError, "module filename missing");
return NULL;
}
- return PyString_AsString(fileobj);
+ return PyUnicode_AsString(fileobj);
}
void
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index def9011..98723db 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256];
/* Default encoding to use and assume when NULL is passed as encoding
parameter; it is fixed to "utf-8". Always use the
- PyUnicode_GetDefaultEncoding() API to access this global. */
+ PyUnicode_GetDefaultEncoding() API to access this global.
+
+ Don't forget to alter Py_FileSystemDefaultEncoding if you change the
+ hard coded default!
+*/
static const char unicode_default_encoding[] = "utf-8";
Py_UNICODE
@@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
return v;
}
+/* Decode the NUL-terminated byte string `s` into a unicode object using
+   the file system default encoding and the "replace" error handler.
+
+   `s` must not be NULL: its length is taken with strlen().
+
+   Returns whatever the selected decoder returns (presumably a new
+   reference, or NULL with an exception set -- confirm against the
+   decoders' contracts).
+*/
+PyObject*
+PyUnicode_DecodeFSDefault(const char *s)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(s);
+
+    /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
+       can be undefined. If that is the case, decode using UTF-8. The
+       following assumes that Py_FileSystemDefaultEncoding is set to a
+       built-in encoding during the bootstrapping process where the codecs
+       aren't ready yet.
+    */
+    if (Py_FileSystemDefaultEncoding) {
+        /* strcmp() returns 0 on a match, so the built-in decoder shortcuts
+           below must test for == 0; otherwise they would fire for every
+           encoding *except* the one they implement. */
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
+            return PyUnicode_DecodeMBCS(s, size, "replace");
+        }
+#elif defined(__APPLE__)
+        if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
+            return PyUnicode_DecodeUTF8(s, size, "replace");
+        }
+#endif
+        /* Any other encoding goes through the generic codec lookup. */
+        return PyUnicode_Decode(s, size,
+                                Py_FileSystemDefaultEncoding,
+                                "replace");
+    }
+    else {
+        /* Encoding not set yet: fall back to UTF-8. */
+        return PyUnicode_DecodeUTF8(s, size, "replace");
+    }
+}
+
char*
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{