summary refs log tree commit diff stats
path: root/Include
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-10-15 02:52:41 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-10-15 02:52:41 (GMT)
commit: 00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch)
tree: 34fda27260f18f813912d83a2cf060264a736190 /Include
parent: cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff)
downloadcpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz
cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default filesystem encoding. This is a bit tricky because the default filesystem encoding isn't set by the time we import the first modules; at that point we fudge things a bit. This is okay since __file__ isn't really used much except for error reporting. Tested on OSX and Linux only so far.
Diffstat (limited to 'Include')
-rw-r--r-- Include/code.h 4
-rw-r--r-- Include/unicodeobject.h 16
2 files changed, 18 insertions, 2 deletions
diff --git a/Include/code.h b/Include/code.h
index 2bd6c5b..3f3df49 100644
--- a/Include/code.h
+++ b/Include/code.h
@@ -21,8 +21,8 @@ typedef struct {
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash/cmp */
- PyObject *co_filename; /* string (where it was loaded from) */
- PyObject *co_name; /* string (name, for reference) */
+ PyObject *co_filename; /* unicode (where it was loaded from) */
+ PyObject *co_name; /* unicode (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */
void *co_zombieframe; /* for optimization only (see frameobject.c) */
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 4374857..3ef354f 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -154,6 +154,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *);
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+ If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+ UTF-16, UTF-32, Latin-1 or MBCS), use it; otherwise fall back to UTF-8 and
+ replace invalid characters with '?'.
+
+ The function is intended to be used for paths and file names only,
+ during the bootstrapping process when the codecs are not yet set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+ const char *s /* encoded string */
+ );
+
/* Return a char* holding the UTF-8 encoded value of the
Unicode object.