diff options
author | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-10-15 02:52:41 (GMT) |
commit | 00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e (patch) | |
tree | 34fda27260f18f813912d83a2cf060264a736190 /Include | |
parent | cdadf242ba32f1b3ef55e74d2eeb021e62da8041 (diff) | |
download | cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.zip cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.gz cpython-00bc0e0a2d0b6c403a3c6ab96fa7d3398b5c751e.tar.bz2 |
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default
filesystem encoding.
This is a bit tricky because the default filesystem encoding isn't
set by the time we import the first modules; at that point we fudge
things a bit. This is okay since __file__ isn't really used much
except for error reporting.
Tested on OSX and Linux only so far.
Diffstat (limited to 'Include')
-rw-r--r-- | Include/code.h | 4 | ||||
-rw-r--r-- | Include/unicodeobject.h | 16 |
2 files changed, 18 insertions, 2 deletions
diff --git a/Include/code.h b/Include/code.h index 2bd6c5b..3f3df49 100644 --- a/Include/code.h +++ b/Include/code.h @@ -21,8 +21,8 @@ typedef struct { PyObject *co_freevars; /* tuple of strings (free variable names) */ PyObject *co_cellvars; /* tuple of strings (cell variable names) */ /* The rest doesn't count for hash/cmp */ - PyObject *co_filename; /* string (where it was loaded from) */ - PyObject *co_name; /* string (name, for reference) */ + PyObject *co_filename; /* unicode (where it was loaded from) */ + PyObject *co_name; /* unicode (name, for reference) */ int co_firstlineno; /* first source line number */ PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */ void *co_zombieframe; /* for optimization only (see frameobject.c) */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 4374857..3ef354f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -154,6 +154,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful @@ -245,6 +246,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful @@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int 
ordinal); PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *, const char *); +/* Decode a null-terminated string using Py_FileSystemDefaultEncoding. + + If the encoding is supported by one of the built-in codecs (i.e., UTF-8, + UTF-16, UTF-32, Latin-1 or MBCS), decode with that codec; otherwise fall + back to UTF-8 and replace invalid characters with '?'. + + The function is intended to be used for paths and file names only + during the bootstrapping process, when the codecs are not yet set up. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( + const char *s /* encoded string */ + ); + /* Return a char* holding the UTF-8 encoded value of the Unicode object. |