diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2018-01-10 21:46:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-10 21:46:15 (GMT) |
commit | 2cba6b85797ba60d67389126f184aad5c9e02ff3 (patch) | |
tree | 5cc0972b12e1c85e58c4ff57edc312882f107ff1 /Python | |
parent | f80c0ca13330112fe4d8018609c085ef556cb5bf (diff) | |
download | cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.zip cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.gz cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.bz2 |
bpo-29240: readline now ignores the UTF-8 Mode (#5145)
Add new functions ignoring the UTF-8 mode:
* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()
Modify the readline module to use these functions.
Re-enable test_readline.test_nonascii().
Diffstat (limited to 'Python')
-rw-r--r-- | Python/fileutils.c | 80 |
1 files changed, 52 insertions, 28 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c index 645a179..9275494 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size) #if !defined(__APPLE__) && !defined(__ANDROID__) static wchar_t* -decode_locale(const char* arg, size_t *size) +decode_current_locale(const char* arg, size_t *size) { wchar_t *res; size_t argsize; @@ -380,32 +380,13 @@ oom: #endif -/* Decode a byte string from the locale encoding with the - surrogateescape error handler: undecodable bytes are decoded as characters - in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate - character, escape the bytes using the surrogateescape error handler instead - of decoding them. - - Return a pointer to a newly allocated wide character string, use - PyMem_RawFree() to free the memory. If size is not NULL, write the number of - wide characters excluding the null character into *size - - Return NULL on decoding error or memory allocation error. If *size* is not - NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on - decoding error. - - Decoding errors should never happen, unless there is a bug in the C - library. - - Use the Py_EncodeLocale() function to encode the character string back to a - byte string. 
*/ -wchar_t* -Py_DecodeLocale(const char* arg, size_t *size) +static wchar_t* +decode_locale(const char* arg, size_t *size, int ignore_utf8_mode) { #if defined(__APPLE__) || defined(__ANDROID__) return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); #else - if (Py_UTF8Mode == 1) { + if (!ignore_utf8_mode && Py_UTF8Mode == 1) { return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); } @@ -426,11 +407,45 @@ Py_DecodeLocale(const char* arg, size_t *size) } #endif - return decode_locale(arg, size); + return decode_current_locale(arg, size); #endif /* __APPLE__ or __ANDROID__ */ } +/* Decode a byte string from the locale encoding with the + surrogateescape error handler: undecodable bytes are decoded as characters + in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate + character, escape the bytes using the surrogateescape error handler instead + of decoding them. + + Return a pointer to a newly allocated wide character string, use + PyMem_RawFree() to free the memory. If size is not NULL, write the number of + wide characters excluding the null character into *size + + Return NULL on decoding error or memory allocation error. If *size* is not + NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on + decoding error. + + Decoding errors should never happen, unless there is a bug in the C + library. + + Use the Py_EncodeLocale() function to encode the character string back to a + byte string. 
*/ +wchar_t* +Py_DecodeLocale(const char* arg, size_t *size) +{ + return decode_locale(arg, size, 0); +} + + +/* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */ +wchar_t* +_Py_DecodeCurrentLocale(const char* arg, size_t *size) +{ + return decode_locale(arg, size, 1); +} + + #if !defined(__APPLE__) && !defined(__ANDROID__) static char* encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) @@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) #endif static char* -encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) +encode_locale(const wchar_t *text, size_t *error_pos, + int raw_malloc, int ignore_utf8_mode) { #if defined(__APPLE__) || defined(__ANDROID__) return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); #else /* __APPLE__ */ - if (Py_UTF8Mode == 1) { + if (!ignore_utf8_mode && Py_UTF8Mode == 1) { return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); } @@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) { - return encode_locale(text, error_pos, 0); + return encode_locale(text, error_pos, 0, 0); } @@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos) char* _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos) { - return encode_locale(text, error_pos, 1); + return encode_locale(text, error_pos, 1, 0); +} + + +/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode */ +char* +_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos) +{ + return encode_locale(text, error_pos, 1, 1); } |