bpo-29240: readline now ignores the UTF-8 Mode (#5145)

Add new functions ignoring the UTF-8 Mode:

* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()

Modify the readline module to use these functions.

Re-enable test_readline.test_nonascii().
author: Victor Stinner <victor.stinner@gmail.com> 2018-01-10 21:46:15 (GMT)
committer: GitHub <noreply@github.com> 2018-01-10 21:46:15 (GMT)
commit: 2cba6b85797ba60d67389126f184aad5c9e02ff3 (patch)
tree: 5cc0972b12e1c85e58c4ff57edc312882f107ff1 /Python
parent: f80c0ca13330112fe4d8018609c085ef556cb5bf (diff)
download: cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.zip
cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.gz
cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.bz2
1 file changed, 52 insertions, 28 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 645a179..9275494 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
 
 #if !defined(__APPLE__) && !defined(__ANDROID__)
 static wchar_t*
-decode_locale(const char* arg, size_t *size)
+decode_current_locale(const char* arg, size_t *size)
 {
     wchar_t *res;
     size_t argsize;
@@ -380,32 +380,13 @@ oom:
 #endif
 
 
-/* Decode a byte string from the locale encoding with the
-   surrogateescape error handler: undecodable bytes are decoded as characters
-   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
-   character, escape the bytes using the surrogateescape error handler instead
-   of decoding them.
-
-   Return a pointer to a newly allocated wide character string, use
-   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
-   wide characters excluding the null character into *size
-
-   Return NULL on decoding error or memory allocation error. If *size* is not
-   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
-   decoding error.
-
-   Decoding errors should never happen, unless there is a bug in the C
-   library.
-
-   Use the Py_EncodeLocale() function to encode the character string back to a
-   byte string. */
-wchar_t*
-Py_DecodeLocale(const char* arg, size_t *size)
+static wchar_t*
+decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
 {
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
 #else
-    if (Py_UTF8Mode == 1) {
+    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
         return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
     }
 
@@ -426,11 +407,45 @@ Py_DecodeLocale(const char* arg, size_t *size)
     }
 #endif
 
-    return decode_locale(arg, size);
+    return decode_current_locale(arg, size);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
 
 
+/* Decode a byte string from the locale encoding with the
+   surrogateescape error handler: undecodable bytes are decoded as characters
+   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
+   character, escape the bytes using the surrogateescape error handler instead
+   of decoding them.
+
+   Return a pointer to a newly allocated wide character string, use
+   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
+   wide characters excluding the null character into *size
+
+   Return NULL on decoding error or memory allocation error. If *size* is not
+   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
+   decoding error.
+
+   Decoding errors should never happen, unless there is a bug in the C
+   library.
+
+   Use the Py_EncodeLocale() function to encode the character string back to a
+   byte string. */
+wchar_t*
+Py_DecodeLocale(const char* arg, size_t *size)
+{
+    return decode_locale(arg, size, 0);
+}
+
+
+/* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */
+wchar_t*
+_Py_DecodeCurrentLocale(const char* arg, size_t *size)
+{
+    return decode_locale(arg, size, 1);
+}
+
+
 #if !defined(__APPLE__) && !defined(__ANDROID__)
 static char*
 encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
@@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 #endif
 
 static char*
-encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
+encode_locale(const wchar_t *text, size_t *error_pos,
+              int raw_malloc, int ignore_utf8_mode)
 {
 #if defined(__APPLE__) || defined(__ANDROID__)
     return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
 #else   /* __APPLE__ */
-    if (Py_UTF8Mode == 1) {
+    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
         return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
     }
 
@@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 char*
 Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 {
-    return encode_locale(text, error_pos, 0);
+    return encode_locale(text, error_pos, 0, 0);
 }
 
 
@@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 char*
 _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
 {
-    return encode_locale(text, error_pos, 1);
+    return encode_locale(text, error_pos, 1, 0);
+}
+
+
+/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode */
+char*
+_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
+{
+    return encode_locale(text, error_pos, 1, 1);
 }
author	Victor Stinner <victor.stinner@gmail.com>	2018-01-10 21:46:15 (GMT)
committer	GitHub <noreply@github.com>	2018-01-10 21:46:15 (GMT)
commit	2cba6b85797ba60d67389126f184aad5c9e02ff3 (patch)
tree	5cc0972b12e1c85e58c4ff57edc312882f107ff1 /Python
parent	f80c0ca13330112fe4d8018609c085ef556cb5bf (diff)
download	cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.zip cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.gz cpython-2cba6b85797ba60d67389126f184aad5c9e02ff3.tar.bz2