summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-05-15 11:37:42 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-05-15 11:37:42 (GMT)
commit: 88d8fb6af603176b6e55766da067f84115e35406 (patch)
tree: e3c6f09e9117def635e418e83b7c46d349ac9d25
parent: 8e4efbe115b0062d08c394fbdb6b39bb39304fd3 (diff)
download: cpython-88d8fb6af603176b6e55766da067f84115e35406.zip
cpython-88d8fb6af603176b6e55766da067f84115e35406.tar.gz
cpython-88d8fb6af603176b6e55766da067f84115e35406.tar.bz2
Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
encodings.
-rw-r--r-- Lib/test/test_codecs.py 13
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Python/codecs.c 23
3 files changed, 35 insertions, 4 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 7459010..e4d7a60 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2807,6 +2807,9 @@ class CodePageTest(unittest.TestCase):
('[\u20ac]', 'replace', b'[?]'),
('[\xff]', 'backslashreplace', b'[\\xff]'),
('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
+ ('\udcff', 'strict', None),
+ ('[\udcff]', 'surrogateescape', b'[\xff]'),
+ ('[\udcff]', 'surrogatepass', None),
))
self.check_decode(932, (
(b'abc', 'strict', 'abc'),
@@ -2816,6 +2819,7 @@ class CodePageTest(unittest.TestCase):
(b'[\xff]', 'ignore', '[]'),
(b'[\xff]', 'replace', '[\ufffd]'),
(b'[\xff]', 'surrogateescape', '[\udcff]'),
+ (b'[\xff]', 'surrogatepass', None),
(b'\x81\x00abc', 'strict', None),
(b'\x81\x00abc', 'ignore', '\x00abc'),
(b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
@@ -2826,14 +2830,23 @@ class CodePageTest(unittest.TestCase):
('abc', 'strict', b'abc'),
('\xe9\u20ac', 'strict', b'\xe9\x80'),
('\xff', 'strict', b'\xff'),
+ # test error handlers
('\u0141', 'strict', None),
('\u0141', 'ignore', b''),
('\u0141', 'replace', b'L'),
+ ('\udc98', 'surrogateescape', b'\x98'),
+ ('\udc98', 'surrogatepass', None),
))
self.check_decode(1252, (
(b'abc', 'strict', 'abc'),
(b'\xe9\x80', 'strict', '\xe9\u20ac'),
(b'\xff', 'strict', '\xff'),
+ # invalid bytes
+ (b'[\x98]', 'strict', None),
+ (b'[\x98]', 'ignore', '[]'),
+ (b'[\x98]', 'replace', '[\ufffd]'),
+ (b'[\x98]', 'surrogateescape', '[\udc98]'),
+ (b'[\x98]', 'surrogatepass', None),
))
def test_cp_utf7(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 2412c48..7c42136 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ Core and Builtins
Library
-------
+- Issue #13916: Disallowed the surrogatepass error handler for non UTF-*
+ encodings.
+
- Issue #20998: Fixed re.fullmatch() of repeated single character pattern
with ignore case. Original patch by Matthew Barnett.
diff --git a/Python/codecs.c b/Python/codecs.c
index e06d6e0..7003136 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -901,6 +901,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
}
}
+#define ENC_UNKNOWN -1
#define ENC_UTF8 0
#define ENC_UTF16BE 1
#define ENC_UTF16LE 2
@@ -916,7 +917,11 @@ get_standard_encoding(const char *encoding, int *bytelength)
encoding += 3;
if (*encoding == '-' || *encoding == '_' )
encoding++;
- if (encoding[0] == '1' && encoding[1] == '6') {
+ if (encoding[0] == '8' && encoding[1] == '\0') {
+ *bytelength = 3;
+ return ENC_UTF8;
+ }
+ else if (encoding[0] == '1' && encoding[1] == '6') {
encoding += 2;
*bytelength = 2;
if (*encoding == '\0') {
@@ -955,9 +960,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
}
}
}
- /* utf-8 */
- *bytelength = 3;
- return ENC_UTF8;
+ return ENC_UNKNOWN;
}
/* This handler is declared static until someone demonstrates
@@ -994,6 +997,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
}
code = get_standard_encoding(encoding, &bytelength);
Py_DECREF(encode);
+ if (code == ENC_UNKNOWN) {
+ /* Not supported, fail with original exception */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ Py_DECREF(object);
+ return NULL;
+ }
res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
if (!res) {
@@ -1068,6 +1077,12 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
}
code = get_standard_encoding(encoding, &bytelength);
Py_DECREF(encode);
+ if (code == ENC_UNKNOWN) {
+ /* Not supported, fail with original exception */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ Py_DECREF(object);
+ return NULL;
+ }
/* Try decoding a single surrogate character. If
there are more, let the codec call us again. */