diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2009-05-05 04:43:17 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2009-05-05 04:43:17 (GMT) |
commit | 011e8420339245f9b55d41082ec6036f2f83a182 (patch) | |
tree | 6e278775c41c1d50c62e3a42b960797813d245ef /Python/codecs.c | |
parent | 93f65a177b36396dddd1e2938cc037288a7eb400 (diff) | |
download | cpython-011e8420339245f9b55d41082ec6036f2f83a182.zip cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.gz cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.bz2 |
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
Diffstat (limited to 'Python/codecs.c')
-rw-r--r-- | Python/codecs.c | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/Python/codecs.c b/Python/codecs.c index 633a24c..7e3ff8a 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc) } } +static PyObject * +PyCodec_UTF8bErrors(PyObject *exc) +{ + PyObject *restuple; + PyObject *object; + Py_ssize_t start; + Py_ssize_t end; + PyObject *res; + if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { + Py_UNICODE *p; + Py_UNICODE *startp; + char *outp; + if (PyUnicodeEncodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeEncodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeEncodeError_GetObject(exc))) + return NULL; + startp = PyUnicode_AS_UNICODE(object); + res = PyBytes_FromStringAndSize(NULL, end-start); + if (!res) { + Py_DECREF(object); + return NULL; + } + outp = PyBytes_AsString(res); + for (p = startp+start; p < startp+end; p++) { + Py_UNICODE ch = *p; + if (ch < 0xdc80 || ch > 0xdcff) { + /* Not a UTF-8b surrogate, fail with original exception */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + Py_DECREF(res); + Py_DECREF(object); + return NULL; + } + *outp++ = ch - 0xdc00; + } + restuple = Py_BuildValue("(On)", res, end); + Py_DECREF(res); + Py_DECREF(object); + return restuple; + } + else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { + unsigned char *p; + Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */ + int consumed = 0; + if (PyUnicodeDecodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeDecodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeDecodeError_GetObject(exc))) + return NULL; + if (!(p = (unsigned char*)PyBytes_AsString(object))) { + Py_DECREF(object); + return NULL; + } + while (consumed < 4 && consumed < end-start) { + /* Refuse to escape ASCII bytes. */ + if (p[start+consumed] < 128) + break; + ch[consumed] = 0xdc00 + p[start+consumed]; + consumed++; + } + Py_DECREF(object); + if (!consumed) { + /* codec complained about ASCII byte. */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + return NULL; + } + return Py_BuildValue("(u#n)", ch, consumed, start+consumed); + } + else { + wrong_exception_type(exc); + return NULL; + } +} + static PyObject *strict_errors(PyObject *self, PyObject *exc) { @@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateErrors(exc); } +static PyObject *utf8b_errors(PyObject *self, PyObject *exc) +{ + return PyCodec_UTF8bErrors(exc); +} + static int _PyCodecRegistry_Init(void) { static struct { @@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void) surrogates_errors, METH_O } + }, + { + "utf8b", + { + "utf8b", + utf8b_errors, + METH_O + } } }; |