summaryrefslogtreecommitdiffstats
path: root/Python/codecs.c
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2009-05-05 04:43:17 (GMT)
commit011e8420339245f9b55d41082ec6036f2f83a182 (patch)
tree6e278775c41c1d50c62e3a42b960797813d245ef /Python/codecs.c
parent93f65a177b36396dddd1e2938cc037288a7eb400 (diff)
downloadcpython-011e8420339245f9b55d41082ec6036f2f83a182.zip
cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.gz
cpython-011e8420339245f9b55d41082ec6036f2f83a182.tar.bz2
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
Diffstat (limited to 'Python/codecs.c')
-rw-r--r--Python/codecs.c89
1 files changed, 89 insertions, 0 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 633a24c..7e3ff8a 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
}
}
+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc)
+{
+ PyObject *restuple;
+ PyObject *object;
+ Py_ssize_t start;
+ Py_ssize_t end;
+ PyObject *res;
+ if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ Py_UNICODE *p;
+ Py_UNICODE *startp;
+ char *outp;
+ if (PyUnicodeEncodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeEncodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+ return NULL;
+ startp = PyUnicode_AS_UNICODE(object);
+ res = PyBytes_FromStringAndSize(NULL, end-start);
+ if (!res) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ outp = PyBytes_AsString(res);
+ for (p = startp+start; p < startp+end; p++) {
+ Py_UNICODE ch = *p;
+ if (ch < 0xdc80 || ch > 0xdcff) {
+ /* Not a UTF-8b surrogate, fail with original exception */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return NULL;
+ }
+ *outp++ = ch - 0xdc00;
+ }
+ restuple = Py_BuildValue("(On)", res, end);
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return restuple;
+ }
+ else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ unsigned char *p;
+ Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+ int consumed = 0;
+ if (PyUnicodeDecodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeDecodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+ return NULL;
+ if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ while (consumed < 4 && consumed < end-start) {
+ /* Refuse to escape ASCII bytes. */
+ if (p[start+consumed] < 128)
+ break;
+ ch[consumed] = 0xdc00 + p[start+consumed];
+ consumed++;
+ }
+ Py_DECREF(object);
+ if (!consumed) {
+ /* codec complained about ASCII byte. */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ return NULL;
+ }
+ return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+ }
+ else {
+ wrong_exception_type(exc);
+ return NULL;
+ }
+}
+
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateErrors(exc);
}
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{
+ return PyCodec_UTF8bErrors(exc);
+}
+
static int _PyCodecRegistry_Init(void)
{
static struct {
@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
surrogates_errors,
METH_O
}
+ },
+ {
+ "utf8b",
+ {
+ "utf8b",
+ utf8b_errors,
+ METH_O
+ }
}
};