diff options
author | Marc-André Lemburg <mal@egenix.com> | 2008-06-06 12:18:17 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2008-06-06 12:18:17 (GMT) |
commit | b2750b5d334e9c8d262009069bce41c15803eca0 (patch) | |
tree | 2c501adf96b37d3afbc32e6fdc344fade85cf3d5 /Objects/unicodeobject.c | |
parent | 4efb518185d32d573ff65f11b94c6031340a018a (diff) | |
download | cpython-b2750b5d334e9c8d262009069bce41c15803eca0.zip cpython-b2750b5d334e9c8d262009069bce41c15803eca0.tar.gz cpython-b2750b5d334e9c8d262009069bce41c15803eca0.tar.bz2 |
Move the codec decode type checks to bytes/bytearray.decode().
Use faster PyUnicode_FromEncodedObject() for bytes/bytearray.decode().
Add new PyCodec_KnownEncoding() API.
Add new PyUnicode_AsDecodedUnicode() and PyUnicode_AsEncodedUnicode() APIs.
Add missing PyUnicode_AsDecodedObject() to unicodeobject.h
Fix punycode codec to also work on memoryviews.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 96 |
1 files changed, 89 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 78e38b5..fc8c8a9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1099,14 +1099,18 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, /* Coerce object */ if (PyBytes_Check(obj)) { - s = PyBytes_AS_STRING(obj); - len = PyBytes_GET_SIZE(obj); - } + s = PyBytes_AS_STRING(obj); + len = PyBytes_GET_SIZE(obj); + } + else if (PyByteArray_Check(obj)) { + s = PyByteArray_AS_STRING(obj); + len = PyByteArray_GET_SIZE(obj); + } else if (PyObject_AsCharBuffer(obj, &s, &len)) { /* Overwrite the error message with something more useful in case of a TypeError. */ if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, + PyErr_Format(PyExc_TypeError, "coercing to Unicode: need string or buffer, " "%.80s found", Py_TYPE(obj)->tp_name); @@ -1188,7 +1192,7 @@ PyObject *PyUnicode_Decode(const char *s, goto onError; if (!PyUnicode_Check(unicode)) { PyErr_Format(PyExc_TypeError, - "decoder did not return an unicode object (type=%.400s)", + "decoder did not return a unicode object (type=%.400s)", Py_TYPE(unicode)->tp_name); Py_DECREF(unicode); goto onError; @@ -1225,6 +1229,37 @@ PyObject *PyUnicode_AsDecodedObject(PyObject *unicode, return NULL; } +PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Decode via the codec registry */ + v = PyCodec_Decode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return a unicode object (type=%.400s)", + Py_TYPE(v)->tp_name); + Py_DECREF(v); + goto onError; + } + return v; + + onError: + return NULL; +} + PyObject *PyUnicode_Encode(const Py_UNICODE *s, Py_ssize_t size, const char *encoding, @@ -1296,7 +1331,54 @@ 
PyObject *PyUnicode_AsEncodedString(PyObject *unicode, v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) goto onError; - assert(PyBytes_Check(v)); + if (PyByteArray_Check(v)) { + char msg[100]; + PyOS_snprintf(msg, sizeof(msg), + "encoder %s returned buffer instead of bytes", + encoding); + if (PyErr_WarnEx(PyExc_RuntimeWarning, msg, 1) < 0) { + v = NULL; + goto onError; + } + v = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); + } + else if (!PyBytes_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return a bytes object (type=%.400s)", + Py_TYPE(v)->tp_name); + v = NULL; + } + return v; + + onError: + return NULL; +} + +PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return an unicode object (type=%.400s)", + Py_TYPE(v)->tp_name); + Py_DECREF(v); + goto onError; + } return v; onError: @@ -6617,7 +6699,7 @@ unicode_encode(PyUnicodeObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) return NULL; - v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); + v = PyUnicode_AsEncodedString((PyObject *)self, encoding, errors); if (v == NULL) goto onError; if (!PyBytes_Check(v)) { |