summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorMarc-André Lemburg <mal@egenix.com>2008-06-06 12:18:17 (GMT)
committerMarc-André Lemburg <mal@egenix.com>2008-06-06 12:18:17 (GMT)
commitb2750b5d334e9c8d262009069bce41c15803eca0 (patch)
tree2c501adf96b37d3afbc32e6fdc344fade85cf3d5 /Objects/unicodeobject.c
parent4efb518185d32d573ff65f11b94c6031340a018a (diff)
downloadcpython-b2750b5d334e9c8d262009069bce41c15803eca0.zip
cpython-b2750b5d334e9c8d262009069bce41c15803eca0.tar.gz
cpython-b2750b5d334e9c8d262009069bce41c15803eca0.tar.bz2
Move the codec decode type checks to bytes/bytearray.decode().
Use faster PyUnicode_FromEncodedObject() for bytes/bytearray.decode(). Add new PyCodec_KnownEncoding() API. Add new PyUnicode_AsDecodedUnicode() and PyUnicode_AsEncodedUnicode() APIs. Add missing PyUnicode_AsDecodedObject() to unicodeobject.h Fix punicode codec to also work on memoryviews.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c96
1 files changed, 89 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 78e38b5..fc8c8a9 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1099,14 +1099,18 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
/* Coerce object */
if (PyBytes_Check(obj)) {
- s = PyBytes_AS_STRING(obj);
- len = PyBytes_GET_SIZE(obj);
- }
+ s = PyBytes_AS_STRING(obj);
+ len = PyBytes_GET_SIZE(obj);
+ }
+ else if (PyByteArray_Check(obj)) {
+ s = PyByteArray_AS_STRING(obj);
+ len = PyByteArray_GET_SIZE(obj);
+ }
else if (PyObject_AsCharBuffer(obj, &s, &len)) {
/* Overwrite the error message with something more useful in
case of a TypeError. */
if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError,
+ PyErr_Format(PyExc_TypeError,
"coercing to Unicode: need string or buffer, "
"%.80s found",
Py_TYPE(obj)->tp_name);
@@ -1188,7 +1192,7 @@ PyObject *PyUnicode_Decode(const char *s,
goto onError;
if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError,
- "decoder did not return an unicode object (type=%.400s)",
+ "decoder did not return a unicode object (type=%.400s)",
Py_TYPE(unicode)->tp_name);
Py_DECREF(unicode);
goto onError;
@@ -1225,6 +1229,37 @@ PyObject *PyUnicode_AsDecodedObject(PyObject *unicode,
return NULL;
}
+PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *v;
+
+ if (!PyUnicode_Check(unicode)) {
+ PyErr_BadArgument();
+ goto onError;
+ }
+
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+
+ /* Decode via the codec registry */
+ v = PyCodec_Decode(unicode, encoding, errors);
+ if (v == NULL)
+ goto onError;
+ if (!PyUnicode_Check(v)) {
+ PyErr_Format(PyExc_TypeError,
+ "decoder did not return a unicode object (type=%.400s)",
+ Py_TYPE(v)->tp_name);
+ Py_DECREF(v);
+ goto onError;
+ }
+ return v;
+
+ onError:
+ return NULL;
+}
+
PyObject *PyUnicode_Encode(const Py_UNICODE *s,
Py_ssize_t size,
const char *encoding,
@@ -1296,7 +1331,54 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
v = PyCodec_Encode(unicode, encoding, errors);
if (v == NULL)
goto onError;
- assert(PyBytes_Check(v));
+ if (PyByteArray_Check(v)) {
+ char msg[100];
+ PyOS_snprintf(msg, sizeof(msg),
+ "encoder %s returned buffer instead of bytes",
+ encoding);
+ if (PyErr_WarnEx(PyExc_RuntimeWarning, msg, 1) < 0) {
+ v = NULL;
+ goto onError;
+ }
+ v = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
+ }
+ else if (!PyBytes_Check(v)) {
+ PyErr_Format(PyExc_TypeError,
+ "encoder did not return a bytes object (type=%.400s)",
+ Py_TYPE(v)->tp_name);
+ v = NULL;
+ }
+ return v;
+
+ onError:
+ return NULL;
+}
+
+PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode,
+ const char *encoding,
+ const char *errors)
+{
+ PyObject *v;
+
+ if (!PyUnicode_Check(unicode)) {
+ PyErr_BadArgument();
+ goto onError;
+ }
+
+ if (encoding == NULL)
+ encoding = PyUnicode_GetDefaultEncoding();
+
+ /* Encode via the codec registry */
+ v = PyCodec_Encode(unicode, encoding, errors);
+ if (v == NULL)
+ goto onError;
+ if (!PyUnicode_Check(v)) {
+ PyErr_Format(PyExc_TypeError,
+ "encoder did not return an unicode object (type=%.400s)",
+ Py_TYPE(v)->tp_name);
+ Py_DECREF(v);
+ goto onError;
+ }
return v;
onError:
@@ -6617,7 +6699,7 @@ unicode_encode(PyUnicodeObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
return NULL;
- v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
+ v = PyUnicode_AsEncodedString((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
if (!PyBytes_Check(v)) {