Optimize built-in unicode codecs by avoiding unnecessary copying.

The approach used is similar to what is currently used in the version of unicodeobject.c in Python 2.x. The only difference is that we use _PyBytes_Resize instead of _PyString_Resize.
author: Alexandre Vassalotti <alexandre@peadrop.com> 2008-12-27 09:16:49 (GMT)
committer: Alexandre Vassalotti <alexandre@peadrop.com> 2008-12-27 09:16:49 (GMT)
commit: 44531cb2dbb7d752b98b2b8195f759a559973612 (patch)
tree: afb50e4a83bc0b87475f3a5c40707e5a2c8e1fb3
parent: 9cb6f7f7a5afd587820ca378ad1129427f25b58f (diff)
download: cpython-44531cb2dbb7d752b98b2b8195f759a559973612.zip
cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.gz
cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.bz2
1 files changed, 58 insertions, 50 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e57b60c..fa74631 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1873,7 +1873,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
                    int encodeWhiteSpace,
                    const char *errors)
 {
-    PyObject *v, *result;
+    PyObject *v;
     /* It might be possible to tighten this worst case */
     Py_ssize_t cbAllocated = 5 * size;
     int inShift = 0;
@@ -1889,11 +1889,11 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
     if (cbAllocated / 5 != size)
         return PyErr_NoMemory();
 
-    v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
+    v = PyBytes_FromStringAndSize(NULL, cbAllocated);
     if (v == NULL)
         return NULL;
 
-    start = out = PyByteArray_AS_STRING(v);
+    start = out = PyBytes_AS_STRING(v);
     for (;i < size; ++i) {
         Py_UNICODE ch = s[i];
 
@@ -1958,10 +1958,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
         *out++= B64(charsleft << (6-bitsleft) );
         *out++ = '-';
     }
-
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start);
-    Py_DECREF(v);
-    return result;
+    if (_PyBytes_Resize(&v, out - start) < 0)
+        return NULL;
+    return v;
 }
 
 #undef SPECIAL
@@ -2479,7 +2478,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
 		      const char *errors,
 		      int byteorder)
 {
-    PyObject *v, *result;
+    PyObject *v;
     unsigned char *p;
     Py_ssize_t nsize, bytesize;
 #ifndef Py_UNICODE_WIDE
@@ -2515,11 +2514,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
     bytesize = nsize * 4;
     if (bytesize / 4 != nsize)
 	return PyErr_NoMemory();
-    v = PyByteArray_FromStringAndSize(NULL, bytesize);
+    v = PyBytes_FromStringAndSize(NULL, bytesize);
     if (v == NULL)
         return NULL;
 
-    p = (unsigned char *)PyByteArray_AS_STRING(v);
+    p = (unsigned char *)PyBytes_AS_STRING(v);
     if (byteorder == 0)
 	STORECHAR(0xFEFF);
     if (size == 0)
@@ -2556,9 +2555,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s,
     }
 
   done:
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
-    Py_DECREF(v);
-    return result;
+    return v;
 #undef STORECHAR
 }
 
@@ -2757,7 +2754,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
 		      const char *errors,
 		      int byteorder)
 {
-    PyObject *v, *result;
+    PyObject *v;
     unsigned char *p;
     Py_ssize_t nsize, bytesize;
 #ifdef Py_UNICODE_WIDE
@@ -2792,11 +2789,11 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
     bytesize = nsize * 2;
     if (bytesize / 2 != nsize)
 	return PyErr_NoMemory();
-    v = PyByteArray_FromStringAndSize(NULL, bytesize);
+    v = PyBytes_FromStringAndSize(NULL, bytesize);
     if (v == NULL)
         return NULL;
 
-    p = (unsigned char *)PyByteArray_AS_STRING(v);
+    p = (unsigned char *)PyBytes_AS_STRING(v);
     if (byteorder == 0)
 	STORECHAR(0xFEFF);
     if (size == 0)
@@ -2828,9 +2825,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
     }
 
   done:
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
-    Py_DECREF(v);
-    return result;
+    return v;
 #undef STORECHAR
 }
 
@@ -3120,7 +3115,7 @@ static const char *hexdigits = "0123456789abcdef";
 PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 					Py_ssize_t size)
 {
-    PyObject *repr, *result;
+    PyObject *repr;
     char *p;
 
 #ifdef Py_UNICODE_WIDE
@@ -3147,17 +3142,20 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
        escape.
     */
 
+    if (size == 0)
+        return PyBytes_FromStringAndSize(NULL, 0);
+
     if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
 	return PyErr_NoMemory();
 
-    repr = PyByteArray_FromStringAndSize(NULL,
+    repr = PyBytes_FromStringAndSize(NULL,
         2
         + expandsize*size
         + 1);
     if (repr == NULL)
         return NULL;
 
-    p = PyByteArray_AS_STRING(repr);
+    p = PyBytes_AS_STRING(repr);
 
     while (size-- > 0) {
         Py_UNICODE ch = *s++;
@@ -3249,13 +3247,13 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
             *p++ = (char) ch;
     }
 
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr),
-                                        p - PyByteArray_AS_STRING(repr));
-    Py_DECREF(repr);
-    return result;
+    assert(p - PyBytes_AS_STRING(repr) > 0);
+    if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
+        return NULL;
+    return repr;
 }
 
-PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
+PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
 {
     PyObject *s;
     if (!PyUnicode_Check(unicode)) {
@@ -3389,7 +3387,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
 					   Py_ssize_t size)
 {
-    PyObject *repr, *result;
+    PyObject *repr;
     char *p;
     char *q;
 
@@ -3402,13 +3400,13 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
     if (size > PY_SSIZE_T_MAX / expandsize)
 	return PyErr_NoMemory();
     
-    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
+    repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
     if (repr == NULL)
         return NULL;
     if (size == 0)
-        goto done;
+        return repr;
 
-    p = q = PyByteArray_AS_STRING(repr);
+    p = q = PyBytes_AS_STRING(repr);
     while (size-- > 0) {
         Py_UNICODE ch = *s++;
 #ifdef Py_UNICODE_WIDE
@@ -3468,10 +3466,10 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
     }
     size = p - q;
 
-  done:
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
-    Py_DECREF(repr);
-    return result;
+    assert(size > 0);
+    if (_PyBytes_Resize(&repr, size) < 0)
+        return NULL;
+    return repr;
 }
 
 PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
@@ -3706,7 +3704,6 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
     const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    PyObject *result = NULL;
     /* the following variable is used for caching string comparisons
      * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
     int known_errorHandler = -1;
@@ -3715,10 +3712,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
        replacements, if we need more, we'll resize */
     if (size == 0)
         return PyBytes_FromStringAndSize(NULL, 0);
-    res = PyByteArray_FromStringAndSize(NULL, size);
+    res = PyBytes_FromStringAndSize(NULL, size);
     if (res == NULL)
         return NULL;
-    str = PyByteArray_AS_STRING(res);
+    str = PyBytes_AS_STRING(res);
     ressize = size;
 
     while (p<endp) {
@@ -3768,7 +3765,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    p = collend;
 		    break;
 		case 4: /* xmlcharrefreplace */
-		    respos = str - PyByteArray_AS_STRING(res);
+		    respos = str - PyBytes_AS_STRING(res);
 		    /* determine replacement size (temporarily (mis)uses p) */
 		    for (p = collstart, repsize = 0; p < collend; ++p) {
 			if (*p<10)
@@ -3795,9 +3792,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			    requiredsize = 2*ressize;
-			if (PyByteArray_Resize(res, requiredsize))
+			if (_PyBytes_Resize(&res, requiredsize))
 			    goto onError;
-			str = PyByteArray_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		    }
 		    /* generate replacement (temporarily (mis)uses p) */
@@ -3815,17 +3812,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    /* need more space? (at least enough for what we
 		       have+the replacement+the rest of the string, so
 		       we won't have to check space for encodable characters) */
-		    respos = str - PyByteArray_AS_STRING(res);
+		    respos = str - PyBytes_AS_STRING(res);
 		    repsize = PyUnicode_GET_SIZE(repunicode);
 		    requiredsize = respos+repsize+(endp-collend);
 		    if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			    requiredsize = 2*ressize;
-			if (PyByteArray_Resize(res, requiredsize)) {
+			if (_PyBytes_Resize(&res, requiredsize)) {
 			    Py_DECREF(repunicode);
 			    goto onError;
 			}
-			str = PyByteArray_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		    }
 		    /* check if there is anything unencodable in the replacement
@@ -3845,13 +3842,23 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 	    }
 	}
     }
-    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res),
-                                        str - PyByteArray_AS_STRING(res));
+    /* Resize if we allocated too much */
+    size = str - PyBytes_AS_STRING(res);
+    if (size < ressize) { /* If this fails res will be NULL */
+        assert(size >= 0);
+        if (_PyBytes_Resize(&res, size) < 0)
+            goto onError;
+    }
+
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return res;
+
   onError:
-    Py_DECREF(res);
+    Py_XDECREF(res);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return result;
+    return NULL;
 }
 
 PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
@@ -4104,7 +4111,7 @@ static int encode_mbcs(PyObject **repr,
     else {
 	/* Extend string object */
 	n = PyBytes_Size(*repr);
-	if (_PyBytes_Resize(repr, n + mbcssize) < 0)
+	if (_PyBytes_Resize(repr, n + mbcssize) < 0)
 	    return -1;
     }
 
@@ -4834,7 +4841,8 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
 
     /* Resize if we allocated to much */
     if (respos<PyBytes_GET_SIZE(res))
-	_PyBytes_Resize(&res, respos);
+        if (_PyBytes_Resize(&res, respos) < 0)
+            goto onError;
 
     Py_XDECREF(exc);
     Py_XDECREF(errorHandler);
author	Alexandre Vassalotti <alexandre@peadrop.com>	2008-12-27 09:16:49 (GMT)
committer	Alexandre Vassalotti <alexandre@peadrop.com>	2008-12-27 09:16:49 (GMT)
commit	44531cb2dbb7d752b98b2b8195f759a559973612 (patch)
tree	afb50e4a83bc0b87475f3a5c40707e5a2c8e1fb3
parent	9cb6f7f7a5afd587820ca378ad1129427f25b58f (diff)
download	cpython-44531cb2dbb7d752b98b2b8195f759a559973612.zip cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.gz cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.bz2