diff options
author | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-12-27 09:16:49 (GMT) |
---|---|---|
committer | Alexandre Vassalotti <alexandre@peadrop.com> | 2008-12-27 09:16:49 (GMT) |
commit | 44531cb2dbb7d752b98b2b8195f759a559973612 (patch) | |
tree | afb50e4a83bc0b87475f3a5c40707e5a2c8e1fb3 /Objects | |
parent | 9cb6f7f7a5afd587820ca378ad1129427f25b58f (diff) | |
download | cpython-44531cb2dbb7d752b98b2b8195f759a559973612.zip cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.gz cpython-44531cb2dbb7d752b98b2b8195f759a559973612.tar.bz2 |
Optimize built-in unicode codecs by avoiding unnecessary copying.
The approach used is similar to what is currently used in the version
of unicodeobject.c in Python 2.x. The only difference is we use
_PyBytes_Resize instead of _PyString_Resize.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 108 |
1 files changed, 58 insertions, 50 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e57b60c..fa74631 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1873,7 +1873,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, int encodeWhiteSpace, const char *errors) { - PyObject *v, *result; + PyObject *v; /* It might be possible to tighten this worst case */ Py_ssize_t cbAllocated = 5 * size; int inShift = 0; @@ -1889,11 +1889,11 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, if (cbAllocated / 5 != size) return PyErr_NoMemory(); - v = PyByteArray_FromStringAndSize(NULL, cbAllocated); + v = PyBytes_FromStringAndSize(NULL, cbAllocated); if (v == NULL) return NULL; - start = out = PyByteArray_AS_STRING(v); + start = out = PyBytes_AS_STRING(v); for (;i < size; ++i) { Py_UNICODE ch = s[i]; @@ -1958,10 +1958,9 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, *out++= B64(charsleft << (6-bitsleft) ); *out++ = '-'; } - - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start); - Py_DECREF(v); - return result; + if (_PyBytes_Resize(&v, out - start) < 0) + return NULL; + return v; } #undef SPECIAL @@ -2479,7 +2478,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, const char *errors, int byteorder) { - PyObject *v, *result; + PyObject *v; unsigned char *p; Py_ssize_t nsize, bytesize; #ifndef Py_UNICODE_WIDE @@ -2515,11 +2514,11 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, bytesize = nsize * 4; if (bytesize / 4 != nsize) return PyErr_NoMemory(); - v = PyByteArray_FromStringAndSize(NULL, bytesize); + v = PyBytes_FromStringAndSize(NULL, bytesize); if (v == NULL) return NULL; - p = (unsigned char *)PyByteArray_AS_STRING(v); + p = (unsigned char *)PyBytes_AS_STRING(v); if (byteorder == 0) STORECHAR(0xFEFF); if (size == 0) @@ -2556,9 +2555,7 @@ PyUnicode_EncodeUTF32(const Py_UNICODE *s, } done: - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); - Py_DECREF(v); - return result; + return v; #undef STORECHAR } @@ -2757,7 
+2754,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, const char *errors, int byteorder) { - PyObject *v, *result; + PyObject *v; unsigned char *p; Py_ssize_t nsize, bytesize; #ifdef Py_UNICODE_WIDE @@ -2792,11 +2789,11 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, bytesize = nsize * 2; if (bytesize / 2 != nsize) return PyErr_NoMemory(); - v = PyByteArray_FromStringAndSize(NULL, bytesize); + v = PyBytes_FromStringAndSize(NULL, bytesize); if (v == NULL) return NULL; - p = (unsigned char *)PyByteArray_AS_STRING(v); + p = (unsigned char *)PyBytes_AS_STRING(v); if (byteorder == 0) STORECHAR(0xFEFF); if (size == 0) @@ -2828,9 +2825,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, } done: - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); - Py_DECREF(v); - return result; + return v; #undef STORECHAR } @@ -3120,7 +3115,7 @@ static const char *hexdigits = "0123456789abcdef"; PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) { - PyObject *repr, *result; + PyObject *repr; char *p; #ifdef Py_UNICODE_WIDE @@ -3147,17 +3142,20 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, escape. 
*/ + if (size == 0) + return PyBytes_FromStringAndSize(NULL, 0); + if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize) return PyErr_NoMemory(); - repr = PyByteArray_FromStringAndSize(NULL, + repr = PyBytes_FromStringAndSize(NULL, 2 + expandsize*size + 1); if (repr == NULL) return NULL; - p = PyByteArray_AS_STRING(repr); + p = PyBytes_AS_STRING(repr); while (size-- > 0) { Py_UNICODE ch = *s++; @@ -3249,13 +3247,13 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, *p++ = (char) ch; } - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), - p - PyByteArray_AS_STRING(repr)); - Py_DECREF(repr); - return result; + assert(p - PyBytes_AS_STRING(repr) > 0); + if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0) + return NULL; + return repr; } -PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode) +PyObject *PyUnicodeAsUnicodeEscapeString(PyObject *unicode) { PyObject *s; if (!PyUnicode_Check(unicode)) { @@ -3389,7 +3387,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) { - PyObject *repr, *result; + PyObject *repr; char *p; char *q; @@ -3402,13 +3400,13 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, if (size > PY_SSIZE_T_MAX / expandsize) return PyErr_NoMemory(); - repr = PyByteArray_FromStringAndSize(NULL, expandsize * size); + repr = PyBytes_FromStringAndSize(NULL, expandsize * size); if (repr == NULL) return NULL; if (size == 0) - goto done; + return repr; - p = q = PyByteArray_AS_STRING(repr); + p = q = PyBytes_AS_STRING(repr); while (size-- > 0) { Py_UNICODE ch = *s++; #ifdef Py_UNICODE_WIDE @@ -3468,10 +3466,10 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, } size = p - q; - done: - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size); - Py_DECREF(repr); - return result; + assert(size > 0); + if (_PyBytes_Resize(&repr, size) < 0) + return NULL; + return repr; } PyObject 
*PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) @@ -3706,7 +3704,6 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; PyObject *errorHandler = NULL; PyObject *exc = NULL; - PyObject *result = NULL; /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ int known_errorHandler = -1; @@ -3715,10 +3712,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, replacements, if we need more, we'll resize */ if (size == 0) return PyBytes_FromStringAndSize(NULL, 0); - res = PyByteArray_FromStringAndSize(NULL, size); + res = PyBytes_FromStringAndSize(NULL, size); if (res == NULL) return NULL; - str = PyByteArray_AS_STRING(res); + str = PyBytes_AS_STRING(res); ressize = size; while (p<endp) { @@ -3768,7 +3765,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, p = collend; break; case 4: /* xmlcharrefreplace */ - respos = str - PyByteArray_AS_STRING(res); + respos = str - PyBytes_AS_STRING(res); /* determine replacement size (temporarily (mis)uses p) */ for (p = collstart, repsize = 0; p < collend; ++p) { if (*p<10) @@ -3795,9 +3792,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, if (requiredsize > ressize) { if (requiredsize<2*ressize) requiredsize = 2*ressize; - if (PyByteArray_Resize(res, requiredsize)) + if (_PyBytes_Resize(&res, requiredsize)) goto onError; - str = PyByteArray_AS_STRING(res) + respos; + str = PyBytes_AS_STRING(res) + respos; ressize = requiredsize; } /* generate replacement (temporarily (mis)uses p) */ @@ -3815,17 +3812,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, /* need more space? 
(at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str - PyByteArray_AS_STRING(res); + respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_SIZE(repunicode); requiredsize = respos+repsize+(endp-collend); if (requiredsize > ressize) { if (requiredsize<2*ressize) requiredsize = 2*ressize; - if (PyByteArray_Resize(res, requiredsize)) { + if (_PyBytes_Resize(&res, requiredsize)) { Py_DECREF(repunicode); goto onError; } - str = PyByteArray_AS_STRING(res) + respos; + str = PyBytes_AS_STRING(res) + respos; ressize = requiredsize; } /* check if there is anything unencodable in the replacement @@ -3845,13 +3842,23 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, } } } - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res), - str - PyByteArray_AS_STRING(res)); + /* Resize if we allocated to much */ + size = str - PyBytes_AS_STRING(res); + if (size < ressize) { /* If this falls res will be NULL */ + assert(size > 0); + if (_PyBytes_Resize(&res, size) < 0) + goto onError; + } + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return res; + onError: - Py_DECREF(res); + Py_XDECREF(res); Py_XDECREF(errorHandler); Py_XDECREF(exc); - return result; + return NULL; } PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, @@ -4104,7 +4111,7 @@ static int encode_mbcs(PyObject **repr, else { /* Extend string object */ n = PyBytes_Size(*repr); - if (_PyBytes_Resize(repr, n + mbcssize) < 0) + if (_PyBytes_Resize(&repr, n + mbcssize) < 0) return -1; } @@ -4834,7 +4841,8 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, /* Resize if we allocated to much */ if (respos<PyBytes_GET_SIZE(res)) - _PyBytes_Resize(&res, respos); + if (_PyBytes_Resize(&res, respos) < 0) + goto onError; Py_XDECREF(exc); Py_XDECREF(errorHandler); |