diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-03-02 01:03:14 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-03-02 01:03:14 (GMT) |
commit | a5c68c3cb7bc5068833742dc10a3cd5a19e69e12 (patch) | |
tree | a35feeaad912317ffb10c797c95b4e08bbd1b647 /Objects/unicodeobject.c | |
parent | f3fd733f928752c9e35f8f5141a54cd21c0993b5 (diff) | |
download | cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.zip cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.tar.gz cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.tar.bz2 |
Issue #8923: cache str.encode() result
When a string is encoded to UTF-8 in strict mode, the result is cached into the
object. Examples: str.encode(), str.encode('utf-8'), PyUnicode_AsUTF8String()
and PyUnicode_AsEncodedString(unicode, "utf-8", NULL).
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 25 |
1 files changed, 16 insertions, 9 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e4539cd..6801259 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     if (encoding == NULL)
-        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                    PyUnicode_GET_SIZE(unicode),
-                                    errors);
+        return PyUnicode_AsUTF8String(unicode);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
         if ((strcmp(lower, "utf-8") == 0) ||
             (strcmp(lower, "utf8") == 0))
-            return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                        PyUnicode_GET_SIZE(unicode),
-                                        errors);
+        {
+            if (errors == NULL || strcmp(errors, "strict") == 0) {
+                return PyUnicode_AsUTF8String(unicode);
+            } else {
+                return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                            PyUnicode_GET_SIZE(unicode),
+                                            errors);
+            }
+        }
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
@@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
 PyObject *
 PyUnicode_AsUTF8String(PyObject *unicode)
 {
+    PyObject *utf8;
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
         return NULL;
     }
-    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                PyUnicode_GET_SIZE(unicode),
-                                NULL);
+    utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
+    if (utf8 == NULL)
+        return NULL;
+    Py_INCREF(utf8);
+    return utf8;
 }
 
 /* --- UTF-32 Codec ------------------------------------------------------- */
```