summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-03-02 01:03:14 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-03-02 01:03:14 (GMT)
commit: a5c68c3cb7bc5068833742dc10a3cd5a19e69e12 (patch)
tree: a35feeaad912317ffb10c797c95b4e08bbd1b647 /Objects/unicodeobject.c
parent: f3fd733f928752c9e35f8f5141a54cd21c0993b5 (diff)
download: cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.zip
cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.tar.gz
cpython-a5c68c3cb7bc5068833742dc10a3cd5a19e69e12.tar.bz2
Issue #8923: cache str.encode() result
When a string is encoded to UTF-8 in strict mode, the result is cached into the object. Examples: str.encode(), str.encode('utf-8'), PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8", NULL).
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 25
1 files changed, 16 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e4539cd..6801259 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}
if (encoding == NULL)
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ return PyUnicode_AsUTF8String(unicode);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- errors);
+ {
+ if (errors == NULL || strcmp(errors, "strict") == 0) {
+ return PyUnicode_AsUTF8String(unicode);
+ } else {
+ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ errors);
+ }
+ }
else if ((strcmp(lower, "latin-1") == 0) ||
(strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
@@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
PyObject *
PyUnicode_AsUTF8String(PyObject *unicode)
{
+ PyObject *utf8;
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
return NULL;
}
- return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
+ if (utf8 == NULL)
+ return NULL;
+ Py_INCREF(utf8);
+ return utf8;
}
/* --- UTF-32 Codec ------------------------------------------------------- */