diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 16:45:39 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2013-04-14 16:45:39 (GMT) |
commit | 55c08781e8bcbba18e016060e47aabc754fddc79 (patch) | |
tree | 1d80075bfaeb527ecf60a07c9a1cf8e73f9f6a07 /Objects | |
parent | af03757d20a13b4090d06e0a198122be194aa6b0 (diff) | |
download | cpython-55c08781e8bcbba18e016060e47aabc754fddc79.zip cpython-55c08781e8bcbba18e016060e47aabc754fddc79.tar.gz cpython-55c08781e8bcbba18e016060e47aabc754fddc79.tar.bz2 |
Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is escaped
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 147 |
1 files changed, 78 insertions, 69 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0996afb..907fa5a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11968,7 +11968,7 @@ unicode_repr(PyObject *unicode) Py_ssize_t isize; Py_ssize_t osize, squote, dquote, i, o; Py_UCS4 max, quote; - int ikind, okind; + int ikind, okind, unchanged; void *idata, *odata; if (PyUnicode_READY(unicode) == -1) @@ -11979,7 +11979,7 @@ unicode_repr(PyObject *unicode) /* Compute length of output, quote characters, and maximum character */ - osize = 2; /* quotes */ + osize = 0; max = 127; squote = dquote = 0; ikind = PyUnicode_KIND(unicode); @@ -12010,7 +12010,9 @@ unicode_repr(PyObject *unicode) } quote = '\''; + unchanged = (osize == isize); if (squote) { + unchanged = 0; if (dquote) /* Both squote and dquote present. Use squote, and escape them */ @@ -12018,6 +12020,7 @@ unicode_repr(PyObject *unicode) else quote = '"'; } + osize += 2; /* quotes */ repr = PyUnicode_New(osize, max); if (repr == NULL) @@ -12027,82 +12030,88 @@ unicode_repr(PyObject *unicode) PyUnicode_WRITE(okind, odata, 0, quote); PyUnicode_WRITE(okind, odata, osize-1, quote); + if (unchanged) { + _PyUnicode_FastCopyCharacters(repr, 1, + unicode, 0, + isize); + } + else { + for (i = 0, o = 1; i < isize; i++) { + Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - for (i = 0, o = 1; i < isize; i++) { - Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - - /* Escape quotes and backslashes */ - if ((ch == quote) || (ch == '\\')) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, ch); - continue; - } + /* Escape quotes and backslashes */ + if ((ch == quote) || (ch == '\\')) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, ch); + continue; + } - /* Map special whitespace to '\t', \n', '\r' */ - if (ch == '\t') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 't'); - } - else if (ch == '\n') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'n'); - } - else if (ch == '\r') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'r'); - } + /* Map special whitespace to '\t', \n', '\r' */ + if (ch == '\t') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 't'); + } + else if (ch == '\n') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'n'); + } + else if (ch == '\r') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'r'); + } - /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch == 0x7F) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } + /* Map non-printable US ASCII to '\xhh' */ + else if (ch < ' ' || ch == 0x7F) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } - /* Copy ASCII characters as-is */ - else if (ch < 0x7F) { - PyUnicode_WRITE(okind, odata, o++, ch); - } + /* Copy ASCII characters as-is */ + else if (ch < 0x7F) { + PyUnicode_WRITE(okind, odata, o++, ch); + } - /* Non-ASCII characters */ - else { - /* Map Unicode whitespace and control characters - (categories Z* and C* except ASCII space) - */ - if (!Py_UNICODE_ISPRINTABLE(ch)) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - /* Map 8-bit characters to '\xhh' */ - if (ch <= 0xff) { - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } - /* Map 16-bit characters to '\uxxxx' */ - else if (ch <= 0xffff) { - PyUnicode_WRITE(okind, odata, o++, 'u'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + /* Non-ASCII characters */ + else { + /* Map Unicode whitespace and control characters + (categories Z* and C* except ASCII space) + */ + if (!Py_UNICODE_ISPRINTABLE(ch)) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + /* Map 8-bit characters to '\xhh' */ + if (ch <= 0xff) { + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } + /* Map 16-bit characters to '\uxxxx' */ + else if (ch <= 0xffff) { + PyUnicode_WRITE(okind, odata, o++, 'u'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } + /* Map 21-bit characters to '\U00xxxxxx' */ + else { + PyUnicode_WRITE(okind, odata, o++, 'U'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } } - /* Map 21-bit characters to '\U00xxxxxx' */ + /* Copy characters as-is */ else { - PyUnicode_WRITE(okind, odata, o++, 'U'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + PyUnicode_WRITE(okind, odata, o++, ch); } } - /* Copy characters as-is */ - else { - PyUnicode_WRITE(okind, odata, o++, ch); - } } } /* Closing quote already added at the beginning */ |