diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-04-13 19:45:04 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-04-13 19:45:04 (GMT) |
commit | e2cef885a25967605007248cb158671b765df002 (patch) | |
tree | d8e0596e3acc23cdf93c56fc419c846dd585779d /Objects/unicodeobject.c | |
parent | a707f299cb325a8de7bb7cae746b264573bc1cfc (diff) | |
download | cpython-e2cef885a25967605007248cb158671b765df002.zip cpython-e2cef885a25967605007248cb158671b765df002.tar.gz cpython-e2cef885a25967605007248cb158671b765df002.tar.bz2 |
Issue #16061: Speed up str.replace() for replacing 1-character strings.
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 64 |
1 files changed, 38 insertions, 26 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e52571d..3688f4a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -605,6 +605,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -615,6 +616,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -625,6 +627,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -9927,6 +9930,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, return 0; } +static void +replace_1char_inplace(PyObject *u, Py_ssize_t pos, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + int kind = PyUnicode_KIND(u); + void *data = PyUnicode_DATA(u); + Py_ssize_t len = PyUnicode_GET_LENGTH(u); + if (kind == PyUnicode_1BYTE_KIND) { + ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos, + (Py_UCS1 *)data + len, + u1, u2, maxcount); + } + else if (kind == PyUnicode_2BYTE_KIND) { + ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos, + (Py_UCS2 *)data + len, + u1, u2, maxcount); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos, + (Py_UCS4 *)data + len, + u1, u2, maxcount); + } +} + static PyObject * replace(PyObject *self, PyObject *str1, PyObject *str2, Py_ssize_t maxcount) @@ -9943,7 +9971,7 @@ replace(PyObject *self, PyObject *str1, Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); int mayshrink; - Py_UCS4 maxchar, maxchar_str2; + Py_UCS4 maxchar, maxchar_str1, maxchar_str2; if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -9952,15 +9980,16 @@ replace(PyObject *self, PyObject *str1, if (str1 == str2) goto nothing; - if (skind < kind1) - /* substring too wide to be present */ - goto nothing; maxchar = PyUnicode_MAX_CHAR_VALUE(self); + maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1); + if (maxchar < maxchar_str1) + /* substring too wide to be present */ + goto nothing; maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); /* Replacing str1 with str2 may cause a maxchar reduction in the result string. */ - mayshrink = (maxchar_str2 < maxchar); + mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1); maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); if (len1 == len2) { @@ -9970,36 +9999,19 @@ replace(PyObject *self, PyObject *str1, if (len1 == 1) { /* replace characters */ Py_UCS4 u1, u2; - int rkind; - Py_ssize_t index, pos; - char *src, *rbuf; + Py_ssize_t pos; u1 = PyUnicode_READ(kind1, buf1, 0); - pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1); + pos = findchar(sbuf, skind, slen, u1, 1); if (pos < 0) goto nothing; u2 = PyUnicode_READ(kind2, buf2, 0); u = PyUnicode_New(slen, maxchar); if (!u) goto error; - _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); - rkind = PyUnicode_KIND(u); - rbuf = PyUnicode_DATA(u); - PyUnicode_WRITE(rkind, rbuf, pos, u2); - index = 0; - src = sbuf; - while (--maxcount) - { - pos++; - src += pos * PyUnicode_KIND(self); - slen -= pos; - index += pos; - pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1); - if (pos < 0) - break; - PyUnicode_WRITE(rkind, rbuf, index + pos, u2); - } + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); + replace_1char_inplace(u, pos, u1, u2, maxcount); } else { int rkind = skind; |